Code example #1
def crawler_req(visa_type, place, start_time, requests):
    try:
        # prepare session
        sess = session_op.get_session(visa_type, place)
        if not sess:
            logger.warning("%s, %s, %s, FAILED, %s" %
                           (start_time, visa_type, place, "No Session"))
            return
        refresh_endpoint = g.value(
            "crawler_node", "") + "/refresh/?session=" + sess
        try:
            r = requests.get(refresh_endpoint, timeout=7,
                             proxies=g.value("proxies", None))
        except Exception:
            logger.warning("%s, %s, %s, FAILED, %s" %
                           (start_time, visa_type, place, "Endpoint Timeout"))
            check_crawler_node()
            return
        if r.status_code != 200:
            logger.warning("%s, %s, %s, FAILED, %s" % (
                start_time, visa_type, place, "Endpoint Inaccessible"))
            check_crawler_node()
            return
        result = r.json()
        if result["code"] > 0:
            logger.warning("%s, %s, %s, FAILED, %s" %
                           (start_time, visa_type, place, "Session Expired"))
            session_op.replace_session(visa_type, place, sess)
            return
        date = tuple(map(int, result["msg"].split("-")))
        logger.info("%s, %s, %s, SUCCESS, %s" %
                    (start_time, visa_type, place, date))
        g.assign("status_%s_%s" % (visa_type, place), date)
    except Exception:
        logger.error(traceback.format_exc())
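
For reference, the parsing above implies that the crawler node's /refresh endpoint answers with a small JSON object. A sketch of the assumed shape (field semantics inferred from the code; the values are illustrative):

# Assumed response of GET <crawler_node>/refresh/?session=<sess>
{
    "code": 0,            # > 0 means the session has expired
    "msg": "2020-5-27"    # parsed via tuple(map(int, msg.split("-")))
}
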
Code example #2
def check_crawler_node():
    if g.value("crawler_checking", False):
        return
    g.assign("crawler_checking", True)
    crawler_filepath = g.value("crawler_path", None)
    last_node = g.value("crawler_node", "")
    if not crawler_filepath:
        logger.warning("Crawler file not found")
        g.assign("crawler_checking", False)
        return
    with open(crawler_filepath, "r") as f:
        nodes = list(f.readlines())
    for node in nodes:
        node = node.strip()
        try:
            r = requests.get(node, timeout=5)
            if r.status_code == 200:
                if last_node != node:
                    g.assign("crawler_node", node)
                    logger.warning("Choose Crawler Node: " + node)
                g.assign("crawler_checking", False)
                return
        except Exception:
            pass
    logger.error("All Crawler Nodes Failed")
    g.assign("crawler_checking", False)
Code example #3
File: fast_visa.py  Project: jian-en/us-visa
def crawler_req(visa_type, place):
    try:
        # prepare session
        sess = session_op.get_session(visa_type, place)
        if not sess:
            logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "No Session"))
            return
        cookies = copy.deepcopy(g.COOKIES)
        cookies["sid"] = sess
        # send request
        r = requests.get(g.CANCEL_URI, headers=g.HEADERS, cookies=cookies, proxies=g.value("proxies", None))
        if r.status_code != 200:
            logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Session Expired"))
            session_op.replace_session(visa_type, place, sess)
            return
        # parse HTML
        page = r.text
        date = get_date(page)
        if not date:
            logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Session Expired"))
            session_op.replace_session(visa_type, place, sess)
            return
        elif date == (0, 0, 0):
            logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Date Not Found"))
            last_status = g.value("status_%s_%s" % (visa_type, place), (0, 0, 0))
            if last_status != (0, 0, 0): 
                session_op.replace_session(visa_type, place, sess)
            elif not check_alive(page):
                logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Session Expired"))
                session_op.replace_session(visa_type, place, sess)
            return
        logger.info("%s, %s, SUCCESS, %s" % (visa_type, place, date))
        g.assign("status_%s_%s" % (visa_type, place), date)
    except Exception:
        logger.error(traceback.format_exc())
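
get_date is called throughout these snippets but its body is never shown. From the call sites, its contract appears to be: None when the page no longer looks like a logged-in appointment page (session expired), (0, 0, 0) when the page is valid but lists no available date, and a (year, month, day) tuple otherwise. A minimal sketch under those assumptions; the marker string and date format are guesses, not the project's actual parser:

import re

def get_date(page):
    # Hypothetical sketch of the undisclosed parser.
    if "appointment" not in page.lower():                # assumed liveness marker
        return None
    m = re.search(r"(\d{4})-(\d{1,2})-(\d{1,2})", page)  # assumed date format
    if not m:
        return (0, 0, 0)
    return tuple(map(int, m.groups()))                   # -> (year, month, day)
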
Code example #4
def merge(fn, s, cur, visa_type):
    status = g.value("merge_lock" + visa_type, 0)
    if status == 1:
        return
    g.assign("merge_lock" + visa_type, 1)
    orig = json.loads(open(fn).read()) if os.path.exists(fn) else {}
    open(fn.replace('.json', '-last.json'),
         'w').write(json.dumps(orig, ensure_ascii=False))
    last = copy.deepcopy(orig)
    for k in s:
        if '2-' in k:
            orig[k] = min_date(orig.get(k, '/'), s[k])
        else:
            orig[k] = s[k]
    if cur not in orig.get('index', []):
        orig['index'] = [cur] + orig.get('index', [])
    orig['index'], o = orig['index'][:50], orig['index'][50:]
    rmkeys = [i for i in orig if i.split('-')[-1] in o]
    for r in rmkeys:
        orig.pop(r)
    open(fn, 'w').write(json.dumps(orig, ensure_ascii=False))
    g.assign("merge_lock" + visa_type, 0)
    subprocess.check_call([
        'python3', 'notify.py', '--type', visa_type, '--js',
        json.dumps(orig, ensure_ascii=False), '--last_js',
        json.dumps(last, ensure_ascii=False)
    ])
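
merge depends on a min_date helper that is not part of the snippet. Its usage suggests "/" is a "no date" sentinel and real values are "Y/M/D" strings, so a plausible sketch is:

def min_date(a, b):
    # Hypothetical helper: return the earlier of two "Y/M/D" strings,
    # where "/" means "no date" and loses to any concrete date.
    if a == '/':
        return b
    if b == '/':
        return a
    return min(a, b, key=lambda v: tuple(map(int, v.split('/'))))
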
Code example #5
    def check_crawler_server_connection():
        """ Check the connection of all the crawler server.
            Update the current crawler server in use.
        """
        if G.value('checking_crawler_connection', False):
            return

        G.assign('checking_crawler_connection', True)
        crawler_path = G.value('crawler_path', None)
        previous_crawler_node = G.value('current_crawler_node', '')

        if crawler_path is None or not os.path.exists(crawler_path):
            LOGGER.warning(
                'GlobalVar crawler file path is not found or path not valid.')
            G.assign('checking_crawler_connection', False)
            return

        with open(crawler_path) as f:
            crawler_server_lst = [line.strip() for line in f.readlines()]

        for crawler_node in crawler_server_lst:
            try:
                res = requests.get(crawler_node, timeout=5)
                if res.status_code == 200:
                    # A healthy node ends the scan; switch only when it
                    # differs from the previously selected node.
                    if previous_crawler_node != crawler_node:
                        G.assign('current_crawler_node', crawler_node)
                        LOGGER.warning('Choose crawler node: %s', crawler_node)
                    G.assign('checking_crawler_connection', False)
                    return
            except Exception:
                pass

        LOGGER.error('All crawler servers fail!')
        G.assign('checking_crawler_connection', False)
Code example #6
 def init_cache(self):
     session_file = g.value("session_file", "session.json")
     session = {}
     if os.path.exists(session_file):
         with open(session_file, "r") as f:
             try:
                 session = json.load(f)
             except Exception:
                 pass
     g.assign("session", session)
Code example #7
 def get_session(self, visa_type, place):
     # get a session given visa type and place. return None if failed.
     session = g.value("session", {})
     if visa_type not in session or place not in session[visa_type]:
         return None
     idx = g.value("idx_%s_%s" % (visa_type, place), 0)
     sess_list = session[visa_type][place]
     if len(sess_list) == 0:
         return None
     sess = sess_list[idx % len(sess_list)]
     logger.debug("session: " + sess)
     g.assign("idx_%s_%s" % (visa_type, place), idx + 1)
     return sess
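
Read together with init_cache in the previous example, get_session implies that the cached session pool is a nested mapping from visa type to place to a list of session ids, served round-robin through a per-(visa_type, place) counter. An illustrative shape (the session ids are made up):

{
    "F": {
        "北京": ["sid-aaa", "sid-bbb"],  # calls alternate sid-aaa, sid-bbb, sid-aaa, ...
        "上海": ["sid-ccc"]
    },
    "B": {
        "广州": []                       # empty list -> get_session returns None
    }
}
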
Code example #8
def crawler_req_ais(visa_type, code, places, start_time, requests):
    try:
        # prepare session
        sess, schedule_id = session_op.get_session(visa_type, code)
        if not sess:
            logger.warning("%s, %s, %s, FAILED, %s" %
                           (start_time, visa_type, code, "No Session"))
            return
        refresh_endpoint = g.value(
            "crawler_node", "") + "/ais/refresh/?code=%s&id=%s&session=%s" % (
                code, schedule_id, sess)
        try:
            r = requests.get(refresh_endpoint,
                             timeout=7,
                             proxies=g.value("proxies", None))
        except Exception:
            logger.warning("%s, %s, %s, FAILED, %s" %
                           (start_time, visa_type, code, "Endpoint Timeout"))
            check_crawler_node()
            return
        if r.status_code != 200:
            logger.warning(
                "%s, %s, %s, FAILED, %s" %
                (start_time, visa_type, code, "Endpoint Inaccessible"))
            check_crawler_node()
            return
        result = r.json()
        if result["code"] > 0:
            logger.warning("%s, %s, %s, FAILED, %s" %
                           (start_time, visa_type, code, "Session Expired"))
            session_op.replace_session(visa_type, code, sess)
            return
        date_list = result["msg"]
        new_sess = result["session"]
        session_op.replace_session_immediate(visa_type, code, sess, new_sess)
        for place, date in date_list:
            if place not in places:
                continue
            logger.info("%s, %s, %s, %s, SUCCESS, %s" %
                        (start_time, visa_type, code, place, date))
            g.assign("status_%s_%s" % (visa_type, place), date)
    except Exception:
        logger.error(traceback.format_exc())
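
The AIS variant expects a richer payload from the crawler node than the plain /refresh endpoint: msg carries (place, date) pairs and the response includes a replacement session id. An assumed example, based only on how the fields are read above (place names and dates are illustrative):

# Assumed response of GET <crawler_node>/ais/refresh/?code=...&id=...&session=...
{
    "code": 0,                                        # > 0: session expired
    "msg": [["PlaceA", [2020, 5, 27]],
            ["PlaceB", [2020, 6, 1]]],                # (place, date) pairs
    "session": "new-session-id"                       # installed via replace_session_immediate
}
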
Code example #9
File: fast_visa.py  Project: codewangg/us-visa
def init():
    global logger

    # get secret and proxy config
    parser = argparse.ArgumentParser()
    parser.add_argument('--secret',
                        type=str,
                        default='',
                        help="Fateadm secret file")
    parser.add_argument('--proxy', type=int, help="local proxy port")
    parser.add_argument('--session',
                        type=str,
                        default="session.json",
                        help="path to save sessions")
    parser.add_argument('--log_dir',
                        type=str,
                        default="./fast_visa",
                        help="directory to save logs")
    args = parser.parse_args()

    # config logging
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    log_path = os.path.join(args.log_dir, "fast_visa.log")
    logger = logging.getLogger("fast_visa")
    handler = TimedRotatingFileHandler(log_path, when="midnight", interval=1)
    handler.suffix = "%Y%m%d"
    formatter = logging.Formatter(
        "%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.setLevel(logging.INFO)
    logger.addHandler(handler)
    logger.info("Initialization...")

    # config cracker
    if len(args.secret) == 0:
        cracker = args
        cracker.solve = lambda x: input('Captcha: ')
    else:
        cracker = Captcha(args.secret, args.proxy)
    proxies = dict(http='socks5h://127.0.0.1:' + str(args.proxy),
                   https='socks5h://127.0.0.1:' +
                   str(args.proxy)) if args.proxy else None
    g.assign("proxies", proxies)
    g.assign("cracker", cracker)

    # read cached session pool (if any)
    g.assign("session_file", args.session)
    session_op.init_cache()
Code example #10
def init():
    global logger

    # get secret and proxy config
    parser = argparse.ArgumentParser()
    parser.add_argument('--secret', type=str, default='',
                        help="Fateadm secret file")
    parser.add_argument('--proxy', type=int, help="local proxy port")
    parser.add_argument('--session', type=str,
                        default="session.json", help="path to save sessions")
    parser.add_argument('--log_dir', type=str,
                        default="./fast_visa", help="directory to save logs")
    args = parser.parse_args()

    # config logging
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    log_path = os.path.join(args.log_dir, "fast_visa.log")
    logger = logging.getLogger("fast_visa")
    handler = TimedRotatingFileHandler(log_path, when="midnight", interval=1)
    handler.suffix = "%Y%m%d"
    formatter = logging.Formatter(
        "%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.setLevel(logging.INFO)
    logger.addHandler(handler)
    logger.info("Initialization...")

    # config cracker
    # if len(args.secret) == 0:
    #    cracker = args
    #    cracker.solve = lambda x: input('Captcha: ')
    # else:
    #    cracker = Captcha(args.secret, args.proxy)
    cracker = Captcha()
    proxies = dict(
        http='socks5h://127.0.0.1:' + str(args.proxy),
        https='socks5h://127.0.0.1:' + str(args.proxy)
    ) if args.proxy else None
    g.assign("proxies", proxies)
    g.assign("cracker", cracker)

    # read cached session pool (if any)
    g.assign("session_file", args.session)
    session_op.init_cache()

    # restore previous data
    for visa_type in ["F", "B", "H", "O", "L"]:
        fn = '../visa/visa.json' if visa_type == "F" else '../visa/visa-%s.json' % visa_type.lower()
        orig = json.loads(open(fn).read()) if os.path.exists(fn) else {}
        if "time" not in orig:
            continue
        date = orig["time"].split()[0]
        data = {}
        for k, v in orig.items():
            if k.endswith("2-" + date):
                continue
            if k.endswith(date):
                place = k.split("-")[0]
                if v == "/":
                    y, m, d = 0, 0, 0
                else:
                    y, m, d = list(map(int, v.split("/")))
                data[place] = (y, m, d)
                g.assign("status_%s_%s" % (visa_type, place), (y, m, d))
        logger.info("%s, Restored date: %s" % (visa_type, str(data)))
Code example #11
File: session_op.py  Project: codewangg/us-visa
def visa_select(visa_type, place, sid):
    proxies = g.value("proxies", None)
    cookies = copy.deepcopy(g.COOKIES)
    cookies["sid"] = sid

    # select immigrant/nonimmigrant visa
    select_visa_type_uri = "https://cgifederal.secure.force.com/selectvisatype"
    r = requests.get(select_visa_type_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:ttip": "Nonimmigrant Visa",
        "j_id0:SiteTemplate:theForm:j_id176": "继续",
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_type_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None

    # select place
    if place != "香港":
        select_post_uri = "https://cgifederal.secure.force.com/selectpost"
        r = requests.get(select_post_uri, cookies=cookies, proxies=proxies)
        if r.status_code != 200:
            return None
        soup = bs(r.text, "html.parser")
        view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
        view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
        view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
        view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
        contact_id = soup.find(id="j_id0:SiteTemplate:j_id112:contactId").get("value")
        place2id = {
            "北京": "j_id0:SiteTemplate:j_id112:j_id165:0", 
            "成都": "j_id0:SiteTemplate:j_id112:j_id165:1", 
            "广州": "j_id0:SiteTemplate:j_id112:j_id165:2", 
            "上海": "j_id0:SiteTemplate:j_id112:j_id165:3", 
            "沈阳": "j_id0:SiteTemplate:j_id112:j_id165:4"
        }
        place_code = soup.find(id=place2id[place]).get("value")
        data = {
            "j_id0:SiteTemplate:j_id112": "j_id0:SiteTemplate:j_id112",
            "j_id0:SiteTemplate:j_id112:j_id165": place_code,
            "j_id0:SiteTemplate:j_id112:j_id169": "继续",
            "j_id0:SiteTemplate:j_id112:contactId": contact_id,
            "com.salesforce.visualforce.ViewState": view_state,
            "com.salesforce.visualforce.ViewStateVersion": view_state_version,
            "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
            "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
        }
        r = requests.post(select_post_uri, data=data, cookies=cookies, proxies=proxies)
        if r.status_code != 200:
            return None

    # select visa category
    select_visa_category_uri = "https://cgifederal.secure.force.com/selectvisacategory"
    r = requests.get(select_visa_category_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    contact_id = soup.find(id="j_id0:SiteTemplate:j_id109:contactId").get("value")
    prefix = "j_id0:SiteTemplate:j_id109:j_id162:"
    category2id = {
        "B": {"北京": 0, "成都": 0, "广州": 0, "上海": 0, "沈阳": 0, "香港": 0}, 
        "F": {"北京": 1, "成都": 1, "广州": 1, "上海": 1, "沈阳": 1, "香港": 1}, 
        "O": {"北京": 4, "成都": 2, "广州": 3, "上海": 4, "沈阳": 2, "香港": 3}, 
        "H": {"北京": 2, "广州": 3, "上海": 2, "香港": 3}, 
        "L": {"北京": 3, "广州": 2, "上海": 3, "香港": 3} 
    }
    category_code = soup.find(id=prefix + str(category2id[visa_type][place])).get("value")
    data = {
        "j_id0:SiteTemplate:j_id109": "j_id0:SiteTemplate:j_id109",
        "j_id0:SiteTemplate:j_id109:j_id162": category_code,
        "j_id0:SiteTemplate:j_id109:j_id166": "继续",
        "j_id0:SiteTemplate:j_id109:contactId": contact_id,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_category_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None

    # select visa type
    select_visa_code_uri = "https://cgifederal.secure.force.com/selectvisacode"
    r = requests.get(select_visa_code_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    type2id = {
        "F": 0, 
        "B": 2, 
        "H": 0, 
        "O": 11 if place == "香港" else (7 if place == "广州" else 0), 
        "L": 8 if place == "香港" else 2
    }
    inputs = soup.find_all("input")
    type_codes = [x.get("value") for x in inputs if x.get("name") == "selectedVisaClass"]
    type_code = type_codes[type2id[visa_type]]
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:j_id178": "继续",
        "selectedVisaClass": type_code,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_code_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None

    # update data
    update_data_uri = "https://cgifederal.secure.force.com/updatedata"
    r = requests.get(update_data_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    date = get_date(r.text)
    logger.info("%s, %s, SUCCESS_N, %s" % (visa_type, place, date))
    if date:
        g.assign("status_%s_%s" % (visa_type, place), date)
    return date
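
Every step of visa_select scrapes the same four Salesforce ViewState hidden fields before posting a form. A small helper would remove that repetition; this is a refactoring sketch, not part of the original code:

def get_view_state(soup):
    # Collect the four com.salesforce.visualforce.* hidden inputs in one pass.
    keys = ("ViewState", "ViewStateVersion", "ViewStateMAC", "ViewStateCSRF")
    return {
        "com.salesforce.visualforce." + k:
            soup.find(id="com.salesforce.visualforce." + k).get("value")
        for k in keys
    }

The returned dict can then be merged into each form payload with data.update(get_view_state(soup)).
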
Code example #12
def init():
    """ Program entry, a simple command line interface"""
    parser = argparse.ArgumentParser()
    parser.add_argument('--target',
                        required=True,
                        type=str,
                        choices=['ais', 'cgi'],
                        help='targeting system')
    parser.add_argument('--proxy', type=int, help='local proxy port')
    parser.add_argument('--crawler',
                        type=str,
                        default='crawler.txt',
                        help='crawler api list')
    parser.add_argument('--ais',
                        type=str,
                        default='ais.json',
                        help='ais account in json format')
    parser.add_argument('--log_dir',
                        type=str,
                        default=os.path.join(os.curdir, 'logs'),
                        help='directory to save logs')
    parser.add_argument('--log_name',
                        type=str,
                        default='visa_fetcher',
                        help='name of log file')
    parser.add_argument('--debug',
                        action='store_true',
                        default=False,
                        help='log debug information')
    parser.add_argument('--noinit_lw',
                        action='store_true',
                        default=False,
                        help='whether to skip initiating the latest_written')
    args = parser.parse_args()

    if not os.path.exists(args.log_dir):
        os.mkdir(args.log_dir)

    G.assign('target_system', args.target)
    G.assign('session_file', f'{args.target}-session.json')
    G.assign('crawler_path', args.crawler)
    G.assign(
        'proxies', {
            'http': f'socks5h://127.0.0.1:{args.proxy}',
            'https': f'socks5h://127.0.0.1:{args.proxy}',
        } if args.proxy is not None else None)
    G.assign('log_dir', args.log_dir)
    G.assign('log_name', f'{args.target}_{args.log_name}')

    if args.target.lower() == 'ais':
        with open(args.ais) as f:
            ais_accounts = json.load(f)
            for k, v in ais_accounts.items():
                G.assign(k, v)

    if not args.noinit_lw:
        DB.VisaStatus.initiate_latest_written_sequential(args.target)

    global LOGGER
    global SESSION_CACHE
    LOGGER = util.init_logger(f'{args.target}_{args.log_name}', args.log_dir,
                              args.debug)
    SESSION_CACHE = SessionCache()

    LOGGER.info('FETCHING TARGET: %s', args.target.upper())
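
All of these snippets coordinate through a small global key-value store (g in some projects, G in others) with two operations: value(key, default) and assign(key, value); example #13 below also uses a G.LOCK. A minimal thread-safe sketch consistent with that usage (the real module may differ):

import threading

class GlobalVar:
    # Minimal sketch of the g/G store these snippets assume.
    def __init__(self):
        self._store = {}
        self.LOCK = threading.Lock()

    def value(self, key, default=None):
        with self.LOCK:
            return self._store.get(key, default)

    def assign(self, key, value):
        with self.LOCK:
            self._store[key] = value

G = GlobalVar()
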
Code example #13
    # NOTE: this snippet begins mid-signature; the def line below is a
    # reconstruction, and the method name mark_unavailable is a guess
    # taken from the log message.
    def mark_unavailable(self, visa_type: str, location: str,
                         cd: timedelta = timedelta(hours=G.CD_HOURS)) -> None:
        if f'{visa_type}-{location}' not in G.CD_LIST:
            return
        self.logger.warning(
            f"mark {visa_type} {location} unavailable for {cd.seconds}s")
        with G.LOCK:
            self.session_avail[visa_type][location] = datetime.now() + cd


if __name__ == "__main__":
    # Manual testing

    from pprint import pprint

    test_log = 'test_session_log'
    G.assign('log_name', test_log)
    util.init_logger(test_log, './logs', debug=True)
    for sys in ('cgi', 'ais'):
        G.assign('target_system', sys)
        G.assign('session_file', f'test_{sys}_session.json')
        sc = SessionCache()

        if sys == 'cgi':
            sess = sc.get_session('F', '金边')
            print(sess)
            new_sess = Session(session='new_sess_{}'.format(''.join(
                random.choices(string.ascii_lowercase, k=16))),
                               sys='cgi')
            sc.replace_session('F', '金边', sess, new_sess)
            pprint(sc.session['F']['金边'])
        elif sys == 'ais':
Code example #14
File: fast_visa.py  Project: codewangg/us-visa
def crawler(visa_type, places):
    open(visa_type + '_state', 'w').write('1')
    localtime = time.localtime()
    s = {'time': time.strftime('%Y/%m/%d %H:%M', localtime)}
    second = localtime.tm_sec
    cur = time.strftime('%Y/%m/%d', time.localtime())
    for place in places:
        try:
            # prepare session
            sess = session_op.get_session(visa_type, place)
            if not sess:
                logger.warning("%s, %s, FAILED, %s" %
                               (visa_type, place, "No Session"))
                continue
            cookies = copy.deepcopy(g.COOKIES)
            cookies["sid"] = sess
            # send request
            r = requests.get(g.HOME_URI,
                             headers=g.HEADERS,
                             cookies=cookies,
                             proxies=g.value("proxies", None))
            if r.status_code != 200:
                logger.warning("%s, %s, FAILED, %s" %
                               (visa_type, place, "Session Expired"))
                session_op.replace_session(visa_type, place, sess)
                continue
            # parse HTML
            page = r.text
            date = get_date(page)
            if not date:
                logger.warning("%s, %s, FAILED, %s" %
                               (visa_type, place, "Session Expired"))
                session_op.replace_session(visa_type, place, sess)
                continue
            elif date == (0, 0, 0):
                logger.warning("%s, %s, FAILED, %s" %
                               (visa_type, place, "Date Not Found"))
                last_status = g.value("status_%s_%s" % (visa_type, place),
                                      (0, 0, 0))
                if last_status != (0, 0, 0):
                    session_op.replace_session(visa_type, place, sess)
                elif random.random() < 0.05:
                    session_op.replace_session(visa_type, place, sess)
                continue
            logger.info("%s, %s, SUCCESS, %s" % (visa_type, place, date))
            g.assign("status_%s_%s" % (visa_type, place), date)
        except Exception:
            logger.error(traceback.format_exc())

    # write to file
    for place in places:
        n = place + '-' + cur
        n2 = place + '2-' + cur
        y, m, d = g.value("status_%s_%s" % (visa_type, place), (0, 0, 0))
        s[n] = s[n2] = '{}/{}/{}'.format(y, m, d) if y > 0 else "/"
        if s[n] != '/':
            path = visa_type + '/' + n.replace('-', '/')
            os.makedirs('/'.join(path.split('/')[:-1]), exist_ok=True)
            open(path,
                 'a+').write(s['time'].split(' ')[-1] + ' ' + s[n] + '\n')
    merge(
        '../visa/visa.json' if visa_type == "F" else '../visa/visa-%s.json' %
        visa_type.lower(), s, cur)
    open(visa_type + '_state', 'w').write('0')
    os.system('python3 notify.py --type ' + visa_type + ' &')
Code example #15
def visa_select(visa_type, place, sid):
    proxies = g.value("proxies", None)
    cookies = copy.deepcopy(g.COOKIES)
    cookies["sid"] = sid

    # select immigrant/nonimmigrant visa
    select_visa_type_uri = "https://cgifederal.secure.force.com/selectvisatype"
    r = requests.get(select_visa_type_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 1: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(
        id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(
        id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(
        id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(
        id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:ttip": "Nonimmigrant Visa",
        # "j_id0:SiteTemplate:theForm:j_id176": "继续",
        "j_id0:SiteTemplate:theForm:j_id176": "Continue",
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_type_uri,
                      data=data,
                      cookies=cookies,
                      proxies=proxies)
    if r.status_code != 200:
        print('visa_select 2: bad status code', r.status_code)
        return None

    # select place
    # if place != "香港" and place != "台北":
    select_post_uri = "https://cgifederal.secure.force.com/selectpost"
    r = requests.get(select_post_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 3: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(
        id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(
        id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(
        id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(
        id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    contact_id = soup.find(
        id="j_id0:SiteTemplate:j_id112:contactId").get("value")
    # NOTE: Place IDs are not unique; the first ID is simply the first one on the list
    place2id = {
        # "北京": "j_id0:SiteTemplate:j_id112:j_id165:0",
        # "成都": "j_id0:SiteTemplate:j_id112:j_id165:1",
        # "广州": "j_id0:SiteTemplate:j_id112:j_id165:2",
        # "上海": "j_id0:SiteTemplate:j_id112:j_id165:3",
        # "沈阳": "j_id0:SiteTemplate:j_id112:j_id165:4"
        "Melbourne": "j_id0:SiteTemplate:j_id112:j_id165:0",
        "Perth": "j_id0:SiteTemplate:j_id112:j_id165:1",
        "Sydney": "j_id0:SiteTemplate:j_id112:j_id165:2",
    }
    place_code = soup.find(id=place2id[place]).get("value")
    data = {
        "j_id0:SiteTemplate:j_id112": "j_id0:SiteTemplate:j_id112",
        "j_id0:SiteTemplate:j_id112:j_id165": place_code,
        # "j_id0:SiteTemplate:j_id112:j_id169": "继续",
        "j_id0:SiteTemplate:j_id112:j_id169": "Continue",
        "j_id0:SiteTemplate:j_id112:contactId": contact_id,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_post_uri,
                      data=data,
                      cookies=cookies,
                      proxies=proxies)
    if r.status_code != 200:
        print('visa_select 4: bad status code', r.status_code)
        return None

    # select visa category
    select_visa_category_uri = "https://cgifederal.secure.force.com/selectvisacategory"
    r = requests.get(select_visa_category_uri,
                     cookies=cookies,
                     proxies=proxies)
    if r.status_code != 200:
        print('visa_select 5: bad status code', r.status_code)
        return None

    soup = bs(r.text, "html.parser")
    view_state = soup.find(
        id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(
        id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(
        id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(
        id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    contact_id = soup.find(
        id="j_id0:SiteTemplate:j_id109:contactId").get("value")
    prefix = "j_id0:SiteTemplate:j_id109:j_id162:"
    category2id = {
        # "B": {"北京": 0, "成都": 0, "广州": 0, "上海": 0, "沈阳": 0, "香港": 1, "台北": 1},
        # "F": {"北京": 1, "成都": 1, "广州": 1, "上海": 1, "沈阳": 1, "香港": 0, "台北": 0},
        # "O": {"北京": 4, "成都": 2, "广州": 3, "上海": 4, "沈阳": 2, "香港": 3, "台北": 3},
        # "H": {"北京": 2, "广州": 3, "上海": 2, "香港": 3, "台北": 3},
        # "L": {"北京": 3, "广州": 2, "上海": 3, "香港": 3, "台北": 3}
        "E": {
            "Melbourne": 5,
            "Perth": 0,
            "Sydney": 3
        },
        "F": {
            "Melbourne": 1,
            "Perth": 0,
            "Sydney": 0
        },
    }
    category_code = soup.find(id=prefix +
                              str(category2id[visa_type][place])).get("value")
    data = {
        "j_id0:SiteTemplate:j_id109": "j_id0:SiteTemplate:j_id109",
        "j_id0:SiteTemplate:j_id109:j_id162": category_code,
        # "j_id0:SiteTemplate:j_id109:j_id166": "继续",
        "j_id0:SiteTemplate:j_id109:j_id166": "Continue",
        "j_id0:SiteTemplate:j_id109:contactId": contact_id,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_category_uri,
                      data=data,
                      cookies=cookies,
                      proxies=proxies)
    if r.status_code != 200:
        print('visa_select 6: bad status code', r.status_code)
        return None

    # select visa type
    select_visa_code_uri = "https://cgifederal.secure.force.com/selectvisacode"
    r = requests.get(select_visa_code_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 7: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(
        id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(
        id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(
        id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(
        id="com.salesforce.visualforce.ViewStateCSRF").get("value")

    # Indices of the list of 'selectedVisaClass' values
    if place == 'Sydney':
        F_typecode = 0
        E_typecode = -2
    elif place == 'Perth':
        F_typecode = 3
        E_typecode = -2
    elif place == 'Melbourne':
        E_typecode = 0
        F_typecode = 0
    else:
        print('visa_select 8: unsupported place', place)
        return None

    type2id = {"F": F_typecode, "E": E_typecode}
    inputs = soup.find_all("input")
    type_codes = [
        x.get("value") for x in inputs if x.get("name") == "selectedVisaClass"
    ]
    type_code = type_codes[type2id[visa_type]]
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        # "j_id0:SiteTemplate:theForm:j_id178": "继续",
        "j_id0:SiteTemplate:theForm:j_id178": "Continue",
        "selectedVisaClass": type_code,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_code_uri,
                      data=data,
                      cookies=cookies,
                      proxies=proxies)
    if r.status_code != 200:
        print('visa_select 9: bad status code', r.status_code)
        return None

    # update data
    update_data_uri = "https://cgifederal.secure.force.com/updatedata"
    r = requests.get(update_data_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 10: bad status code', r.status_code)
        return None
    date = get_date(r.text)
    logger.info("%s, %s, SUCCESS_N, %s" % (visa_type, place, date))
    if date:
        g.assign("status_%s_%s" % (visa_type, place), date)
    return date
Code example #16
def init():
    global logger
    global session_op

    # get secret and proxy config
    parser = argparse.ArgumentParser()
    parser.add_argument('--proxy', type=int, help="local proxy port")
    parser.add_argument('--session',
                        type=str,
                        default="session.json",
                        help="path to save sessions")
    parser.add_argument('--crawler',
                        type=str,
                        default="crawler.txt",
                        help="crawler api list")
    parser.add_argument('--ais',
                        type=str,
                        default="ais.json",
                        help="ais account in json format")
    parser.add_argument('--log_dir',
                        type=str,
                        default="./lite_visa",
                        help="directory to save logs")
    args = parser.parse_args()

    ais_account = json.loads(open(args.ais, 'r').read())
    for key in ais_account:
        g.assign(key, ais_account[key])
    # config logging
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    log_path = os.path.join(args.log_dir, "lite_visa.log")
    logger = logging.getLogger("lite_visa")
    handler = TimedRotatingFileHandler(log_path, when="midnight", interval=1)
    handler.suffix = "%Y%m%d"
    formatter = logging.Formatter(
        "%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.setLevel(logging.INFO)
    logger.addHandler(handler)
    logger.info("Initialization...")

    proxies = dict(http='socks5h://127.0.0.1:' + str(args.proxy),
                   https='socks5h://127.0.0.1:' +
                   str(args.proxy)) if args.proxy else None
    g.assign("proxies", proxies)
    g.assign("crawler_path", args.crawler)
    check_crawler_node()

    # read cached session pool (if any)
    g.assign("session_file", args.session)
    session_op = SessionOp()
    session_op.init_cache()

    # restore previous data
    for visa_type in ["F", "B", "H", "O", "L"]:
        fn = '../visa-%s.json' % visa_type.lower()
        orig = json.loads(open(fn).read()) if os.path.exists(fn) else {}
        if "time" not in orig:
            continue
        date = orig["time"].split()[0]
        data = {}
        for k, v in orig.items():
            if k.endswith("2-" + date):
                continue
            if k.endswith(date):
                place = k.split("-")[0]
                if v == "/":
                    y, m, d = 0, 0, 0
                else:
                    y, m, d = list(map(int, v.split("/")))
                data[place] = (y, m, d)
                g.assign("status_%s_%s" % (visa_type, place), (y, m, d))
        logger.info("%s, Restored date: %s" % (visa_type, str(data)))