def crawler_req(visa_type, place, start_time, requests):
    """Poll the remote crawler node once for the earliest appointment date.

    Refreshes an existing session through the crawler node's ``/refresh/``
    endpoint and, on success, stores the parsed ``(y, m, d)`` tuple under the
    global key ``status_<visa_type>_<place>``.

    Args:
        visa_type: visa category key (e.g. "F", "B").
        place: consulate location key.
        start_time: timestamp string used only as a log prefix.
        requests: HTTP client exposing ``get`` (injected by the caller).
    """
    try:
        # prepare session
        sess = session_op.get_session(visa_type, place)
        if not sess:
            logger.warning("%s, %s, %s, FAILED, %s" % (start_time, visa_type, place, "No Session"))
            return
        refresh_endpoint = g.value("crawler_node", "") + "/refresh/?session=" + sess
        try:
            r = requests.get(refresh_endpoint, timeout=7, proxies=g.value("proxies", None))
        except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
            logger.warning("%s, %s, %s, FAILED, %s" % (start_time, visa_type, place, "Endpoint Timeout"))
            check_crawler_node()
            return
        if r.status_code != 200:
            logger.warning("%s, %s, %s, FAILED, %s" % (
                start_time, visa_type, place, "Endpoint Inaccessible"))
            check_crawler_node()
            return
        result = r.json()
        if result["code"] > 0:
            # a positive code from the crawler node marks the session as stale
            logger.warning("%s, %s, %s, FAILED, %s" % (start_time, visa_type, place, "Session Expired"))
            session_op.replace_session(visa_type, place, sess)
            return
        # "msg" carries the date as "Y-M-D"
        date = tuple(map(int, result["msg"].split("-")))
        logger.info("%s, %s, %s, SUCCESS, %s" % (start_time, visa_type, place, date))
        g.assign("status_%s_%s" % (visa_type, place), date)
    except Exception:
        # worker boundary: log the full traceback, never crash the loop
        logger.error(traceback.format_exc())
def check_crawler_node():
    """Probe the configured crawler node list and switch to the first healthy one.

    The ``crawler_checking`` flag in the global store acts as a re-entrancy
    guard so only one probe runs at a time; it is cleared on every exit path.
    """
    # another probe is already in flight; bail out
    if g.value("crawler_checking", False):
        return
    g.assign("crawler_checking", True)
    crawler_filepath = g.value("crawler_path", None)
    last_node = g.value("crawler_node", "")
    if not crawler_filepath:
        logger.warning("Crawler file not found")
        g.assign("crawler_checking", False)
        return
    # the file holds one candidate node URL per line
    with open(crawler_filepath, "r") as f:
        nodes = list(f.readlines())
    for node in nodes:
        node = node.strip()
        try:
            # a 200 from the node's root URL counts as healthy
            r = requests.get(node, timeout=5)
            if r.status_code == 200:
                # only rewrite the global (and log) when the choice changes
                if last_node != node:
                    g.assign("crawler_node", node)
                    logger.warning("Choose Crawler Node: " + node)
                g.assign("crawler_checking", False)
                return
        except:
            # unreachable node: try the next candidate
            pass
    logger.error("All Crawler Nodes Failed")
    g.assign("crawler_checking", False)
def crawler_req(visa_type, place):
    """Fetch the cancel-appointment page directly and record the earliest date.

    Unlike the crawler-node variant, this talks to the CGI site itself using a
    cached session cookie. A result of ``(0, 0, 0)`` means the page loaded but
    no date was found; the session is recycled only when a date was previously
    known or the page no longer looks alive.
    """
    try:
        # prepare session
        sess = session_op.get_session(visa_type, place)
        if not sess:
            logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "No Session"))
            return
        cookies = copy.deepcopy(g.COOKIES)
        cookies["sid"] = sess
        # send request
        r = requests.get(g.CANCEL_URI, headers=g.HEADERS, cookies=cookies,
                         proxies=g.value("proxies", None))
        if r.status_code != 200:
            logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Session Expired"))
            session_op.replace_session(visa_type, place, sess)
            return
        # parse HTML
        page = r.text
        date = get_date(page)
        if not date:
            logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Session Expired"))
            session_op.replace_session(visa_type, place, sess)
            return
        elif date == (0, 0, 0):
            logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Date Not Found"))
            last_status = g.value("status_%s_%s" % (visa_type, place), (0, 0, 0))
            if last_status != (0, 0, 0):
                # a date used to exist: the session likely went stale
                session_op.replace_session(visa_type, place, sess)
            elif not check_alive(page):
                logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Session Expired"))
                session_op.replace_session(visa_type, place, sess)
            return
        logger.info("%s, %s, SUCCESS, %s" % (visa_type, place, date))
        g.assign("status_%s_%s" % (visa_type, place), date)
    except Exception:
        # narrowed from bare except: keep interrupts deliverable, log everything else
        logger.error(traceback.format_exc())
def merge(fn, s, cur, visa_type):
    """Merge one crawl's results ``s`` (keyed by "<place>-<date>") into ``fn``.

    Keeps a ``-last.json`` snapshot for the notifier diff, retains only the 50
    most recent days in ``index``, and finally spawns ``notify.py``. A flag in
    the global store acts as a non-blocking lock per visa type.
    """
    status = g.value("merge_lock" + visa_type, 0)
    if status == 1:
        return
    g.assign("merge_lock" + visa_type, 1)
    try:
        # load current file (if any); previously handles were leaked via open().read()
        if os.path.exists(fn):
            with open(fn) as f:
                orig = json.loads(f.read())
        else:
            orig = {}
        # snapshot the pre-merge state for the notifier diff
        with open(fn.replace('.json', '-last.json'), 'w') as f:
            f.write(json.dumps(orig, ensure_ascii=False))
        last = copy.deepcopy(orig)
        for k in s:
            if '2-' in k:
                # "<place>2-<date>" keys keep the earliest date seen that day
                orig[k] = min_date(orig.get(k, '/'), s[k])
            else:
                orig[k] = s[k]
        if cur not in orig.get('index', []):
            orig['index'] = [cur] + orig.get('index', [])
        # keep the 50 newest days; drop every key that belongs to an evicted day
        orig['index'], o = orig['index'][:50], orig['index'][50:]
        rmkeys = [i for i in orig if i.split('-')[-1] in o]
        for r in rmkeys:
            orig.pop(r)
        with open(fn, 'w') as f:
            f.write(json.dumps(orig, ensure_ascii=False))
    finally:
        # always release the lock — previously an exception left it held forever
        g.assign("merge_lock" + visa_type, 0)
    subprocess.check_call([
        'python3', 'notify.py', '--type', visa_type,
        '--js', json.dumps(orig, ensure_ascii=False),
        '--last_js', json.dumps(last, ensure_ascii=False)
    ])
def check_crawler_server_connection():
    """ Check the connection of all the crawler server.
        Update the current crawler server in use.
    """
    # re-entrancy guard: only one connectivity check at a time
    if G.value('checking_crawler_connection', False):
        return
    G.assign('checking_crawler_connection', True)
    crawler_path = G.value('crawler_path', None)
    previous_crawler_node = G.value('current_crawler_node', '')
    if crawler_path is None or not os.path.exists(crawler_path):
        LOGGER.warning(
            'GlobalVar crawler file path is not found or path not valid.')
        G.assign('checking_crawler_connection', False)
        return
    # one candidate server URL per line
    with open(crawler_path) as f:
        crawler_server_lst = [line.strip() for line in f.readlines()]
    for crawler_node in crawler_server_lst:
        try:
            res = requests.get(crawler_node, timeout=5)
        except Exception:
            continue  # unreachable node: try the next candidate
        if res.status_code == 200:
            # BUG FIX: previously the function only returned when the healthy
            # node differed from the current one, so a still-healthy current
            # node fell through and falsely logged "All crawler servers fail!".
            if previous_crawler_node != crawler_node:
                G.assign('current_crawler_node', crawler_node)
                LOGGER.warning('Choose crawler node: %s', crawler_node)
            G.assign('checking_crawler_connection', False)
            return
    LOGGER.error('All crawler servers fail!')
    G.assign('checking_crawler_connection', False)
def init_cache(self):
    """Load the cached session pool from disk into the global store.

    Falls back to an empty pool when the file is absent or unparsable.
    """
    path = g.value("session_file", "session.json")
    cached = {}
    if os.path.exists(path):
        with open(path, "r") as fp:
            try:
                cached = json.load(fp)
            except:
                # corrupt or partial cache file: start from an empty pool
                pass
    g.assign("session", cached)
def get_session(self, visa_type, place):
    """Return the next cached session for (visa_type, place) in round-robin
    order, or None when no session is available."""
    pool = g.value("session", {})
    candidates = pool.get(visa_type, {}).get(place)
    if not candidates:
        # unknown type/place or an empty session list
        return None
    cursor_key = "idx_%s_%s" % (visa_type, place)
    cursor = g.value(cursor_key, 0)
    chosen = candidates[cursor % len(candidates)]
    logger.debug("session: " + chosen)
    # advance the round-robin cursor for the next call
    g.assign(cursor_key, cursor + 1)
    return chosen
def crawler_req_ais(visa_type, code, places, start_time, requests):
    """Poll the AIS crawler node once; record dates for every tracked place.

    AIS sessions carry a schedule id alongside the session token and rotate
    the token on every refresh, so the new token is persisted immediately.

    Args:
        visa_type: visa category key.
        code: AIS country/region code used to key the session pool.
        places: iterable of place names whose dates should be recorded.
        start_time: timestamp string used only as a log prefix.
        requests: HTTP client exposing ``get`` (injected by the caller).
    """
    try:
        # prepare session
        sess, schedule_id = session_op.get_session(visa_type, code)
        if not sess:
            logger.warning("%s, %s, %s, FAILED, %s" % (start_time, visa_type, code, "No Session"))
            return
        refresh_endpoint = g.value(
            "crawler_node", "") + "/ais/refresh/?code=%s&id=%s&session=%s" % (
                code, schedule_id, sess)
        try:
            r = requests.get(refresh_endpoint, timeout=7, proxies=g.value("proxies", None))
        except Exception:  # narrowed from bare except: don't swallow interrupts
            logger.warning("%s, %s, %s, FAILED, %s" % (start_time, visa_type, code, "Endpoint Timeout"))
            check_crawler_node()
            return
        if r.status_code != 200:
            logger.warning(
                "%s, %s, %s, FAILED, %s" % (start_time, visa_type, code, "Endpoint Inaccessible"))
            check_crawler_node()
            return
        result = r.json()
        if result["code"] > 0:
            logger.warning("%s, %s, %s, FAILED, %s" % (start_time, visa_type, code, "Session Expired"))
            session_op.replace_session(visa_type, code, sess)
            return
        date_list = result["msg"]
        new_sess = result["session"]
        # AIS rotates the session token on each refresh; store the new one now
        session_op.replace_session_immediate(visa_type, code, sess, new_sess)
        for place, date in date_list:
            if place not in places:
                continue
            logger.info("%s, %s, %s, %s, SUCCESS, %s" % (start_time, visa_type, code, place, date))
            g.assign("status_%s_%s" % (visa_type, place), date)
    except Exception:
        # worker boundary: log the full traceback, never crash the loop
        logger.error(traceback.format_exc())
def init(): global logger # get secret and proxy config parser = argparse.ArgumentParser() parser.add_argument('--secret', type=str, default='', help="Fateadm secret file") parser.add_argument('--proxy', type=int, help="local proxy port") parser.add_argument('--session', type=str, default="session.json", help="path to save sessions") parser.add_argument('--log_dir', type=str, default="./fast_visa", help="directory to save logs") args = parser.parse_args() # config logging if not os.path.exists(args.log_dir): os.makedirs(args.log_dir) log_path = os.path.join(args.log_dir, "fast_visa.log") logger = logging.getLogger("fast_visa") handler = TimedRotatingFileHandler(log_path, when="midnight", interval=1) handler.suffix = "%Y%m%d" formatter = logging.Formatter( "%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s - %(message)s") handler.setFormatter(formatter) logger.setLevel(logging.INFO) logger.addHandler(handler) logger.info("Initialization...") # config cracker if len(args.secret) == 0: cracker = args cracker.solve = lambda x: input('Captcha: ') else: cracker = Captcha(args.secret, args.proxy) proxies = dict(http='socks5h://127.0.0.1:' + str(args.proxy), https='socks5h://127.0.0.1:' + str(args.proxy)) if args.proxy else None g.assign("proxies", proxies) g.assign("cracker", cracker) # read cached session pool (if any) g.assign("session_file", args.session) session_op.init_cache()
def init(): global logger # get secret and proxy config parser = argparse.ArgumentParser() parser.add_argument('--secret', type=str, default='', help="Fateadm secret file") parser.add_argument('--proxy', type=int, help="local proxy port") parser.add_argument('--session', type=str, default="session.json", help="path to save sessions") parser.add_argument('--log_dir', type=str, default="./fast_visa", help="directory to save logs") args = parser.parse_args() # config logging if not os.path.exists(args.log_dir): os.makedirs(args.log_dir) log_path = os.path.join(args.log_dir, "fast_visa.log") logger = logging.getLogger("fast_visa") handler = TimedRotatingFileHandler(log_path, when="midnight", interval=1) handler.suffix = "%Y%m%d" formatter = logging.Formatter( "%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s - %(message)s") handler.setFormatter(formatter) logger.setLevel(logging.INFO) logger.addHandler(handler) logger.info("Initialization...") # config cracker # if len(args.secret) == 0: # cracker = args # cracker.solve = lambda x: input('Captcha: ') # else: # cracker = Captcha(args.secret, args.proxy) cracker = Captcha() proxies = dict( http='socks5h://127.0.0.1:' + str(args.proxy), https='socks5h://127.0.0.1:' + str(args.proxy) ) if args.proxy else None g.assign("proxies", proxies) g.assign("cracker", cracker) # read cached session pool (if any) g.assign("session_file", args.session) session_op.init_cache() # restore previous data for visa_type in ["F", "B", "H", "O", "L"]: fn = '../visa/visa.json' if visa_type == "F" else '../visa/visa-%s.json' % visa_type.lower() orig = json.loads(open(fn).read()) if os.path.exists(fn) else {} if "time" not in orig: continue date = orig["time"].split()[0] data = {} for k, v in orig.items(): if k.endswith("2-" + date): continue if k.endswith(date): place = k.split("-")[0] if v == "/": y, m, d = 0, 0, 0 else: y, m, d = list(map(int, v.split("/"))) data[place] = (y, m, d) g.assign("status_%s_%s" % (visa_type, place), (y, m, d)) 
logger.info("%s, Restored date: %s" % (visa_type, str(data)))
def visa_select(visa_type, place, sid):
    """Walk the CGI Federal multi-step selection flow for China posts and
    return the earliest appointment date, or None on any failure.

    Steps: select nonimmigrant visa -> select post (skipped for 香港) ->
    select visa category -> select visa code -> fetch the update-data page
    and parse the date out of it. Every step re-scrapes the Salesforce
    ViewState tokens the next POST must echo back.

    Args:
        visa_type: one of "F", "B", "H", "O", "L".
        place: consulate name (Chinese), e.g. "北京".
        sid: session cookie value for the logged-in CGI session.
    """
    proxies = g.value("proxies", None)
    cookies = copy.deepcopy(g.COOKIES)
    cookies["sid"] = sid
    # select immigrant/nonimmigrant visa
    select_visa_type_uri = "https://cgifederal.secure.force.com/selectvisatype"
    r = requests.get(select_visa_type_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:ttip": "Nonimmigrant Visa",
        "j_id0:SiteTemplate:theForm:j_id176": "继续",
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_type_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    # select place (香港 accounts have no post-selection step)
    if place != "香港":
        select_post_uri = "https://cgifederal.secure.force.com/selectpost"
        r = requests.get(select_post_uri, cookies=cookies, proxies=proxies)
        if r.status_code != 200:
            return None
        soup = bs(r.text, "html.parser")
        view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
        view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
        view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
        view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
        contact_id = soup.find(id="j_id0:SiteTemplate:j_id112:contactId").get("value")
        place2id = {
            "北京": "j_id0:SiteTemplate:j_id112:j_id165:0",
            "成都": "j_id0:SiteTemplate:j_id112:j_id165:1",
            "广州": "j_id0:SiteTemplate:j_id112:j_id165:2",
            "上海": "j_id0:SiteTemplate:j_id112:j_id165:3",
            "沈阳": "j_id0:SiteTemplate:j_id112:j_id165:4"
        }
        place_code = soup.find(id=place2id[place]).get("value")
        data = {
            "j_id0:SiteTemplate:j_id112": "j_id0:SiteTemplate:j_id112",
            "j_id0:SiteTemplate:j_id112:j_id165": place_code,
            "j_id0:SiteTemplate:j_id112:j_id169": "继续",
            "j_id0:SiteTemplate:j_id112:contactId": contact_id,
            "com.salesforce.visualforce.ViewState": view_state,
            "com.salesforce.visualforce.ViewStateVersion": view_state_version,
            "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
            "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
        }
        r = requests.post(select_post_uri, data=data, cookies=cookies, proxies=proxies)
        if r.status_code != 200:
            return None
    # select visa category
    select_visa_category_uri = "https://cgifederal.secure.force.com/selectvisacategory"
    r = requests.get(select_visa_category_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    contact_id = soup.find(id="j_id0:SiteTemplate:j_id109:contactId").get("value")
    prefix = "j_id0:SiteTemplate:j_id109:j_id162:"
    # category radio-button index differs per (visa_type, post)
    category2id = {
        "B": {"北京": 0, "成都": 0, "广州": 0, "上海": 0, "沈阳": 0, "香港": 0},
        "F": {"北京": 1, "成都": 1, "广州": 1, "上海": 1, "沈阳": 1, "香港": 1},
        "O": {"北京": 4, "成都": 2, "广州": 3, "上海": 4, "沈阳": 2, "香港": 3},
        "H": {"北京": 2, "广州": 3, "上海": 2, "香港": 3},
        "L": {"北京": 3, "广州": 2, "上海": 3, "香港": 3}
    }
    category_code = soup.find(id=prefix + str(category2id[visa_type][place])).get("value")
    data = {
        "j_id0:SiteTemplate:j_id109": "j_id0:SiteTemplate:j_id109",
        "j_id0:SiteTemplate:j_id109:j_id162": category_code,
        "j_id0:SiteTemplate:j_id109:j_id166": "继续",
        "j_id0:SiteTemplate:j_id109:contactId": contact_id,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_category_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    # select visa type
    select_visa_code_uri = "https://cgifederal.secure.force.com/selectvisacode"
    r = requests.get(select_visa_code_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    # index into the list of "selectedVisaClass" inputs, per post
    type2id = {
        "F": 0,
        "B": 2,
        "H": 0,
        "O": 11 if place == "香港" else (7 if place == "广州" else 0),
        "L": 8 if place == "香港" else 2
    }
    inputs = soup.find_all("input")
    type_codes = [x.get("value") for x in inputs if x.get("name") == "selectedVisaClass"]
    type_code = type_codes[type2id[visa_type]]
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:j_id178": "继续",
        "selectedVisaClass": type_code,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_code_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    # update data
    update_data_uri = "https://cgifederal.secure.force.com/updatedata"
    # BUG FIX: this request previously re-fetched select_visa_code_uri, so
    # update_data_uri was defined but never used and the date was parsed from
    # the wrong page (the later revision of this flow uses update_data_uri).
    r = requests.get(update_data_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    date = get_date(r.text)
    logger.info("%s, %s, SUCCESS_N, %s" % (visa_type, place, date))
    if date:
        g.assign("status_%s_%s" % (visa_type, place), date)
    return date
def init():
    """ Program entry, a simple command line interface"""
    parser = argparse.ArgumentParser()
    parser.add_argument('--target', required=True, type=str, choices=['ais', 'cgi'], help='targeting system')
    parser.add_argument('--proxy', type=int, help='local proxy port')
    parser.add_argument('--crawler', type=str, default='crawler.txt', help='crawler api list')
    parser.add_argument('--ais', type=str, default='ais.json', help='ais account in json format')
    parser.add_argument('--log_dir', type=str, default=os.path.join(os.curdir, 'logs'), help='directory to save logs')
    parser.add_argument('--log_name', type=str, default='visa_fetcher', help='name of log file')
    parser.add_argument('--debug', action='store_true', default=False, help='log debug information')
    parser.add_argument('--noinit_lw', action='store_true', default=False, help='whether not to initiate the latest_written')
    args = parser.parse_args()
    if not os.path.exists(args.log_dir):
        os.mkdir(args.log_dir)
    # publish target-specific settings into the global store
    G.assign('target_system', args.target)
    G.assign('session_file', f'{args.target}-session.json')
    G.assign('crawler_path', args.crawler)
    # route traffic through a local SOCKS5 proxy when a port was given
    G.assign(
        'proxies', {
            'http': f'socks5h://127.0.0.1:{args.proxy}',
            'https': f'socks5h://127.0.0.1:{args.proxy}',
        } if args.proxy is not None else None)
    G.assign('log_dir', args.log_dir)
    G.assign('log_name', f'{args.target}_{args.log_name}')
    # AIS mode needs account credentials loaded into the global store
    if args.target.lower() == 'ais':
        with open(args.ais) as f:
            ais_accounts = json.load(f)
        for k, v in ais_accounts.items():
            G.assign(k, v)
    # seed the latest-written bookkeeping in the DB unless explicitly skipped
    if not args.noinit_lw:
        DB.VisaStatus.initiate_latest_written_sequential(args.target)
    global LOGGER
    global SESSION_CACHE
    LOGGER = util.init_logger(f'{args.target}_{args.log_name}', args.log_dir, args.debug)
    SESSION_CACHE = SessionCache()
    LOGGER.info('FETCHING TARGET: %s', args.target.upper())
cd: timedelta = timedelta(hours=G.CD_HOURS)) -> None: if f'{visa_type}-{location}' not in G.CD_LIST: return self.logger.warning( f"mark {visa_type} {location} unavailable for {cd.seconds}s") with G.LOCK: self.session_avail[visa_type][location] = datetime.now() + cd if __name__ == "__main__": # Manual testing from pprint import pprint test_log = 'test_session_log' G.assign('log_name', test_log) util.init_logger(test_log, './logs', debug=True) for sys in ('cgi', 'ais'): G.assign('target_system', sys) G.assign('session_file', f'test_{sys}_session.json') sc = SessionCache() if sys == 'cgi': sess = sc.get_session('F', '金边') print(sess) new_sess = Session(session='new_sess_{}'.format(''.join( random.choices(string.ascii_lowercase, k=16))), sys='cgi') sc.replace_session('F', '金边', sess, new_sess) pprint(sc.session['F']['金边']) elif sys == 'ais':
def crawler(visa_type, places):
    """Crawl every place for one visa type, persist results, and notify.

    Writes '1' to the "<type>_state" flag file while running and '0' when
    done, records per-place dates into the global store, appends history
    files under "<type>/<place>/<date>", merges into the visa JSON file and
    launches notify.py in the background.
    """
    # mark this visa type as "crawl in progress"
    open(visa_type + '_state', 'w').write('1')
    localtime = time.localtime()
    s = {'time': time.strftime('%Y/%m/%d %H:%M', localtime)}
    second = localtime.tm_sec
    cur = time.strftime('%Y/%m/%d', time.localtime())
    for place in places:
        try:
            # prepare session
            sess = session_op.get_session(visa_type, place)
            if not sess:
                logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "No Session"))
                continue
            cookies = copy.deepcopy(g.COOKIES)
            cookies["sid"] = sess
            # send request
            r = requests.get(g.HOME_URI, headers=g.HEADERS, cookies=cookies,
                             proxies=g.value("proxies", None))
            if r.status_code != 200:
                logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Session Expired"))
                session_op.replace_session(visa_type, place, sess)
                continue
            # parse HTML
            page = r.text
            date = get_date(page)
            if not date:
                logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Session Expired"))
                session_op.replace_session(visa_type, place, sess)
                continue
            elif date == (0, 0, 0):
                # page loaded but no date on it
                logger.warning("%s, %s, FAILED, %s" % (visa_type, place, "Date Not Found"))
                last_status = g.value("status_%s_%s" % (visa_type, place), (0, 0, 0))
                if last_status != (0, 0, 0):
                    # a date used to exist: recycle the session
                    session_op.replace_session(visa_type, place, sess)
                elif random.random() < 0.05:
                    # occasionally recycle anyway to avoid sticking to a dead session
                    session_op.replace_session(visa_type, place, sess)
                continue
            logger.info("%s, %s, SUCCESS, %s" % (visa_type, place, date))
            g.assign("status_%s_%s" % (visa_type, place), date)
        except:
            logger.error(traceback.format_exc())
    # write to file
    for place in places:
        n = place + '-' + cur
        n2 = place + '2-' + cur
        y, m, d = g.value("status_%s_%s" % (visa_type, place), (0, 0, 0))
        # "/" marks "no date available"
        s[n] = s[n2] = '{}/{}/{}'.format(y, m, d) if y > 0 else "/"
        if s[n] != '/':
            # append today's observation to "<type>/<place>/<Y>/<M>/<D>"
            path = visa_type + '/' + n.replace('-', '/')
            os.makedirs('/'.join(path.split('/')[:-1]), exist_ok=True)
            open(path, 'a+').write(s['time'].split(' ')[-1] + ' ' + s[n] + '\n')
    # NOTE(review): merge() is called with 3 args here; confirm this matches
    # the merge() signature used by this script version (another revision in
    # the codebase takes a 4th visa_type argument).
    merge(
        '../visa/visa.json' if visa_type == "F" else '../visa/visa-%s.json' % visa_type.lower(),
        s, cur)
    # clear the "crawl in progress" flag
    open(visa_type + '_state', 'w').write('0')
    # fire-and-forget notification in the background
    os.system('python3 notify.py --type ' + visa_type + ' &')
def visa_select(visa_type, place, sid):
    """Walk the CGI Federal multi-step selection flow for Australia posts and
    return the earliest appointment date, or None on any failure.

    Steps: select nonimmigrant visa -> select post -> select visa category ->
    select visa code -> fetch the update-data page and parse its date. Every
    step re-scrapes the Salesforce ViewState tokens the next POST must echo
    back. Failures print a numbered diagnostic and return None.
    """
    proxies = g.value("proxies", None)
    cookies = copy.deepcopy(g.COOKIES)
    cookies["sid"] = sid
    # select immigrant/nonimmigrant visa
    select_visa_type_uri = "https://cgifederal.secure.force.com/selectvisatype"
    r = requests.get(select_visa_type_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 1: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(
        id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(
        id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(
        id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(
        id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:ttip": "Nonimmigrant Visa",
        # "j_id0:SiteTemplate:theForm:j_id176": "继续",
        "j_id0:SiteTemplate:theForm:j_id176": "Continue",
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_type_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 2: bad status code', r.status_code)
        return None
    # select place
    # if place != "香港" and place != "台北":
    select_post_uri = "https://cgifederal.secure.force.com/selectpost"
    r = requests.get(select_post_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 3: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(
        id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(
        id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(
        id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(
        id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    contact_id = soup.find(
        id="j_id0:SiteTemplate:j_id112:contactId").get("value")
    # NOTE: Place IDs are not unique; the first ID is simply the first one on the list
    place2id = {
        # "北京": "j_id0:SiteTemplate:j_id112:j_id165:0",
        # "成都": "j_id0:SiteTemplate:j_id112:j_id165:1",
        # "广州": "j_id0:SiteTemplate:j_id112:j_id165:2",
        # "上海": "j_id0:SiteTemplate:j_id112:j_id165:3",
        # "沈阳": "j_id0:SiteTemplate:j_id112:j_id165:4"
        "Melbourne": "j_id0:SiteTemplate:j_id112:j_id165:0",
        "Perth": "j_id0:SiteTemplate:j_id112:j_id165:1",
        "Sydney": "j_id0:SiteTemplate:j_id112:j_id165:2",
    }
    place_code = soup.find(id=place2id[place]).get("value")
    data = {
        "j_id0:SiteTemplate:j_id112": "j_id0:SiteTemplate:j_id112",
        "j_id0:SiteTemplate:j_id112:j_id165": place_code,
        # "j_id0:SiteTemplate:j_id112:j_id169": "继续",
        "j_id0:SiteTemplate:j_id112:j_id169": "Continue",
        "j_id0:SiteTemplate:j_id112:contactId": contact_id,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_post_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 4: bad status code', r.status_code)
        return None
    # select visa category
    select_visa_category_uri = "https://cgifederal.secure.force.com/selectvisacategory"
    r = requests.get(select_visa_category_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 5: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(
        id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(
        id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(
        id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(
        id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    contact_id = soup.find(
        id="j_id0:SiteTemplate:j_id109:contactId").get("value")
    prefix = "j_id0:SiteTemplate:j_id109:j_id162:"
    # category radio-button index differs per (visa_type, post)
    category2id = {
        # "B": {"北京": 0, "成都": 0, "广州": 0, "上海": 0, "沈阳": 0, "香港": 1, "台北": 1},
        # "F": {"北京": 1, "成都": 1, "广州": 1, "上海": 1, "沈阳": 1, "香港": 0, "台北": 0},
        # "O": {"北京": 4, "成都": 2, "广州": 3, "上海": 4, "沈阳": 2, "香港": 3, "台北": 3},
        # "H": {"北京": 2, "广州": 3, "上海": 2, "香港": 3, "台北": 3},
        # "L": {"北京": 3, "广州": 2, "上海": 3, "香港": 3, "台北": 3}
        "E": {
            "Melbourne": 5,
            "Perth": 0,
            "Sydney": 3
        },
        "F": {
            "Melbourne": 1,
            "Perth": 0,
            "Sydney": 0
        },
    }
    category_code = soup.find(id=prefix +
                              str(category2id[visa_type][place])).get("value")
    data = {
        "j_id0:SiteTemplate:j_id109": "j_id0:SiteTemplate:j_id109",
        "j_id0:SiteTemplate:j_id109:j_id162": category_code,
        # "j_id0:SiteTemplate:j_id109:j_id166": "继续",
        "j_id0:SiteTemplate:j_id109:j_id166": "Continue",
        "j_id0:SiteTemplate:j_id109:contactId": contact_id,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_category_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 6: bad status code', r.status_code)
        return None
    # select visa type
    select_visa_code_uri = "https://cgifederal.secure.force.com/selectvisacode"
    r = requests.get(select_visa_code_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 7: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(
        id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(
        id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(
        id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(
        id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    # Indices of the list of 'selectedVisaClass' values
    # (negative indices count from the end of the input list)
    if place == 'Sydney':
        F_typecode = 0
        E_typecode = -2
    elif place == 'Perth':
        F_typecode = 3
        E_typecode = -2
    elif place == 'Melbourne':
        E_typecode = 0
        F_typecode = 0
    else:
        print('visa_select 8: unsupported place', place)
        return None
    type2id = {"F": F_typecode, "E": E_typecode}
    inputs = soup.find_all("input")
    type_codes = [
        x.get("value") for x in inputs
        if x.get("name") == "selectedVisaClass"
    ]
    type_code = type_codes[type2id[visa_type]]
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        # "j_id0:SiteTemplate:theForm:j_id178": "继续",
        "j_id0:SiteTemplate:theForm:j_id178": "Continue",
        "selectedVisaClass": type_code,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_code_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 9: bad status code', r.status_code)
        return None
    # update data: this page finally carries the appointment date
    update_data_uri = "https://cgifederal.secure.force.com/updatedata"
    r = requests.get(update_data_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 10: bad status code', r.status_code)
        return None
    date = get_date(r.text)
    logger.info("%s, %s, SUCCESS_N, %s" % (visa_type, place, date))
    if date:
        g.assign("status_%s_%s" % (visa_type, place), date)
    return date
def init():
    """Parse CLI options, load AIS credentials, initialise logging/proxies,
    pick a crawler node, load the cached session pool, and restore the last
    known dates from the visa JSON files (lite_visa variant)."""
    global logger
    global session_op
    # get secret and proxy config
    parser = argparse.ArgumentParser()
    parser.add_argument('--proxy', type=int, help="local proxy port")
    parser.add_argument('--session', type=str, default="session.json", help="path to save sessions")
    parser.add_argument('--crawler', type=str, default="crawler.txt", help="crawler api list")
    parser.add_argument('--ais', type=str, default="ais.json", help="ais account in json format")
    parser.add_argument('--log_dir', type=str, default="./lite_visa", help="directory to save logs")
    args = parser.parse_args()
    # publish every AIS account field into the global store
    ais_account = json.loads(open(args.ais, 'r').read())
    for key in ais_account:
        g.assign(key, ais_account[key])
    # config logging: one file per day, rotated at midnight
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    log_path = os.path.join(args.log_dir, "lite_visa.log")
    logger = logging.getLogger("lite_visa")
    handler = TimedRotatingFileHandler(log_path, when="midnight", interval=1)
    handler.suffix = "%Y%m%d"
    formatter = logging.Formatter(
        "%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.setLevel(logging.INFO)
    logger.addHandler(handler)
    logger.info("Initialization...")
    # route traffic through a local SOCKS5 proxy when a port was given
    proxies = dict(http='socks5h://127.0.0.1:' + str(args.proxy),
                   https='socks5h://127.0.0.1:' + str(args.proxy)) if args.proxy else None
    g.assign("proxies", proxies)
    g.assign("crawler_path", args.crawler)
    # pick a reachable crawler node before any request is made
    check_crawler_node()
    # read cached session pool (if any)
    g.assign("session_file", args.session)
    session_op = SessionOp()
    session_op.init_cache()
    # restore previous data: each file stores keys like "<place>-<Y/M/D>";
    # only keys for the file's own "time" date are restored
    for visa_type in ["F", "B", "H", "O", "L"]:
        fn = '../visa-%s.json' % visa_type.lower()
        orig = json.loads(open(fn).read()) if os.path.exists(fn) else {}
        if "time" not in orig:
            continue
        date = orig["time"].split()[0]
        data = {}
        for k, v in orig.items():
            # "<place>2-<date>" keys are the earliest-seen duplicates; skip them
            if k.endswith("2-" + date):
                continue
            if k.endswith(date):
                place = k.split("-")[0]
                if v == "/":
                    # "/" marks "no date available"
                    y, m, d = 0, 0, 0
                else:
                    y, m, d = list(map(int, v.split("/")))
                data[place] = (y, m, d)
                g.assign("status_%s_%s" % (visa_type, place), (y, m, d))
        logger.info("%s, Restored date: %s" % (visa_type, str(data)))