def func_wrapper(wasm=False, *args, **kwargs):
    """Call ``prepare`` for 'async', then delegate to ``func``.

    ``prepare`` and ``func`` are resolved from the enclosing scope.

    :param wasm: truthy selects the ``.wast`` argument set, falsy the ``.py`` one
    :return: the result of ``func(*args, **kwargs)``
    """
    if wasm:
        source, abi, type_arg = 'sync.wast', 'sync.abi', 0
    else:
        source, abi, type_arg = 'sync.py', 'async.abi', 2
    prepare('async', source, abi, type_arg, __file__)
    return func(*args, **kwargs)
def func_wrapper(wasm=True, *args, **kwargs):
    """Call ``prepare`` for 'twitbot', then delegate to ``func``.

    ``prepare`` and ``func`` are resolved from the enclosing scope.

    :param wasm: truthy selects the ``.wast`` source, falsy the ``.py`` source
    :return: the result of ``func(*args, **kwargs)``
    """
    source, type_arg = ('twitbot.wast', 0) if wasm else ('twitbot.py', 2)
    prepare('twitbot', source, 'twitbot.abi', type_arg, __file__)
    return func(*args, **kwargs)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'storagetest', then delegate to ``func``.

    ``wasm``, ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``
    """
    source, type_arg = ('storagetest.wast', 0) if wasm else ('storagetest.py', 2)
    prepare('storagetest', source, 'storagetest.abi', type_arg, __file__)
    return func(*args, **kwargs)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'testapi', then delegate to ``func``.

    ``wasm``, ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``
    """
    source = 'testapi.wast' if wasm else 'testapi.py'
    prepare('testapi', source, 'testapi.abi', __file__)
    return func(*args, **kwargs)
def func_wrapper(wasm=False, *args, **kwargs):
    """Call ``prepare`` for 'lockunlock', then delegate to ``func``.

    ``prepare`` and ``func`` are resolved from the enclosing scope.

    :param wasm: truthy selects the ``.wast`` source, falsy the ``.py`` source
    :return: the result of ``func(*args, **kwargs)``
    """
    source, type_arg = ('lockunlock.wast', 0) if wasm else ('lockunlock.py', 2)
    prepare('lockunlock', source, 'lockunlock.abi', type_arg, __file__)
    return func(*args, **kwargs)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'lab', then delegate to ``func``.

    ``wasm``, ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``
    """
    source = 'lab.wast' if wasm else 'lab.py'
    prepare('lab', source, 'lab.abi', __file__)
    return func(*args, **kwargs)
def func_wrapper(*args, **kwargs):
    """Build 'math.cpp' via ``cpp2wast``, call ``prepare`` for 'backyard', run ``func``.

    :raises Exception: when ``cpp2wast.build`` returns a falsy value
    :return: the result of ``func(*args, **kwargs)``
    """
    cpp_source = 'math.cpp'
    here = os.path.dirname(__file__)
    cpp2wast.set_src_path(here)
    built = cpp2wast.build(cpp_source)
    if not built:
        raise Exception("build {0} failed".format(cpp_source))
    prepare('backyard', 'backyard.py', 'backyard.abi', __file__)
    return func(*args, **kwargs)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'counter', then delegate to ``func``.

    ``wasm``, ``prepare`` and ``func`` are resolved from the enclosing scope.
    The ``.py`` variant passes one extra trailing argument (``1``) to
    ``prepare``.

    :return: the result of ``func(*args, **kwargs)``
    """
    if wasm:
        prepare_args = ('counter', 'counter.wast', 'counter.abi', __file__)
    else:
        prepare_args = ('counter', 'counter.py', 'counter.abi', __file__, 1)
    prepare(*prepare_args)
    return func(*args, **kwargs)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'apitest', then run ``func`` without letting it raise.

    Any ``Exception`` raised by ``func`` is printed and swallowed.

    :return: the result of ``func(*args, **kwargs)``, or ``0`` when it raised
    """
    prepare('apitest', 'apitest.py', 'apitest.abi', __file__, 6)
    try:
        return func(*args, **kwargs)
    except Exception as err:
        print(err)
        return 0
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'currency', then delegate to ``func``.

    The variant is chosen by the ``wasm`` keyword argument, which is still
    forwarded to ``func`` together with the other keyword arguments.

    :return: the result of ``func(*args, **kwargs)``
    """
    if kwargs.get('wasm'):
        prepare('currency',
                '../../build/contracts/currency/currency.wast',
                '../../build/contracts/currency/currency.abi',
                0, __file__)
    else:
        prepare('currency', 'currency.py', 'currency.abi', __file__)
    return func(*args, **kwargs)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'apitest', then run ``func`` and report failures.

    When ``func`` raises an ``Exception``, its first argument is wrapped in
    ``eosapi.JsonStruct`` and printed; the exception itself is swallowed.

    :return: the result of ``func(*args, **kwargs)``, or ``0`` when it raised
    """
    prepare('apitest', 'apitest.py', 'apitest.abi', __file__, 6)
    try:
        return func(*args, **kwargs)
    except Exception as err:
        print(eosapi.JsonStruct(err.args[0]))
        return 0
def idle_func():
    """Open the repository at ``path``; on a wrong passphrase show the prompt.

    ``path`` comes from the enclosing scope. When
    ``borg.key.PassphraseWrong`` is raised, the repository is released, the
    passphrase page is built on first use and made visible, and the back
    button is rewired to return to the repository list.
    """
    global pass_not_setup, pass_again, msg_label, pass_entry

    common.cur_path = path
    try:
        common.prepare()
        # Get repository, cache and manifest and lock them
        common.set_cur_repository(common.cur_path, False)
        start()
    except borg.key.PassphraseWrong as e:
        common.unset_cur_repository(e)
        transitions.unload()

        if pass_not_setup:
            # First wrong passphrase: build the page and hook up its signals.
            builder = Gtk.Builder.new_from_file('../data/ui/passphrase.ui')
            grid = builder.get_object('grid')
            common.stack.add_named(grid, 'pass')
            msg_label = builder.get_object('msg_label')
            pass_entry = builder.get_object('entry')
            button = builder.get_object('button')

            def enter_pass(caller):
                # Store the typed passphrase and retry opening the repository.
                set_pass(pass_entry.get_text())
                transitions.to_load()
                GLib.idle_add(idle_func)

            builder.connect_signals({
                'click_button': lambda caller: button.clicked(),
                'enter_pass': enter_pass
            })
            pass_not_setup = False

        common.stack.set_visible_child_full(
            'pass', Gtk.StackTransitionType.CROSSFADE)
        common.addrep_button.hide()
        common.back_button.show()
        common.win.set_title('Passphrase')

        common.back_button.disconnect(common.back_button_signal_id)

        def callback(caller):
            # The flag lives at module level; without this declaration the
            # assignment below would only bind a local and never reset it.
            global pass_again
            pass_again = False
            transitions.to_replist()

        common.back_button_signal_id = common.back_button.connect(
            'clicked', callback)

        if pass_again:
            msg_label.set_text('Passphrase Incorrect. Try again')
        else:
            msg_label.set_text(
                'Passphrase needed to read or modify the contents of this repository'
            )
            pass_again = True
def func_wrapper(*args, **kwargs):
    """Ensure the 'test' account exists, call ``prepare`` for 'credit', run ``func``.

    ``wasm``, ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``
    """
    account = eosapi.get_account('test')
    if not account:
        r = eosapi.create_account('eosio', 'test', initeos.key1, initeos.key2)
        assert r
        eosapi.produce_block()

    source = 'credit.wast' if wasm else 'credit.py'
    prepare('credit', source, 'credit.abi', __file__)
    return func(*args, **kwargs)
def main():
    """Fit linear weights on 'csv/train.csv' by least squares and save them.

    Reads the training CSV, takes the 'Survived' column as the target, runs
    the remaining data through ``prepare``, solves for theta with an added
    constant column, reports training accuracy of the rounded predictions
    (made without the constant term, as before) and writes the weights to
    'weights.csv'.
    """
    train = pd.read_csv('csv/train.csv')
    goals = pd.DataFrame({'survived': train['Survived']})
    train = prepare(train)

    goals = goals.to_numpy()
    inputs = train.to_numpy()

    # add column x0 := 1
    (rows, cols) = inputs.shape
    X = np.ones((rows, cols + 1))
    X[:, 1:] = inputs

    # Least-squares solution of X @ theta = goals. Equivalent to the normal
    # equation when X^T X is invertible, but does not fail on a singular or
    # ill-conditioned design matrix.
    theta = np.linalg.lstsq(X, goals, rcond=None)[0]

    # get rid of x0 again
    weights = theta[1:, 0]

    # Score every row at once instead of looping over indices.
    predictions = inputs.dot(weights).round().astype(int)
    correct = int((predictions == goals[:, 0]).sum())

    accuracy = correct / len(goals) * 100
    print(f'accuracy: {accuracy:.3f}%')

    weights_file = 'weights.csv'
    np.savetxt(weights_file, weights, delimiter=',')
    print(f'saved weights {weights} as CSV to {weights_file}')
def main():
    """Train logistic-regression weights by gradient descent and save them.

    Shuffles 'csv/train.csv', splits it 70/30 into training and validation
    parts, runs both through ``prepare``, performs batch gradient descent on
    the training part, prints the cost ten times during training, reports
    validation accuracy and writes theta to 'weights.csv'.
    """
    data = pd.read_csv('csv/train.csv')

    # shuffle and split (0.7/0.3)
    data = data.sample(frac=1)
    m_train = round(len(data) * 0.7)
    train = data.iloc[:m_train]
    valid = data.iloc[m_train:]

    goals_train = pd.DataFrame({'survived': train['Survived']})
    goals_valid = pd.DataFrame({'survived': valid['Survived']})
    train = prepare(train)
    valid = prepare(valid)

    goals_train = goals_train.to_numpy()
    goals_valid = goals_valid.to_numpy()
    inputs_train = train.to_numpy()
    inputs_valid = valid.to_numpy()

    theta = np.zeros((inputs_train.shape[1], 1))
    alpha = 0.01
    iterations = 100_000

    # Loop invariants, computed once.
    x = inputs_train
    y = goals_train
    x_t = x.transpose()
    step = alpha / len(x)
    report_every = iterations // 10

    for i in range(iterations):
        p = sigmoid(x.dot(theta))
        theta -= step * x_t.dot(p - y)
        # The cost is only needed for the progress line, so evaluate it on
        # the reporting iterations only.
        # NOTE(review): assumes cost() has no side effects - confirm.
        if i % report_every == 0:
            c = cost(x, y, theta)
            print(f'cost={c:.3f}')

    predictions = sigmoid(inputs_valid.dot(theta))
    predictions[:, 0] = predictions[:, 0].round()
    correct = int((predictions == goals_valid)[:, 0].sum())

    accuracy = correct / len(goals_valid) * 100
    print(f'accuracy: {accuracy:.3f}%')

    weights_file = 'weights.csv'
    np.savetxt(weights_file, theta, delimiter=',')
    print(f'saved weights {theta} as CSV to {weights_file}')
def idle_func():
    """Create a new repository under the chosen directory and start the UI.

    ``directory_path_button``, ``name`` and ``args`` come from the enclosing
    scope. The repository, key, manifest and cache are stored on ``common``
    and the repository and cache are entered, then ``repository.start()``
    is called.
    """
    parent_dir = directory_path_button.get_filename()
    common.cur_path = os.path.join(parent_dir, name)
    common.prepare()

    # Simulate do_init, Create repository, cache and manifest and lock them
    repo = borg.repository.Repository(
        common.cur_path, create=True, exclusive=True, lock_wait=1)
    common.cur_repository = repo
    repo.__enter__()

    borg.archiver.logger.info(
        'Initializing repository at "%s"' % common.cur_path)

    key = borg.key.key_creator(repo, args)
    common.cur_rep_key = key

    manifest = borg.helpers.Manifest(key, repo)
    common.cur_rep_manifest = manifest
    manifest.key = key
    manifest.write()
    repo.commit()

    cache = borg.cache.Cache(
        repo, key, manifest, lock_wait=1, warn_if_unencrypted=False)
    common.cur_rep_cache = cache
    cache.__enter__()

    repository.start()
def ui_entry_api(board_info, scenario_info, out=''):
    """Run ``main`` with a command line built from the given arguments.

    :param board_info: value passed after ``--board``
    :param scenario_info: value passed after ``--scenario``
    :param out: value passed after ``--out``
    :return: the truthy result of ``common.prepare()`` if there is one,
             otherwise the result of ``main``
    """
    arg_list = [
        'board_cfg_gen.py',
        '--board', board_info,
        '--scenario', scenario_info,
        '--out', out,
    ]

    prepare_errors = common.prepare()
    if prepare_errors:
        return prepare_errors

    return main(arg_list)
def ui_entry_api(board_info, scenario_info, launch_info, out=''):
    """Run ``main`` with a command line built from the given arguments.

    :param board_info: value passed after ``--board``
    :param scenario_info: value passed after ``--scenario``
    :param launch_info: value passed after ``--launch``
    :param out: value passed after ``--out``
    :return: the truthy result of ``common.prepare()`` if there is one,
             otherwise the result of ``main``
    """
    arg_list = [
        'launch_cfg_gen.py',
        '--board', board_info,
        '--scenario', scenario_info,
        '--launch', launch_info,
        '--uosid', '0',
        '--out', out,
    ]

    prepare_errors = common.prepare()
    if prepare_errors:
        return prepare_errors

    return main(arg_list)
def image_to_text(image, mapping):
    """Render ``image`` as text, one escaped character cell per pixel.

    :param image: image handed to ``prepare`` together with the palette
    :param mapping: dict with a 'palette' list (zero-padded here to 768
                    entries) and 'ansi' / 'charmap' lookups indexed by the
                    pixel value of the prepared image
    :return: the rows of the image joined with newlines
    """
    palette = mapping['palette'] + [0] * (768 - len(mapping['palette']))
    prepared_image = prepare(image, palette)
    width, height = prepared_image.size

    # Loop invariants: lookup tables and the trailing reset sequence.
    ansi = mapping['ansi']
    charmap = mapping['charmap']
    reset = _escape % '0'
    getpixel = prepared_image.getpixel

    rows = []
    for y in range(height):
        # Collect the cells and join once instead of growing a string with +=.
        cells = []
        for x in range(width):
            pxi = getpixel((x, y))
            cells.append(_escape % ansi[pxi] + charmap[pxi] + reset)
        rows.append(''.join(cells))
    return '\n'.join(rows)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'codestore' and 'renter', deploy 'math.py', run ``func``.

    ``wasm``, ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``
    """
    extension = '.wast' if wasm else '.py'
    for contract in ('codestore', 'renter'):
        prepare(contract, contract + extension, contract + '.abi', __file__)

    here = os.path.dirname(__file__)
    skipped = ['t.py', 'renter.py', 'codestore.py']
    sync = Sync('codestore', _dir=here, _ignore=skipped)
    sync.deploy_mpy('math.py')
    return func(*args, **kwargs)
def main():
    """Predict 'Survived' for 'csv/test.csv' and write 'submission.csv'.

    The test data is run through ``prepare`` with ``with_id=True``; column 0
    of the result is used as the passenger id and the remaining columns are
    fed to ``sigmoid`` together with the module-level ``weights``.
    """
    test = pd.read_csv('csv/test.csv')
    test = prepare(test, with_id=True).to_numpy()

    passenger_ids = [int(row[0]) for row in test]
    # ignore id column
    raw_predictions = [sigmoid(row[1:].dot(weights)) for row in test]
    # Flatten so that both scalar and one-element-array predictions end up
    # in a plain 1-D float column.
    survived = np.asarray(raw_predictions, dtype=float).reshape(-1)

    # Build the frame in one step: DataFrame.append and the np.int alias are
    # gone from current pandas / NumPy, and row-wise appends are quadratic.
    submission = pd.DataFrame({
        'PassengerId': np.asarray(passenger_ids, dtype=int),
        'Survived': survived,
    })
    submission['Survived'] = submission['Survived'].round().fillna(0).astype(
        int)
    submission.to_csv('submission.csv', index=False)
def main():
    """Fit linear weights by per-sample gradient steps and save them.

    Reads 'csv/train.csv', runs it through ``prepare`` (printing the
    result), makes 1000 passes of sequential per-row weight updates,
    reports training accuracy of the rounded predictions and writes the
    weights to 'weights.csv'.
    """
    train = pd.read_csv('csv/train.csv')
    goals = pd.DataFrame({'survived': train['Survived']})
    train = prepare(train)
    print(train)

    goals = goals.to_numpy()
    inputs = train.to_numpy()

    weights = np.zeros(inputs.shape[1])
    alpha = 1e-5

    for _ in range(1000):
        # Updates are applied row by row, so each step sees the weights
        # produced by the previous row.
        for row, target in zip(inputs, goals):
            error = row.dot(weights) - target[0]
            weights -= error * row * alpha

    correct = sum(
        1
        for row, target in zip(inputs, goals)
        if row.dot(weights).round().astype(int) == target[0]
    )

    accuracy = correct / len(goals) * 100
    print(f'accuracy: {accuracy:.3f}%')

    weights_file = 'weights.csv'
    np.savetxt(weights_file, weights, delimiter=',')
    print(f'saved weights {weights} as CSV to {weights_file}')
def main(args):
    """
    Generate the board-related source files.

    :param args: command line args for the script
    :return: error dictionary; empty when every step succeeded
    """
    (err_dic, params) = common.get_param(args)
    if err_dic:
        return err_dic

    # check env
    err_dic = common.prepare()
    if err_dic:
        return err_dic

    board_file = params['--board']
    scenario_file = params['--scenario']
    common.BOARD_INFO_FILE = board_file
    common.SCENARIO_INFO_FILE = scenario_file
    common.get_vm_num(scenario_file)
    common.get_vm_types()

    if common.VM_COUNT > common.MAX_VM_NUM:
        err_dic['vm count'] = \
            "The vm count in config xml should be less or equal {}!".format(
                common.MAX_VM_NUM)
        return err_dic

    # get board name
    (err_dic, board) = common.get_board_name()
    if err_dic:
        return err_dic

    # get scenario name
    (err_dic, scenario) = common.get_scenario_name()
    if err_dic:
        return err_dic

    board_cfg_lib.BOARD_NAME = board

    # check if this is the scenario config which matched board info
    (err_dic, status) = common.is_config_file_match()
    if not status:
        err_dic['board config'] = \
            "The board xml file does not match scenario xml file!"
        return err_dic

    # Resolve the output root: default location, absolute path as given,
    # or a path appended to ACRN_PATH.
    out_arg = params['--out']
    if not out_arg:
        output = ACRN_CONFIG_DEF
    elif os.path.isabs(out_arg):
        output = out_arg
    else:
        output = ACRN_PATH + out_arg

    board_fix_dir = os.path.join(output, "boards/")
    scen_board_dir = os.path.join(output, "scenarios/" + scenario + "/")
    common.mkdir(board_fix_dir)
    common.mkdir(scen_board_dir)

    config_pci = board_fix_dir + GEN_FILE[0]
    config_board = board_fix_dir + GEN_FILE[1]
    config_acpi = board_fix_dir + GEN_FILE[2]
    config_misc_cfg = scen_board_dir + GEN_FILE[3]
    config_board_h = board_fix_dir + GEN_FILE[4]
    config_vbar_base = scen_board_dir + GEN_FILE[5]

    # generate pci_devices.h
    with open(config_pci, 'w+') as config:
        pci_devices_h.generate_file(config)

    # generate board_info.h
    with open(config_board_h, 'w+') as config:
        err_dic = board_info_h.generate_file(config)
    if err_dic:
        return err_dic

    # generate board.c
    with open(config_board, 'w+') as config:
        err_dic = board_c.generate_file(config)
    if err_dic:
        return err_dic

    # generate vbar_base.h
    with open(config_vbar_base, 'w+') as config:
        vbar_base_h.generate_file(config)

    # generate platform_acpi_info.h
    with open(config_acpi, 'w+') as config:
        acpi_platform_h.generate_file(config, ACRN_DEFAULT_ACPI)

    # generate misc_cfg.h
    with open(config_misc_cfg, 'w+') as config:
        err_dic = misc_cfg_h.generate_file(config)
    if err_dic:
        return err_dic

    if err_dic:
        print("Board configurations for {} is generated failed.".format(board))
    else:
        print("Board configurations for {} is generated successfully.".format(
            board))

    return err_dic
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'counter', then delegate to ``func``.

    ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``
    """
    prepare('counter', 'counter.py', 'counter.abi', __file__)
    result = func(*args, **kwargs)
    return result
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'native', then delegate to ``func``.

    ``wasm``, ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``
    """
    source = 'native.wast' if wasm else 'native.py'
    prepare('native', source, 'native.abi', __file__)
    return func(*args, **kwargs)
def main(args):
    """
    Generate the scenario configuration source files.

    :param args: command line args
    :return: error dictionary; empty when every step succeeded
    """
    (err_dic, params) = common.get_param(args)
    if err_dic:
        return err_dic

    # check env
    err_dic = common.prepare()
    if err_dic:
        return err_dic

    common.BOARD_INFO_FILE = params['--board']
    common.SCENARIO_INFO_FILE = params['--scenario']
    common.get_vm_num(params['--scenario'])
    common.get_vm_types()

    # get board name
    (err_dic, board_name) = common.get_board_name()
    # Check before the next call rebinds err_dic; otherwise a board-name
    # error would be silently discarded.
    if err_dic:
        return err_dic

    # get scenario name
    (err_dic, scenario) = common.get_scenario_name()
    if err_dic:
        return err_dic

    if common.VM_COUNT > common.MAX_VM_NUM:
        err_dic['vm count'] = \
            "Number of VMs in scenario xml file should be no greater than {}!".format(
                common.MAX_VM_NUM)
        return err_dic

    # check if this is the scenario config which matches board info
    (err_dic, status) = common.is_config_file_match()
    if not status:
        err_dic['scenario config'] = \
            "The board xml file does not match scenario xml file!"
        return err_dic

    if params['--out']:
        if os.path.isabs(params['--out']):
            scen_output = params['--out'] + "/scenarios/" + scenario + "/"
        else:
            scen_output = ACRN_PATH + params['--out'] + "/scenarios/" + scenario + "/"
    else:
        scen_output = ACRN_CONFIG_DEF + "/" + scenario + "/"

    scen_board = scen_output + board_name + "/"
    common.mkdir(scen_board)
    common.mkdir(scen_output)

    vm_config_h = scen_output + GEN_FILE[0]
    vm_config_c = scen_output + GEN_FILE[1]
    pci_config_c = scen_board + GEN_FILE[2]
    config_hv = scen_board + board_name + GEN_FILE[3]
    ivshmem_config_h = scen_board + GEN_FILE[4]
    pt_intx_config_c = scen_board + GEN_FILE[5]

    # parse the scenario.xml
    get_scenario_item_values(params['--board'], params['--scenario'])
    (err_dic, scenario_items) = validate_scenario_setting(
        params['--board'], params['--scenario'])
    if err_dic:
        common.print_red("Scenario xml file validation failed:", err=True)
        return err_dic

    # generate board defconfig
    with open(config_hv, 'w+') as config:
        err_dic = board_defconfig.generate_file(scenario_items['hv'], config)
        if err_dic:
            return err_dic

    # generate vm_configuration.h
    with open(vm_config_h, 'w') as config:
        vm_configurations_h.generate_file(scenario_items, config)

    # generate vm_configuration.c
    with open(vm_config_c, 'w') as config:
        err_dic = vm_configurations_c.generate_file(scenario_items, config)
        if err_dic:
            return err_dic

    # generate ivshmem_cfg.h
    with open(ivshmem_config_h, 'w') as config:
        ivshmem_cfg_h.generate_file(scenario_items, config)

    # generate pci_dev.c
    with open(pci_config_c, 'w') as config:
        pci_dev_c.generate_file(scenario_items['vm'], config)

    # generate pt_intx.c
    with open(pt_intx_config_c, 'w') as config:
        pt_intx_c.generate_file(scenario_items['vm'], config)

    # generate ASL code of ACPI tables for Pre-launched VMs
    # (the return value of asl_gen.main is not inspected here)
    asl_gen.main(args)

    if not err_dic:
        print("Scenario configuration files were created successfully.")
    else:
        print("Failed to create scenario configuration files.")

    return err_dic
def main():
    # Crawl past "develop" challenges page by page and store each one, with
    # its registrants and submissions, in the `topcoder.challenges`
    # collection. Settings are read from config/challenges.ini.
    client = MongoClient()
    db = client.topcoder
    config = ConfigParser.RawConfigParser()
    config.read("config/challenges.ini")
    init = config.getboolean("default", "init")
    # With `init` set, start from the page index saved in the ini file;
    # otherwise start from the first page.
    if init:
        index = config.getint("default", "page_index")
    else:
        index = 1
    use_proxy = config.getboolean("default", "use_proxy")
    common.prepare(use_proxy=use_proxy)
    while True:
        path = "/v2/challenges/past?type=develop&pageIndex=%d&pageSize=10" % index
        raw = common.guarded_read(path)
        # A response with an empty data list ends the crawl.
        if '"data": []' in raw:
            return
        print "Page", index
        lists = json.loads(raw)
        for challenge in lists["data"]:
            cid = challenge["challengeId"]
            if filter_out(cid):
                continue
            # Already stored: skip it when `init` is set, otherwise stop.
            if db.challenges.find_one({"challengeId": cid}):
                if init:
                    continue
                else:
                    return
            common.random_sleep(1)
            print ' ', challenge["challengeName"]
            path = "/v2/challenges/" + str(cid)
            d = common.to_json(common.guarded_read(path))
            path = "/v2/challenges/registrants/" + str(cid)
            # Wrap the raw response in an object under the "registrants" key
            # so it can be merged into `d` below.
            raw = '{"registrants": %s}' % common.guarded_read(path)
            registrants = common.to_json(raw)
            path = "/v2/challenges/submissions/" + str(cid)
            submissions = common.to_json(common.guarded_read(path))
            d.update(registrants)
            d.update(submissions)
            format_challenge(d)
            db.challenges.insert_one(d)
        index += 1
        # With `init` set, write the next page index back to the ini file.
        if init:
            config.set("default", "page_index", index)
            with open("config/challenges.ini", "wb") as fp:
                config.write(fp)
        common.random_sleep(10)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'hello', then delegate to ``func``.

    ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``; previously the result
             was dropped and the wrapper always returned ``None``
    """
    prepare('hello', 'hello.jl', 'hello.abi', __file__, 7)
    return func(*args, **kwargs)
def prepare_image(file_name):
    """Open ``file_name`` with ``Image.open`` and hand it to ``common.prepare``.

    :param file_name: path passed to ``Image.open``
    :return: the result of ``common.prepare(image, None)``
    """
    return common.prepare(Image.open(file_name), None)
import os
import re
from StringIO import StringIO

import common
# common.prepare runs here, before the `format` import below.
common.prepare(use_proxy=False)

from format import format

root = "http://m.gulongbbs.com"

# Use book.htm when it exists; otherwise pass the stripped contents of
# book.txt to common.simple_read.
if os.path.exists("book.htm"):
    lst = open("book.htm").read()
else:
    lst = common.simple_read(open("book.txt").read().strip())

# The book title is the text between "<title>" (7 characters) and the next
# underscore.
beg = lst.index("<title>") + 7
end = lst.index('_', beg)
book_title = lst[beg:end]

# Narrow `lst` to the span from 'table width="100%"' up to the following
# "</table>".
beg = lst.index('table width="100%"', beg)
end = lst.index("</table>", beg)
lst = lst[beg:end]

chapters = []
beg = 0
# Scan for successive "href" occurrences.
# NOTE(review): the rest of this loop body is not visible in this view.
while True:
    beg = lst.find("href", beg)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'greeter', then delegate to ``func``.

    ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``
    """
    prepare('greeter', 'greeter.py', 'greeter.abi', 2, __file__)
    result = func(*args, **kwargs)
    return result
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'testcase', then delegate to ``func``.

    ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``; previously the result
             was dropped and the wrapper always returned ``None``
    """
    prepare('testcase', 'Testcase.java', 'Testcase.abi', __file__, 12)
    return func(*args, **kwargs)
def main(args):
    """
    Generate the scenario configuration source files.

    :param args: command line args for the script
    :return: error dictionary; empty when every step succeeded
    """
    (err_dic, params) = common.get_param(args)
    if err_dic:
        return err_dic

    # check env
    err_dic = common.prepare()
    if err_dic:
        return err_dic

    common.BOARD_INFO_FILE = params['--board']
    common.SCENARIO_INFO_FILE = params['--scenario']
    common.get_vm_num(params['--scenario'])
    common.get_vm_types()

    # get board name
    (err_dic, board_name) = common.get_board_name()
    # Check before the next call rebinds err_dic; otherwise a board-name
    # error would be silently discarded.
    if err_dic:
        return err_dic

    # get scenario name
    (err_dic, scenario) = common.get_scenario_name()
    if err_dic:
        return err_dic

    if common.VM_COUNT > common.MAX_VM_NUM:
        err_dic['vm count'] = \
            "The vm count in config xml should be less or equal {}!".format(
                common.MAX_VM_NUM)
        return err_dic

    # check if this is the scenario config which matched board info
    (err_dic, status) = common.is_config_file_match()
    if not status:
        err_dic['scenario config'] = \
            "The board xml and scenario xml should be matched!"
        return err_dic

    if params['--out']:
        if os.path.isabs(params['--out']):
            scenario_dir = os.path.join(params['--out'], scenario + '/')
            config_hv = os.path.join(params['--out'],
                                     board_name + GEN_FILE[3])
        else:
            scenario_dir = os.path.join(ACRN_PATH + params['--out'],
                                        scenario + '/')
            config_hv = os.path.join(ACRN_PATH + params['--out'],
                                     board_name + GEN_FILE[3])
    else:
        scenario_dir = os.path.join(ACRN_CONFIG_DEF, scenario + '/')
        config_hv = os.path.join(ACRN_CONFIGS, board_name + GEN_FILE[3])
        # ``warn=True`` belongs to print_yel; it used to be passed to
        # str.format, which ignores unused keyword arguments.
        common.print_yel("Override board defconfig...", warn=True)

    common.mkdir(scenario_dir)

    vm_config_h = scenario_dir + GEN_FILE[0]
    vm_config_c = scenario_dir + GEN_FILE[1]
    pci_config_c = scenario_dir + GEN_FILE[2]

    # parse the scenario.xml
    get_scenario_item_values(params['--board'], params['--scenario'])
    (err_dic, scenario_items) = validate_scenario_setting(
        params['--board'], params['--scenario'])
    if err_dic:
        common.print_red("Validate the scenario item failure", err=True)
        return err_dic

    # generate board defconfig
    with open(config_hv, 'w+') as config:
        err_dic = board_defconfig.generate_file(scenario_items['hv'], config)
        if err_dic:
            return err_dic

    # generate vm_configuration.h
    with open(vm_config_h, 'w') as config:
        vm_configurations_h.generate_file(scenario_items, config)

    # generate vm_configuration.c
    with open(vm_config_c, 'w') as config:
        err_dic = vm_configurations_c.generate_file(scenario_items['vm'],
                                                    config)
        if err_dic:
            return err_dic

    # generate pci_dev.c, only when at least one entry in pci_dev_num is
    # 2 or more
    pci_dev_counts = scenario_items['vm'].cfg_pci.pci_dev_num.values()
    if any(pci_dev_num >= 2 for pci_dev_num in pci_dev_counts):
        with open(pci_config_c, 'w') as config:
            pci_dev_c.generate_file(scenario_items['vm'], config)

    if not err_dic:
        print(
            "Scenario configurations for {} is generated successfully.".format(
                scenario))
    else:
        print("Scenario configurations for {} is generated failed.".format(
            scenario))

    return err_dic
def main(): config = ConfigParser.RawConfigParser() config.read("config/users.ini") use_proxy = config.getboolean("default", "proxy") common.prepare(use_proxy=use_proxy) client = MongoClient() db = client.topcoder print "Crawling users..." print "Current:", db.users.count() invalid = set() if os.path.exists("config/invalid_handles"): for line in open("config/invalid_handles"): line = line.strip() if line: invalid.add(line) handles = set() for challenge in db.challenges.find(): for reg in challenge["registrants"]: handle = reg["handle"].lower() if u' ' in handle or u'/' in handle or u'\\' in handle: continue if handle in invalid: continue if handle in handles: continue if db.users.find_one({u"handle": handle}): continue handles.add(handle) print len(handles), "users to be crawled." print "-----" for handle in handles: print handle while True: try: request = common.make_request(u"/v3.0.0/members/" + quote(handle)) s = urllib2.urlopen(request).read().decode("utf-8") d = common.to_json(s)[u"result"][u"content"] refine_user(d) user_skills(d) db.users.insert_one(d) common.random_sleep(1) break except urllib2.HTTPError, e: if e.code == 404 or e.code == 403: invalid.add(handle) with open("config/invalid_handles", "w") as fp: for h in sorted(invalid): fp.write(h + '\n') common.random_sleep(1) break else: print "HTTP Error", e.code, e.msg print e.geturl() print e.fp.read() except Exception, e: print "An unknown exception occurred." print e common.random_sleep(20)
def func_wrapper(*args, **kwargs):
    """Call ``prepare`` for 'actiontest', then delegate to ``func``.

    ``prepare`` and ``func`` are resolved from the enclosing scope.

    :return: the result of ``func(*args, **kwargs)``; previously the result
             was dropped and the wrapper always returned ``None``
    """
    prepare('actiontest', 'actiontest.py', 'actiontest.abi', 2, __file__)
    return func(*args, **kwargs)
def main(): common.prepare(use_proxy=g_config.use_proxy) client = MongoClient() db = client.topcoder print "Crawling users..." print "Current:", db.users.count() if g_config.recrawl_all: print "Recrawl all users" if g_config.recheck_invalid_handles: print "Recheck invalid handles" invalid = set() def add_invalid_handle(hdl): invalid.add(hdl) with open(INVALID_HANDLES_FPATH, "w") as fp: for h in sorted(invalid): try: fp.write(h.encode("utf-8") + '\n') except UnicodeDecodeError: pass if os.path.exists(INVALID_HANDLES_FPATH): for line in open(INVALID_HANDLES_FPATH): line = line.strip() if line: invalid.add(line.decode("utf-8")) handles = set() query = {u"handle": None} field = {u"_id": 1} nb_challeges = db.challenges.count() for index, challenge in enumerate(db.challenges.find()): if (index + 1) % 100 == 0: print "Challenges: %d/%d" % (index + 1, nb_challeges) for reg in challenge[u"registrants"]: handle = reg[u"handle"].lower() for ch in ur" \/": if ch in handle: continue if handle in invalid: continue if handle in handles: continue if not g_config.recrawl_all: query[u"handle"] = handle if db.users.find_one(query, field) is not None: continue handles.add(handle) if g_config.recheck_invalid_handles or g_config.recrawl_all: handles.update(invalid) invalid = set() if os.path.exists(INVALID_HANDLES_FPATH): os.rename(INVALID_HANDLES_FPATH, INVALID_HANDLES_FPATH + ".bak") print len(handles), "users to be crawled" print "-----" for index, handle in enumerate(handles): print "[%d/%d]" % (index + 1, len(handles)), handle while True: try: try: quoted = quote_handle(handle) except KeyError: add_invalid_handle(handle) break request = common.make_request(u"/v3/members/" + quoted) s = common.open_request_and_read(request).decode("utf-8") d = common.to_json(s)[u"result"][u"content"] try: refine_user(d) user_skills(d) user_stats(d) user_external_accounts(d) except: traceback.print_exc() add_invalid_handle(handle) common.random_sleep(DOZE) break db.users.insert_one(d) 
common.random_sleep(DOZE) break except urllib2.HTTPError, e: if e.code in (404, 403,): add_invalid_handle(handle) common.random_sleep(DOZE) break else: print "HTTP Error", e.code, e.msg print e.geturl() print e.fp.read() except KeyboardInterrupt: return except: