def generate_inverse_strategy_data(strategy_lists, ef_input_keys, ef_output_keys,
                                   techno_keys_waste, techno_keys_product,
                                   unit_scaling_techno_product,
                                   unit_scaling_techno_waste, sacrificial_lca,
                                   water_dir):
    initial_ratios_inverse = {}
    print("Calculate initial in/out ratios for inverse strategy activities")
    for act in pyprind.prog_bar(strategy_lists['inverse']):
        initial_ratios_inverse[act] = 1 / initial_in_over_out(
            act, ef_input_keys, ef_output_keys, techno_keys_waste,
            techno_keys_product, unit_scaling_techno_product,
            unit_scaling_techno_waste)
    print("getting row indices for inverse strategy")
    rows_of_interest_inverse = {}
    for act in pyprind.prog_bar(strategy_lists['inverse']):
        rows_of_interest_inverse[act] = identify_rows_of_interest_inverse(
            sacrificial_lca, act, ef_input_keys, ef_output_keys,
            techno_keys_waste, techno_keys_product)
    with open(os.path.join(water_dir, "initial_ratios_inverse.pickle"), "wb") as f:
        pickle.dump(initial_ratios_inverse, f)
    with open(os.path.join(water_dir, "rows_of_interest_inverse.pickle"), "wb") as f:
        pickle.dump(rows_of_interest_inverse, f)
def generate_default_strategy_data(strategy_lists, transformation_from,
                                   transformation_to, sacrificial_lca,
                                   land_use_dir):
    if strategy_lists['default']:
        initial_ratios_default = {}
        print("Calculate initial in/out ratios for default strategy activities")
        for act in pyprind.prog_bar(strategy_lists['default']):
            initial_ratios_default[act] = initial_in_over_out(
                act, transformation_from, transformation_to,
            )
        rows_of_interest_default = {}
        print("getting rows of interest for default strategy")
        for act in pyprind.prog_bar(strategy_lists['default']):
            rows_of_interest_default[act] = identify_rows_of_interest_default(
                sacrificial_lca, act, transformation_from, transformation_to)
        with open(os.path.join(land_use_dir, "initial_ratios_default.pickle"),
                  "wb") as f:
            pickle.dump(initial_ratios_default, f)
        with open(os.path.join(land_use_dir, "rows_of_interest_default.pickle"),
                  "wb") as f:
            pickle.dump(rows_of_interest_default, f)
def generate_inverse_strategy_data(strategy_lists, transformation_from,
                                   transformation_to, sacrificial_lca,
                                   land_use_dir):
    if strategy_lists['inverse']:
        initial_ratios_inverse = {}
        print("Calculate initial in/out ratios for inverse strategy activities")
        for act in pyprind.prog_bar(strategy_lists['inverse']):
            initial_ratios_inverse[act] = 1 / initial_in_over_out(
                act, transformation_from, transformation_to,
            )
        print("getting keys for inverse strategy")
        rows_of_interest_inverse = {}
        for act in pyprind.prog_bar(strategy_lists['inverse']):
            rows_of_interest_inverse[act] = identify_rows_of_interest_inverse(
                sacrificial_lca, act, transformation_from, transformation_to,
            )
        with open(os.path.join(land_use_dir, "initial_ratios_inverse.pickle"),
                  "wb") as f:
            pickle.dump(initial_ratios_inverse, f)
        with open(os.path.join(land_use_dir, "rows_of_interest_inverse.pickle"),
                  "wb") as f:
            pickle.dump(rows_of_interest_inverse, f)
def generate_default_strategy_data(strategy_lists, ef_input_keys, ef_output_keys,
                                   techno_keys_waste, techno_keys_product,
                                   unit_scaling_techno_product,
                                   unit_scaling_techno_waste, sacrificial_lca,
                                   water_dir):
    initial_ratios_default = {}
    print("Calculate initial in/out ratios for default strategy activities")
    for act in pyprind.prog_bar(strategy_lists['default']):
        initial_ratios_default[act] = initial_in_over_out(
            act, ef_input_keys, ef_output_keys, techno_keys_waste,
            techno_keys_product, unit_scaling_techno_product,
            unit_scaling_techno_waste)
    rows_of_interest_default = {}
    print("getting rows of interest for default strategy")
    for act in pyprind.prog_bar(strategy_lists['default']):
        rows_of_interest_default[act] = identify_rows_of_interest_default(
            sacrificial_lca, act, ef_input_keys, ef_output_keys,
            techno_keys_waste, techno_keys_product)
    with open(os.path.join(water_dir, "initial_ratios_default.pickle"), "wb") as f:
        pickle.dump(initial_ratios_default, f)
    with open(os.path.join(water_dir, "rows_of_interest_default.pickle"), "wb") as f:
        pickle.dump(rows_of_interest_default, f)
def main(args): path = args.path filenames = os.listdir(path) filenames = [n for n in filenames if n.endswith(".edus.arcs")] filenames.sort() for filename in pyprind.prog_bar(filenames): edus_arcs = utils.read_lines(os.path.join(path, filename), process=lambda line: line.split()) edus_deprels = [] for arcs in edus_arcs: arcs = treetk.hyphens2arcs(arcs) deprels = [l for h, d, l in arcs] edus_deprels.append(deprels) # Write with open( os.path.join(path, filename.replace(".edus.arcs", ".edus.deprels")), "w") as f: for deprels in edus_deprels: deprels = " ".join(deprels) f.write("%s\n" % deprels)
def read_examples_from_file(fields, format: str, path): make_example = { 'json': Example.fromJSON, 'dict': Example.fromdict, 'tsv': Example.fromCSV, 'csv': Example.fromCSV }[format.lower()] lines = 0 with open(os.path.expanduser(path), encoding="utf8") as f: for line in f: lines += 1 with open(os.path.expanduser(path), encoding="utf8") as f: if format == 'csv': reader = unicode_csv_reader(f) elif format == 'tsv': reader = unicode_csv_reader(f, delimiter='\t') else: reader = f next(reader) examples = [ make_example(line, fields) for line in pyprind.prog_bar( reader, iterations=lines, title='\nReading and processing data from "' + path + '"') ] return examples
def media_jobs(cfg, dry_run, is_video): """Generate either all image or all video jobs for a given config.""" if is_video: media_lc = 'video' media_uc = 'Video' src_media = src_videos media_targets = vid_targets else: media_lc = 'image' media_uc = 'Image' src_media = src_images media_targets = img_targets l.info('Generating {} jobs...'.format(media_lc)) jobs = [] skipped = 0 si = src_media(cfg) if not si: l.debug('No source {}s'.format(media_lc)) return for src in pyprind.prog_bar(si): j, s = media_targets(cfg, src, dry_run) jobs.extend(j) skipped += s l.info('{} jobs: running {}, skipped {}, total {}'.format( media_uc, len(jobs), skipped, len(jobs) + skipped)) return jobs
def find_optimal_gamma(horizon=15, n_traj=1000, map_name="5x5"):
    # Use the map_name argument rather than a hard-coded "9x9" map, so the
    # parameter is not silently ignored.
    w_env = FrozenLakeEnv(map_name=map_name, horizon=horizon,
                          theta_dist="hypercube")
    for gamma in candidate_gammas:
        test_pi_H = EpsOptimalMDPPolicy(w_env, discount=gamma)
        logger.log("-------------------")
        logger.log("Evaluating gamma={} for {} timesteps".format(gamma, horizon))
        logger.log("-------------------")
        test_env = HumanCRLWrapper(w_env, test_pi_H, 0)
        logger.log("Obtaining Samples...")
        # Alas, the rllab samplers don't support hot swapping envs and batch sizes
        # TODO: write a new parallel sampler, instead of sampling manually
        rewards = []
        regrets = []
        for i in pyprind.prog_bar(range(n_traj)):
            observation = test_env.reset()
            for t in range(horizon):
                action = test_env.nA - 1
                observation, reward, done, info = test_env.step(action)
                if done:
                    rewards.append(info["accumulated rewards"])
                    regrets.append(info["accumulated regret"])
                    break
        # feel free to add more data
        logger.log("NumTrajs {}".format(n_traj))
        logger.log("AverageReturn {}".format(np.mean(rewards)))
        logger.log("StdReturn {}".format(np.std(rewards)))
        logger.log("MaxReturn {}".format(np.max(rewards)))
        logger.log("MinReturn {}".format(np.min(rewards)))
        logger.log("AverageRegret {}".format(np.mean(regrets)))
        logger.log("MaxRegret {}".format(np.max(regrets)))
        logger.log("MinRegret {}".format(np.min(regrets)))
def eval_mdp_policies(horizon=15, n_traj=100000, log_dir=None): text_output_file = None if log_dir is None else osp.join(log_dir, "text") w_env = FrozenLakeEnv(horizon=horizon) if text_output_file is not None: logger.add_text_output(text_output_file) for human_policy in human_mdp_policies.values(): logger.log("-------------------") logger.log("Evaluating {} for {} timesteps".format( human_policy.__name__, horizon)) logger.log("-------------------") test_pi_H = human_policy(w_env) test_env = HumanCRLWrapper(w_env, test_pi_H) logger.log("Obtaining Samples...") rewards = [] for i in pyprind.prog_bar(range(n_traj)): observation = test_env.reset() for t in range(horizon): # _, action = observation # if action == test_env.nA: action = test_env.nA - 1 observation, reward, done, info = test_env.step(action) if done: rewards.append(info["accumulated rewards"]) break #feel free to add more data logger.log("NumTrajs {}".format(n_traj)) logger.log("AverageReturn {}".format(np.mean(rewards))) logger.log("StdReturn {}".format(np.std(rewards))) logger.log("MaxReturn {}".format(np.max(rewards))) logger.log("MinReturn {}".format(np.min(rewards)))
def crawl_songs(area_list, save_path):
    singer_id_done = []
    for root, dirs, files in os.walk(save_path):
        for file_name in files:
            singer_id = re.search("song_list_.*_(.*).json", file_name).group(1)
            singer_id_done.append(int(singer_id))
    area_2_singers = json.load(
        open("../Sources/qq_music_yield/area_2_singers.json", "r",
             encoding="utf-8"))
    for area in area_list:
        singer_list = area_2_singers[area]
        # One ProgBar per area is enough; also wrapping the loop in
        # pyprind.prog_bar would draw two progress bars at once.
        bar = pyprind.ProgBar(
            len(singer_list),
            title="process of crawling songs of singers of {}".format(area))
        for singer in singer_list:
            singer_name = singer[settings.KEY_SINGER_NAME]
            singer_id = singer[settings.KEY_SINGER_ID]
            if singer_id in singer_id_done:
                bar.update()
                continue
            song_list = crawl_song_list(singer)
            json.dump(
                song_list,
                open("%s/song_list_%s_%s.json" %
                     (save_path, singer_name, singer_id),
                     "w", encoding="utf-8"))
            bar.update()
def train(self, sess=None):
    if sess is None:
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
    replay_buffer = SimpleReplayBuffer(
        env_spec=self._env.spec,
        max_replay_buffer_size=self._max_pool_size)
    path_length = 0
    episode_rewards = 0
    observation = self._env.reset()
    with sess.as_default():
        self._update_target()
        for ep in range(self._n_epochs):
            mean_loss = 0
            trained_iter = 0
            epoch_rewards = list()
            episode_lengths = list()
            with logger.prefix('Epoch #%d | ' % ep):
                for ep_iter in pyprind.prog_bar(range(self._epoch_length)):
                    self._env.render()
                    action, _ = self._es.get_action(observation)
                    next_observation, reward, terminal, _ = self._env.step(action)
                    replay_buffer.add_sample(
                        observation=observation,
                        next_observation=next_observation,
                        action=action,
                        terminal=terminal,
                        reward=reward,
                    )
                    episode_rewards += reward
                    path_length += 1
                    observation = next_observation
                    if terminal or path_length >= self._max_path_length:
                        observation = self._env.reset()
                        epoch_rewards.append(episode_rewards)
                        episode_lengths.append(path_length)
                        path_length = 0
                        episode_rewards = 0
                    iter = ep * self._epoch_length + ep_iter
                    if replay_buffer.size > self._min_pool_size:
                        batch = replay_buffer.random_batch(self._batch_size)
                        loss = self._do_training(iter, batch)
                        mean_loss += loss
                        trained_iter += 1
                    if iter % self._target_update_period == 0 and replay_buffer.size > self._min_pool_size:
                        self._update_target()
                logger.record_tabular('mean-td-error', (mean_loss / self._epoch_length))
                logger.record_tabular('mean-episode-reward', np.mean(epoch_rewards))
                logger.record_tabular('mean-episode-length', np.mean(episode_lengths))
                logger.dump_tabular()
def main(): config = utils.Config() filenames = os.listdir( os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "segmented")) filenames = [n for n in filenames if n.endswith(".txt")] filenames.sort() utils.mkdir( os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed")) for filename in pyprind.prog_bar(filenames): path_seg = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "segmented", filename) path_raw = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "raw", filename) path_dst = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed", filename.replace(".txt", ".edus")) # Input edus = utils.read_lines(path_seg, process=lambda line: line) edus = remove_empty_lines(filename, edus) raw_lines = utils.read_lines(path_raw, process=lambda line: line) raw_lines = remove_empty_lines(filename, raw_lines) assert count_chars(edus) == count_chars(raw_lines) # Processing edus = convert_edus(edus, raw_lines) assert count_chars(edus) == count_chars(raw_lines) # Output utils.write_lines(path_dst, edus)
def _identify_techno_keys(self): """Identify keys of activities with water production exchanges These should be considered in balancing. Keys are grouped by activities associated with input exchanges (e.g. wastewater treatment) and output exchanges (e.g. potable water) """ names_file = Path(__file__).parents[0]/'data'/'water_intermediary_exchange_names.json' if not names_file.is_file(): raise FileNotFoundError("Could not find file water_intermediary_exchange_names.json in expected location") with open(names_file, "rb") as f: techno_product_names_dict = json.load(f) techno_product_names = techno_product_names_dict[self.ecoinvent_version] techno_treat_keys = [] techno_transfo_keys = [] db_loaded = Database(self.database_name).load() for act_key, act in pyprind.prog_bar(db_loaded.items()): if act['reference product'] in techno_product_names: if act['production amount']<0: techno_treat_keys.append(act_key) elif act['production amount']>0: techno_transfo_keys.append(act_key) else: warnings.warn("Activity {} has a product exchange {} with " "an amount of 0: skipped".format( act_key, act['reference product'] )) return techno_transfo_keys, techno_treat_keys
def subject_verify(new_arxiv): if new_arxiv.count > 0: subject_list = copy.copy(new_arxiv.subject) remove_list = [] new_ver = arxiv(new_arxiv.author) new_ver.parse() for count in pyprind.prog_bar(range(len(new_ver.title))): if len(set(subject_list) & set(new_ver.category[count])) == 0: remove_list.append(count) new_ver.arxiv_id = (np.delete(np.array(new_ver.arxiv_id), remove_list, axis=0)).tolist() new_ver.time = (np.delete(np.array(new_ver.time), remove_list, axis=0)).tolist() new_ver.title = (np.delete(np.array(new_ver.title), remove_list, axis=0)).tolist() new_ver.category = (np.delete(np.array(new_ver.category), remove_list, axis=0)).tolist() new_ver.pdf = (np.delete(np.array(new_ver.pdf), remove_list, axis=0)).tolist() new_ver.contributor = (np.delete(np.array(new_ver.contributor), remove_list, axis=0)).tolist() new_ver.count = len(new_ver.title) new_ver.subject = combine_subject(new_ver.category) print('Remove %d articles' % len(remove_list)) return new_ver else: return new_arxiv
def main(): dataset_path = "/path/to/Caltech-101" modelzoo_path = "/path/to/VGG16" # create an instance convnet = FeatureExtractor( prototxt_path=os.path.join(modelzoo_path, "vgg16_deploy.prototxt"), caffemodel_path=os.path.join(modelzoo_path, "vgg16.caffemodel"), target_layer_name="fc7", image_size=224, mean_values=[103.939, 116.779, 123.68]) # header f = open("caltech101_vggnet_fc7_features.csv", "w") header = ["filepath"] for i in xrange(4096): header.append("feat%d" % (i+1)) header = ",".join(header) + "\n" f.write(header) # extract features categories = os.listdir(dataset_path) for category in pyprind.prog_bar(categories): file_names = os.listdir(os.path.join(dataset_path, category)) for file_name in file_names: img = cv2.imread(os.path.join(dataset_path, category, file_name)) feat = convnet.transform(img) feat_str = [os.path.join(category, file_name)] for value in feat: feat_str.append(str(value)) row = ",".join(feat_str) f.write("%s\n" % row) f.flush() f.close()
def split_by_id(beatdf, id_field='ptid', frac_train=.6, frac_val=.15):
    """ Deterministically splits the beatdf by _patient_ """
    empis = np.sort(beatdf[id_field].unique())
    print("Splitting %d unique patients" % len(empis))

    # deterministic split
    rs = np.random.RandomState(0)
    perm_idx = rs.permutation(len(empis))
    num_train = int(frac_train * len(empis))
    num_val = int(frac_val * len(empis))
    train_idx = perm_idx[:num_train]
    val_idx = perm_idx[num_train:(num_train + num_val)]
    test_idx = perm_idx[(num_train + num_val):]
    empis_train = empis[train_idx]
    empis_val = empis[val_idx]
    empis_test = empis[test_idx]
    print(" ... patient splits: %d train, %d val, %d test " %
          (len(empis_train), len(empis_val), len(empis_test)))

    # make dictionaries
    train_dict = {e: "train" for e in empis_train}
    val_dict = {e: "val" for e in empis_val}
    test_dict = {e: "test" for e in empis_test}
    split_dict = {**train_dict, **val_dict, **test_dict}

    # add train/val test split to each
    split = []
    for e in pyprind.prog_bar(beatdf[id_field]):
        split.append(split_dict[e])
    beatdf['split'] = split
    return beatdf
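# --- Illustrative usage sketch (not part of the original source) ---
# Assumes pandas, numpy and pyprind are installed and split_by_id is defined as
# above; the column name 'ptid' and the tiny DataFrame below are made-up examples.
import numpy as np
import pandas as pd

beatdf = pd.DataFrame({
    'ptid': [1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10, 10],
    'beat': range(14),
})
beatdf = split_by_id(beatdf, id_field='ptid', frac_train=.6, frac_val=.15)
# Every row of the same patient lands in the same split, and the assignment is
# reproducible because the permutation uses a fixed RandomState(0).
print(beatdf.groupby('split')['ptid'].nunique())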
def Train_Eval_Process_Layer_v2(train_X,train_Y,test_X,test_Y): # LSTM epoch_num = 10 #model = LSTM_model(input_dim=8,hidden_dim=8) model = One_Sent2Other_Sent() optimizer = optim.Adam(model.parameters()) criterion = nn.BCELoss() for epoch_ in pyprind.prog_bar(range(epoch_num)): model.train() for i in range(len(train_X)): X = torch.tensor(train_X[i])#.cuda() pred_train_Y = model(X) Y = torch.tensor([train_Y[i]])#.cuda() true_train_Y = Y.squeeze(dim=-1) loss = criterion(pred_train_Y, true_train_Y.float()) optimizer.zero_grad() loss.backward() optimizer.step() #print('loss:',loss) model.eval() pred_test_Y = list() for i in range(len(test_X)): X = torch.tensor(test_X[i])#.cuda() pred_test_Y_i = model(X).cpu().data.numpy().reshape(1,1) pred_test_Y.append(pred_test_Y_i) test_Y_hat = np.concatenate(pred_test_Y,0) test_Y_hat_list = list() for i in range(test_Y_hat.shape[0]): if test_Y_hat[i,0] >= 0.5: test_Y_hat_list.append(1) else: test_Y_hat_list.append(0) Evaluation(test_Y_hat_list,test_Y)
def write_db_to_brightway(self):
    for s in pyprind.prog_bar(self.scenarios.items()):
        scenario, year = s
        print('Write new database to Brightway2.')
        wurst.write_brightway2_database(
            self.db, "ecoinvent_" + scenario + "_" + str(year))
def extract_ecospold2_directory(dirpath, use_mp=True):
    """Extract all the ``.spold`` files in the directory ``dirpath``.

    Use a multiprocessing pool if ``use_mp``, which is the default."""
    if os.name == 'nt':
        use_mp = False
    assert os.path.isdir(dirpath), "Can't find directory {}".format(dirpath)
    filelist = [os.path.join(dirpath, filename)
                for filename in os.listdir(dirpath)
                if filename.lower().endswith(".spold")]
    print("Extracting {} undefined datasets".format(len(filelist)))
    if use_mp:
        start = time()
        # With code from
        # http://jtushman.github.io/blog/2014/01/14/python-%7C-multiprocessing-and-interrupts/
        with multiprocessing.Pool(
                processes=multiprocessing.cpu_count(),
                initializer=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN)
        ) as pool:
            try:
                data = pool.map(generic_extractor, filelist)
            except KeyboardInterrupt:
                pool.terminate()
                raise KeyboardInterrupt
        print("Extracted {} undefined datasets in {:.1f} seconds".format(
            len(data), time() - start))
    else:
        data = [generic_extractor(fp) for fp in pyprind.prog_bar(filelist)]
    # Unroll lists of lists
    return [y for x in data for y in x]
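# --- Illustrative usage sketch (not part of the original source) ---
# Hypothetical directory path; assumes generic_extractor and the modules used by
# extract_ecospold2_directory (os, time, multiprocessing, signal, pyprind) are
# available in the same module.
if __name__ == "__main__":
    datasets = extract_ecospold2_directory("/path/to/ecospold2/datasets",
                                           use_mp=False)
    print("{} datasets extracted".format(len(datasets)))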
def validate_directory_against_xsd(dirpath, schema):
    """Validate all the ``.spold`` files in the directory ``dirpath``
    against the XSD schema file ``schema``."""
    assert os.path.isdir(dirpath), "Can't find data directory {}".format(dirpath)
    assert os.path.isfile(schema), "Can't find schema file {}".format(schema)
    filelist = [
        os.path.join(dirpath, filename)
        for filename in os.listdir(dirpath)
        if filename.lower().endswith(".spold")
    ]
    print("Validating {} undefined datasets".format(len(filelist)))
    errors = []
    ecospold2_schema = etree.XMLSchema(etree.parse(open(schema)))
    for fp in pyprind.prog_bar(filelist):
        file = etree.parse(open(fp))
        if not ecospold2_schema.validate(file):
            errors.append(os.path.basename(fp))
    if errors:
        print("The following files did not validate:")
        pprint.pprint(errors)
    else:
        print("All files valid")
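# --- Illustrative usage sketch (not part of the original source) ---
# Hypothetical paths; assumes lxml.etree, pprint and pyprind are importable and
# the directory holds EcoSpold2 .spold files plus the matching XSD schema.
if __name__ == "__main__":
    validate_directory_against_xsd(
        dirpath="/path/to/datasets",
        schema="/path/to/EcoSpold02.xsd",
    )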
def main(): config = utils.Config() path_out = os.path.join(config.getpath("data"), "aarc_abst") utils.mkdir(path_out) filenames = os.listdir(config.getpath("aarc")) filenames = [n for n in filenames if n.endswith(".txt.utf8")] filenames.sort() nlp = spacy.load("en_core_web_sm", disable=["tagger", "parser", "ner", "textcat"]) cnt = 0 for filename in pyprind.prog_bar(filenames): text = extract_abstract(os.path.join(config.getpath("aarc"), filename)) if text == "": # print("No Abstract!: %s" % filename) continue with open( os.path.join(path_out, filename.replace(".txt.utf8", ".doc.tokens")), "w") as f: doc = nlp(text) tokens = [token.text for token in doc] assert len(tokens) > 0 tokens = " ".join(tokens) f.write("%s\n" % tokens) cnt += 1 print("Processed %d/%d files" % (cnt, len(filenames)))
def bulk_upload(self): items_to_upload = [] append = items_to_upload.append credentials = get_db_credentials(self.settings) if 'sqlite3' in credentials['ENGINE']: db = dataset.connect("sqlite:///" + os.path.basename(credentials['NAME'])) if 'postgresql' in credentials['ENGINE']: db = dataset.connect('postgresql://' + credentials['USER'] + ':' + credentials['PASSWORD'] + '@' + credentials['HOST'] + ':' + credentials['PORT'] + '/' + credentials['NAME']) table = db['visitors_visitor'] print("Starting checks to see if we have this item in our database.") if len(self.items) == 0: print("Nothing to upload") else: for i in pyprind.prog_bar(range(len(self.items))): item = self.items[i] try: item['date'] = datetime.datetime.strptime( item['date'], '%Y-%m-%d', ) except ValueError: item['date'] = None append(item) print("uploading %i records for table %s" % (len(items_to_upload), self.mytable)) table.insert_many(items_to_upload)
def Format_csv2XY(path): X, Y, title, self_contradictory_template, revision_id_list = list(), list( ), list(), list(), list() df = pd.read_csv(path) page_title = list(df['page_title']) revision_text = list(df['revision_text']) revision_id = list(df['revision_id']) for i in pyprind.prog_bar(range(len(revision_text))): self_contradictory_template_i = list() text = revision_text[i] title_i = page_title[i] revision_id_i = revision_id[i] if isinstance(text, str) is True and len(text.split()) != 0: wikicode = mwparserfromhell.parse(text) templates = wikicode.filter_templates() is_pos = False for j in range(len(templates)): if 'Self-contradictory' in templates[j]: is_pos = True self_contradictory_template_i.append(templates[j]) if is_pos: X.append(str(text)) title.append(title_i) Y.append(1) else: X.append(str(text)) title.append(title_i) Y.append(0) self_contradictory_template.append(self_contradictory_template_i) revision_id_list.append(revision_id_i) return X, Y, title, self_contradictory_template, revision_id_list
def Train_Eval_Process_Layer(train_X, train_Y, test_X, test_Y): # RetaGNN + Self Attention import pyprind import pickle epoch_num = 10 input_dim = 8 hidden_dim = 8 model = double_LSTM_model().cuda() optimizer = optim.Adam(model.parameters()) criterion = nn.BCELoss() for epoch_ in range(epoch_num): model.train() for i in pyprind.prog_bar(range(len(train_X))): batch_X, batch_Y = train_X[i], train_Y[i] #(b,l,d) ,(b,) batch_Y_hat = model(batch_X).squeeze(dim=-1) loss = criterion(batch_Y_hat, batch_Y.float()) optimizer.zero_grad() loss.backward() optimizer.step() #print('loss:',loss) model.eval() pred_Y = list() for i in range(len(test_X)): pred_Y.append(model(test_X[i]).view(1, -1)) test_Y_hat = torch.cat(pred_Y, 0).cpu().data.numpy() test_Y_hat_list = list() for i in range(test_Y_hat.shape[0]): if test_Y_hat[i, 0] >= 0.5: test_Y_hat_list.append(1) else: test_Y_hat_list.append(0) Evaluation(test_Y_hat_list, test_Y)
def getJob(self): job = [] for i in range(1, 1000): if requests.get( 'https://www.yourator.co/api/v2/jobs?page={}'.format( i)).json()['jobs'] == []: break job += requests.get( 'https://www.yourator.co/api/v2/jobs?page={}'.format( i)).json()['jobs'] for i in pyprind.prog_bar(job): res = requests.get('https://www.yourator.co/' + i['path']).text soup = BeautifulSoup(res, "html.parser") i['inside'] = {} i['inside']['description'] = soup.select( '.description')[0].text.strip() if len( soup.select('.description')) else '' for j in soup.select('.basic-info'): key, value = j.text.strip().replace(' ', '').replace('\n', '').split(':') i['inside'][key] = value if i['has_salary_info']: for j in soup.select('h2'): if j.text == '薪資範圍': i['salary'] = j.findNext('article').text with open('job.json', 'w') as f: json.dump(self.testData(job), f)
def calc_features(net, n_images, blobs): n_images = int(0.6 * n_images) batchsize = net.blobs['data'].data.shape[0] feats = dict() for blob in blobs: out_shape = list(net.blobs[blob].data.shape) out_shape[0] = n_images print('Will allocate {:.2f} GiB of memory'.format( np.prod(out_shape) * 2 / 1024 / 1024 / 1024)) feats[blob] = np.zeros( tuple(out_shape), dtype=np.float16 if not blob == 'label' else np.int32) print('Need %.3f GiB' % (np.sum([x.nbytes for x in feats.values()]) / 1024 / 1024 / 1024)) for it in pyprind.prog_bar(range(0, n_images, batchsize), update_interval=10, stream=sys.stderr): net.forward() for blob in blobs: feats[blob][it:it + batchsize, ...] = net.blobs[blob].data[:feats[blob][it:it + batchsize, ...].shape[0], ...] return [feats[blob] for blob in blobs]
def __new__(cls, iterable=None, desc=None, total=None, leave=True, backend=None, verbose=True): if backend is None: backend = Progressbar.backend if not verbose: backend = "hide" if backend == "tqdm": from tqdm import tqdm return tqdm(iterable=iterable, desc=desc, total=total, leave=leave, ascii=True, ncols=80, file=sys.stdout, bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed" "}<{remaining}{postfix}]") # remove rate_fmt elif backend == "tqdm_notebook": from tqdm import tqdm_notebook return tqdm_notebook(iterable=iterable, desc=desc, total=total, leave=leave) elif backend == "pyprind": from pyprind import ProgBar, prog_bar ProgBar._adjust_width = lambda self: None # keep constant width if iterable is None: return ProgBar(total, title=desc, stream=1) else: return prog_bar(iterable, title=desc, stream=1, iterations=total) elif backend == "hide": return NoProgressbar(iterable=iterable) else: raise NotImplementedError("unknown backend")
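# --- Illustrative usage sketch (not part of the original source) ---
# Assumes the Progressbar class defined by the __new__ above is importable; the
# backend name "pyprind" mirrors one of the branches handled there, and the
# loop body is a stand-in for real work.
import time

for item in Progressbar(range(50), desc="processing", total=50,
                        backend="pyprind", verbose=True):
    time.sleep(0.01)
# With verbose=False the "hide" backend is selected and a NoProgressbar wrapper
# is returned instead of drawing a bar.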
def evaluate(model, model_name, sents, ivocab): train = False loss = 0.0 acc = 0.0 count = 0 vocab_size = model.vocab_size for data_i in pyprind.prog_bar(xrange(len(sents))): words = sents[data_i:data_i + 1] if model_name == "bd_lstm": xs, ms = utils.make_batch(words, train=train, tail=False, mask=True) ys = model.forward(xs=xs, ms=ms, train=train) else: xs = utils.make_batch(words, train=train, tail=False) ys = model.forward(ts=xs, train=train) ys = F.concat(ys, axis=0) ts = F.concat(xs, axis=0) ys = F.reshape(ys, (-1, vocab_size)) ts = F.reshape(ts, (-1, )) loss += F.softmax_cross_entropy(ys, ts) * len(words[0]) acc += F.accuracy(ys, ts, ignore_label=-1) * len(words[0]) count += len(words[0]) loss_data = float(cuda.to_cpu(loss.data)) / count acc_data = float(cuda.to_cpu(acc.data)) / count return loss_data, acc_data
def handle(self, *args, **options): if options['tsvfile'] is None or options['sheet'] is None: error_msg = 'Enter name of tsv file and sheet number as argument.' \ ' "python manage.py import_hojas_de_vida --tsvfile=hoja0.tsv --sheet=0 --settings=ventanita.settings.local' raise CommandError(error_msg) tsv_file = options['tsvfile'] sheet = options['sheet'] self.sheet = sheet with codecs.open(tsv_file, "r") as file_handle: dump = file_handle.readlines() if sheet == '0': items = [] for line in pyprind.prog_bar(dump): item = self.parse_line(line) if item is not None: items.append(Candidato(**item)) Candidato.objects.bulk_create(items) elif sheet == '1': self.import_institucion_educativa(dump) self.import_education_for_candidate(dump) elif sheet == '2': self.import_institucion_educativa_superior(dump) self.import_education_for_candidate(dump)
def count_sentence_length(corpus, count):
    for s in pyprind.prog_bar(corpus):
        length = len(s)
        if length >= len(count):
            continue
        count[length] += 1
    return count
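# --- Illustrative usage sketch (not part of the original source) ---
# The corpus is a list of tokenized sentences; `count` is a pre-allocated
# histogram indexed by sentence length (lengths >= len(count) are skipped).
corpus = [["a", "b", "c"], ["a"], ["x", "y"], ["p", "q", "r", "s"]]
count = [0] * 4
count = count_sentence_length(corpus, count)
print(count)  # [0, 1, 1, 1] -- the 4-token sentence falls outside the histogram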
def track_progress(self, noisy_grad, filtered_grad): # if function passed in --- save values if self.fun is not None: self.fun_vals.append(self.fun(self.params, self.t)) # report on gradient if self.callback is not None: self.callback(self.params, self.t, noisy_grad) # update object attributes if self.save_params: self.param_trace.append(self.params.copy()) if self.save_grads: self.grad_trace.append(noisy_grad) if self.save_filtered_grads: self.filtered_grad_trace.append(filtered_grad) if self.true_grad_fun is not None: true_grad = self.true_grad_fun(self.params, self.t) self.true_grad_trace.append(true_grad) if (self.num_marginal_samples_to_save > 0) and \ (self.t % self.marginal_sample_skip == 0): nms = self.num_marginal_samples_to_save print " ... saving %d marginal samples (iter %d)" % (nms, self.t) msamps = np.array([ self.grad_fun(self.params, self.t) for _ in pyprind.prog_bar(xrange(nms)) ]) self.marginal_samples[self.t] = msamps
def parse(model, decoder, dataset, path_pred): """ :type model: SpanBasedModel :type decoder: IncrementalCKYDecoder :type dataset: numpy.ndarray :type path_pred: str :rtype: None """ with open(path_pred, "w") as f: for data in pyprind.prog_bar(dataset): edu_ids = data.edu_ids edus = data.edus edus_postag = data.edus_postag edus_head = data.edus_head sbnds = data.sbnds pbnds = data.pbnds # Feature extraction edu_vectors = model.forward_edus(edus, edus_postag, edus_head) # (n_edus, bilstm_dim) padded_edu_vectors = model.pad_edu_vectors( edu_vectors) # (n_edus+2, bilstm_dim) mask_bwd, mask_fwd = model.make_masks( ) # (1, bilstm_dim), (1, bilstm_dim) # Parsing (bracketing) span_scores = precompute_all_span_scores( model=model, edus=edus, edus_postag=edus_postag, sbnds=sbnds, pbnds=pbnds, padded_edu_vectors=padded_edu_vectors, mask_bwd=mask_bwd, mask_fwd=mask_fwd) unlabeled_sexp = decoder.decode(span_scores=span_scores, inputs=edu_ids, sbnds=sbnds, pbnds=pbnds, use_sbnds=True, use_pbnds=True) # list of str unlabeled_tree = treetk.sexp2tree(unlabeled_sexp, with_nonterminal_labels=False, with_terminal_labels=False) unlabeled_tree.calc_spans() unlabeled_spans = treetk.aggregate_spans( unlabeled_tree, include_terminal=False, order="pre-order") # list of (int, int) # Parsing (assigning majority labels to the unlabeled tree) span2label = {(b, e): "<ELABORATION,N/S>" for (b, e) in unlabeled_spans} labeled_tree = treetk.assign_labels(unlabeled_tree, span2label, with_terminal_labels=False) labeled_sexp = treetk.tree2sexp(labeled_tree) f.write("%s\n" % " ".join(labeled_sexp))
def crawler(url, start_page, end_page):
    with open("output.json", "w") as f:
        # open the browser
        browser = webdriver.Firefox()
        # load the URL
        browser.get(url)
        # get the "product overview" (產品總覽) link
        res = browser.find_element_by_id('ContentPlaceHolder1_LinkButton11')
        # click it
        res.click()
        # crawl from start_page to end_page
        for i in pyprind.prog_bar(range(start_page, end_page + 1)):
            # the first page is already shown, so no click is needed for it
            if (i != 1):
                # find the button for the next page
                res = browser.find_element_by_link_text(str(i))
                # click it
                res.click()
            # get the source of the page
            pagesource = browser.page_source
            # get the content of the page
            soup = BeautifulSoup(pagesource, "lxml")
            # get the table
            table = soup.find('table',
                              attrs={'id': 'ContentPlaceHolder1_GVTABPRO'})
            # get the rows of the table
            rows = table.find_all('tr')
            index = 0
            for row in rows:
                # index == 0 means it's the header row
                if (index == 0):
                    cols = row.find_all('th')
                    colname = [element.text.strip() for element in cols]
                    index = index + 1
                else:
                    # get the cols from the row
                    cols = row.find_all('td')
                    # the elements of the table are stored in cols now
                    cols = [element.text.strip() for element in cols]
                    # the pager row (page-number links)
                    if (cols[0] == '12345678910'):
                        break
                    # store the cols into data
                    # data is a dict
                    data = {
                        str(colname[0]): cols[0],
                        str(colname[1]): cols[1],
                        str(colname[2]): cols[2],
                        str(colname[3]): cols[3],
                        str(colname[4]): cols[4],
                        str(colname[5]): cols[5],
                        str(colname[6]): cols[6],
                        str(colname[7]): cols[7],
                        str(colname[8]): cols[8]
                    }
                    # store it into dataout
                    dataout.append(data)
        browser.close()
        f.write(json.dumps(dataout))
def crawl(i):
    info = graph.get_object(i)
    print(info)
    posts = graph.get_connections(i, 'posts')
    for p in pyprind.prog_bar(posts['data']):
        p['reactions'] = graph.get_connections(p['id'], 'reactions')
        p['comments'] = graph.get_connections(p['id'], 'comments')
    json.dump(posts, open('facebook.json', 'w'))
def progressbar(iterator):
    # if available, add a progress indicator; fall back to the plain iterator
    # when pyprind is missing or cannot wrap the input
    try:
        import pyprind
        iterator = pyprind.prog_bar(iterator)
    except Exception:
        pass
    return iterator
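# --- Illustrative usage sketch (not part of the original source) ---
# Works with or without pyprind installed, since the wrapper above falls back
# to returning the iterator unchanged.
total = 0
for value in progressbar(range(1000)):
    total += value
print(total)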
def import_institucion_educativa_superior(self, dump):
    instituciones = []
    lines = self.convert_to_lines(dump)
    for line in pyprind.prog_bar(
            lines, monitor=True,
            title="Importing higher studies for candidate"):
        this_inst_edu = get_institucion_superior(line)
        if this_inst_edu not in instituciones:
            instituciones.append(this_inst_edu)
    upload_instituciones(instituciones)
def inspect_parks(parks, output_dir):
    """Request data for each park, process it, and write it to disk."""
    # One ProgBar tracks the whole run; iterating the list directly avoids
    # drawing a second bar via pyprind.prog_bar on the same loop.
    bar = pyprind.ProgBar(len(parks))
    for park in parks:
        data = inspect_park(park)
        fn = join(output_dir, '{}.json'.format(park.id))
        with open(fn, 'w') as f:
            json.dump(data, f)
        bar.update(item_id=park.name[:20])
def train(self):
    memory = ReplayMem(
        obs_dim=self.env.observation_space.flat_dim,
        act_dim=self.env.action_space.flat_dim,
        memory_size=self.memory_size)
    itr = 0
    path_length = 0
    path_return = 0
    end = False
    obs = self.env.reset()
    for epoch in xrange(self.n_epochs):
        logger.push_prefix("epoch #%d | " % epoch)
        logger.log("Training started")
        for epoch_itr in pyprind.prog_bar(range(self.epoch_length)):
            # run the policy
            if end:
                # reset the environment and strategy when an episode ends
                obs = self.env.reset()
                self.strategy.reset()
                # self.policy.reset()
                self.strategy_path_returns.append(path_return)
                path_length = 0
                path_return = 0
            # note action is sampled from the policy not the target policy
            act = self.strategy.get_action(obs, self.policy)
            nxt, rwd, end, _ = self.env.step(act)
            path_length += 1
            path_return += rwd
            if not end and path_length >= self.max_path_length:
                end = True
                if self.include_horizon_terminal:
                    memory.add_sample(obs, act, rwd, end)
            else:
                memory.add_sample(obs, act, rwd, end)
            obs = nxt
            if memory.size >= self.memory_start_size:
                for update_time in xrange(self.n_updates_per_sample):
                    batch = memory.get_batch(self.batch_size)
                    self.do_update(itr, batch)
            itr += 1
        logger.log("Training finished")
        if memory.size >= self.memory_start_size:
            self.evaluate(epoch, memory)
        logger.dump_tabular(with_prefix=False)
        logger.pop_prefix()
def create_final_image_barcode(pieces_width, final_width, height, fname, images):
    bc = Image.new('RGB', (pieces_width, height))
    posx = 0
    for img in pyprind.prog_bar(images):
        bc.paste(img[0], (posx, 0))
        posx += img[1]
    os.chdir('..')
    bc = bc.resize((final_width, height), Image.ANTIALIAS)
    bc.save(fname, 'PNG')
def optimize_gen(self, inputs, extra_inputs=None, callback=None, yield_itr=None): if len(inputs) == 0: # Assumes that we should always sample mini-batches raise NotImplementedError f_opt = self._opt_fun["f_opt"] f_loss = self._opt_fun["f_loss"] if extra_inputs is None: extra_inputs = tuple() last_loss = f_loss(*(tuple(inputs) + extra_inputs)) start_time = time.time() dataset = BatchDataset( inputs, self._batch_size, extra_inputs=extra_inputs #, randomized=self._randomized ) itr = 0 for epoch in pyprind.prog_bar(list(range(self._max_epochs))): for batch in dataset.iterate(update=True): f_opt(*batch) if yield_itr is not None and (itr % (yield_itr+1)) == 0: yield itr += 1 new_loss = f_loss(*(tuple(inputs) + extra_inputs)) if self._verbose: logger.log("Epoch %d, loss %s" % (epoch, new_loss)) if self._callback or callback: elapsed = time.time() - start_time callback_args = dict( loss=new_loss, params=self._target.get_param_values(trainable=True) if self._target else None, itr=epoch, elapsed=elapsed, ) if self._callback: self._callback(callback_args) if callback: callback(**callback_args) if abs(last_loss - new_loss) < self._tolerance: break last_loss = new_loss
def genCharVideo(self, filepath): self.charVideo = [] cap = cv2.VideoCapture(filepath) self.timeInterval = round(1 / cap.get(5), 3) nf = int(cap.get(7)) print("Generate char video, please wait...") if cap.isOpened(): for i in pyprind.prog_bar(range(nf)): ret, vframe = cap.read() if ret: rawFrame = cv2.cvtColor(vframe, cv2.COLOR_BGR2GRAY) frame = self.convert(rawFrame, os.get_terminal_size(), fill=True) self.charVideo.append(frame) cap.release()
def epic_ixs(primers, interval=80, search_range=30):
    """
    Find triplets of indices among primer candidates that are on average
    80 bases apart, with a flexibility of 30 bases.
    """
    starts = list(map(list, zip(*primers)))[0]
    for start1 in pyprind.prog_bar(starts):
        start2 = start1 + interval
        start3 = start2 + interval
        for ix1 in range(-search_range, search_range):
            str2 = start2 + ix1
            for ix2 in range(-search_range, search_range):
                str3 = start3 + ix2
                if str2 in starts and str3 in starts:
                    yield (start1, str2, str3)
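# --- Illustrative usage sketch (not part of the original source) ---
# Primer candidates are (start, sequence)-like tuples; only the start positions
# are used here, and the sequences are placeholder strings.
primers = [(0, "ACGT"), (80, "GGTA"), (160, "TTAC"), (400, "CCGA")]
triplets = list(epic_ixs(primers, interval=80, search_range=30))
print(triplets)  # [(0, 80, 160)] -- the candidate at 400 is too far away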
def import_institucion_educativa(self, dump):
    instituciones = []
    for line in pyprind.prog_bar(dump):
        fields = line.strip().split('\t')
        this_inst_edu = get_institucion_primaria(fields)
        if this_inst_edu not in instituciones:
            instituciones.append(this_inst_edu)
        this_inst_edu = get_institucion_secundaria(fields)
        if this_inst_edu not in instituciones:
            instituciones.append(this_inst_edu)
    upload_instituciones(instituciones)
def render_model_image(self, fimg, xlim=None, ylim=None, exclude=None): # create model image, and add each patch in - init with sky noise mod_img = np.ones(fimg.nelec.shape) * fimg.epsilon source_list = [s for s in self.srcs if s is not exclude] if not len(source_list) == 0: # add each source's model patch for s in pyprind.prog_bar(source_list): patch, ylim, xlim = s.compute_model_patch(fits_image=fimg, xlim=xlim, ylim=ylim) mod_img[ylim[0]:ylim[1], xlim[0]:xlim[1]] += patch if xlim is not None and ylim is not None: mod_img = mod_img[ylim[0]:ylim[1], xlim[0]:xlim[1]] return mod_img
def create_color_barcode(colors, bar_width, height, width, fname):
    barcode_width = len(colors) * bar_width
    bc = Image.new('RGB', (barcode_width, height))
    draw = ImageDraw.Draw(bc)
    # draw the new barcode
    posx = 0
    print('Generating barcode...')
    for color in pyprind.prog_bar(colors):
        draw.rectangle([posx, 0, posx + bar_width, height], fill=color)
        posx += bar_width
    del draw
    bc = bc.resize((width, height), Image.ANTIALIAS)
    bc.save(fname, 'PNG')
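# --- Illustrative usage sketch (not part of the original source) ---
# Assumes Pillow and pyprind are available; the colors below are arbitrary RGB
# tuples standing in for per-frame colors extracted from a video, and the
# output filename is made up.
frame_colors = [(200, 30, 30), (30, 200, 30), (30, 30, 200)] * 20
create_color_barcode(frame_colors, bar_width=5, height=200, width=600,
                     fname='example_barcode.png')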
def import_education_for_candidate(self, dump): estudios = [] lines = self.convert_to_lines(dump) for line in pyprind.prog_bar( lines, monitor=True, title="Importing studies for candidate"): if self.sheet == '2': e = self.construct_education_obj(line, 'superior') estudios.append(e) elif self.sheet == '1': e = self.construct_education_obj(line, 'primaria') if e.inicio != '0': estudios.append(e) e = self.construct_education_obj(line, 'secundaria') if e.inicio != '0': estudios.append(e) Estudio.objects.bulk_create(estudios)
def spawn_image_threads(num_threads, fname, bar_width, height, width):
    # change directories if we aren't already in frames
    if 'frames' not in os.getcwd():
        os.chdir('frames')
    q = queue.Queue()
    # get a distributed list of images for the threads
    images = helpers.distribute_frame_lists(num_threads)
    threads = []
    for i in range(num_threads):
        t_fname = 'thread_{}_barcode.png'.format(i)
        thread = threading.Thread(target=create_thread_barcode,
                                  args=(bar_width, height, t_fname, images[i], i, q))
        threads.append(thread)
    # stitch together several smaller barcodes on separate threads
    # to speed up the process
    print('{} threads creating barcodes with {} frames each...'.format(
        num_threads, len(images[0])))
    print('Progress bar may take a while to start moving if there are a lot of frames.')
    for thread in threads:
        # thread.daemon = True
        thread.start()
    pieces_width = 0
    # a list to put the thread results in the correct order
    thread_results = [None] * num_threads
    for i in pyprind.prog_bar(range(num_threads)):
        result = q.get()
        thread_results[result[0]] = [result[1], result[2]]
        pieces_width += result[2]
    # then finally stitch together all the pieces that the threads generated
    print('Generating final barcode...')
    create_final_image_barcode(pieces_width, width, height, fname, thread_results)
    # delete thread pieces
    for i in range(num_threads):
        os.remove('frames/thread_{}_barcode.png'.format(i))
    return
def validate_directory(dirpath): data, errors = extract_directory(dirpath, False), {} print("Validating datasets:") for ds in pyprind.prog_bar(data): try: dataset_schema(ds) except Invalid as err: errors[err.msg] = {"path": err.path, "dataset": ds} if errors: logfile = "ocelot-validation-errors.log" errors = [(k, v['path'], v['dataset']) for k, v in errors.items()] print("{} errors found.\nSee error logfile {} for details.".format( len(errors), logfile) ) with open(logfile, "w", encoding='utf-8') as f: f.write("Internal validation errors for extracted directory:\n{}\n".format(dirpath)) f.write(pprint.pformat(errors, width=120, compact=True)) else: print("No errors found")
def fit(num_epochs, minibatch_size, L, optimizer, sess): num_batches = N // minibatch_size # set up cost function and updates if load_data: idx = tf.placeholder(tf.int32, name='idx') mbsize = tf.constant(minibatch_size) xdimsize = tf.constant(xdim) x_batch = tf.slice(X_all, tf.pack([idx*mbsize, 0]), tf.pack([mbsize,xdimsize]), name='x_batch') else: x_batch = tf.placeholder(tf.float32, shape=[minibatch_size, xdim], name='X') cost = -tf.reduce_mean(vlb(x_batch, L)) * N train_step = optimizer.minimize(cost) sess.run(tf.initialize_variables(ut.nontrainable_variables())) def train(bidx): if load_data: train_step.run(feed_dict={idx:bidx}, session=sess) return cost.eval(feed_dict={idx:bidx}, session=sess) else: xb = X[bidx*minibatch_size:(bidx+1)*minibatch_size] train_step.run(feed_dict={x_batch: xb}, session=sess) return cost.eval(feed_dict={x_batch: xb}, session=sess) start = time() for i in xrange(num_epochs): bidxs = npr.permutation(num_batches) vals = [train(bidx) for bidx in pyprind.prog_bar(bidxs)] print 'epoch {:>4} of {:>4}: {:> .6}' . \ format(i+1, num_epochs, np.median(vals[-10:])) if callback: callback(i) # will tell you what nodes are being added #tf.get_default_graph().finalize() stop = time() print 'cost {}, {:>5} sec per update, {:>5} sec total\n'.format( np.median(vals[-10:]), (stop - start) / N, stop - start)
def main(args):
    path = args.path
    dim = args.dim
    topk = args.topk
    output = args.output
    word2vec = word_evaluation.load_word2vec(path=path, dim=dim)
    vocab = word2vec.keys()
    wrapper = word_evaluation.Wrapper(word2vec)
    with open(output, "w") as f:
        word_i = 0
        vocab_size = len(vocab)
        for word in pyprind.prog_bar(vocab):
            retrieved = wrapper.most_similar(positives=[word], negatives=[], K=topk)
            res = [w for w, s in retrieved]
            res = " ".join(res)
            f.write("[%d/%d: %s]: %s\n" % (word_i + 1, vocab_size, word, res))
            f.flush()
            word_i += 1
def institution_verify(self, save=False, institution=['nyu', 'new york university']): if self.count != 0: remove_list = [] if save == True and not os.path.exists('./paper/%s/' %self.author): os.makedirs('./paper/%s/' %self.author) for count in pyprind.prog_bar(range(len(self.pdf))): os.system('wget -q -U "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2.3) Gecko/20100401 ' 'Firefox/3.6.3" -O ./check.pdf %s' %self.pdf[count]) if save == True: #os.system('cp ./check.pdf ./paper/%s/%s.pdf' %(self.author, self.arxiv_id[count])) if len(self.arxiv_id[count].split('/')) >1 : temp_dir = self.arxiv_id[count].split('/')[0] if not os.path.exists('./paper/%s/%s/' % (self.author, temp_dir)): os.makedirs('./paper/%s/%s/' % (self.author, temp_dir)) shutil.copy('./check.pdf', './paper/%s/%s.pdf' %(self.author, self.arxiv_id[count])) try: text = convert('./check.pdf', pages=[0,1,2]).lower() match_flag = False for match_text in institution: if text.find(match_text) != -1: match_flag = True break if match_flag == True: continue else: remove_list.append(count) except: print("Can not read file %s" % self.arxiv_id[count]) remove_list.append(count) continue os.system("rm ./check.pdf") self.arxiv_id = (np.delete(np.array(self.arxiv_id), remove_list, axis=0)).tolist() self.time = (np.delete(np.array(self.time), remove_list, axis=0)).tolist() self.title = (np.delete(np.array(self.title), remove_list, axis=0)).tolist() self.category = (np.delete(np.array(self.category), remove_list, axis=0)).tolist() self.pdf = (np.delete(np.array(self.pdf), remove_list, axis=0)).tolist() self.contributor = (np.delete(np.array(self.contributor), remove_list, axis=0)).tolist() self.count = len(self.title) self.subject = combine_subject(self.category) print('Remove %d articles' % len(remove_list))
def complete_me(content_as_list, output_filename, email): """ Add metadata to the blast output file. Metadata is obtained by querying the NCBI database. :param content_as_list: blast output content (CSV file) as list of lines. :param output_filename: write line by line. """ Entrez.email = email for i in pyprind.prog_bar(range(len(content_as_list))): line = content_as_list[i] line = line.strip() if line.startswith('query'): with open(output_filename, 'w') as handle: handle.write(line + '\tGeneLength\tTitle\n') continue line_complement = _get_metadata_as_string(line) with open(output_filename, 'a') as handle: handle.write(line + '\t' + line_complement + '\n')
def spawn_threads(threads, kmeans): # change directories if it already isn't in frames if not 'frames' in os.getcwd(): os.chdir('frames') q = queue.Queue() num_threads = threads # get a distributed list of images for the threads images = helpers.distribute_frame_lists(num_threads) threads = [] for i in range(num_threads): if kmeans: thread = threading.Thread(target=kc.get_image_colors, args=(i, q, images[i])) else: thread = threading.Thread(target=pc.get_image_colors, args=(i, q, images[i])) threads.append(thread) print('{} threads generating frame colors with {} frames each...'.format(num_threads, len(images[0]))) for thread in threads: thread.daemon = True thread.start() thread_results = [None] * num_threads for i in pyprind.prog_bar(range(num_threads)): result = q.get() thread_results[result[0]] = result[1] # return to the original directory os.chdir('..') return [item for sublist in thread_results for item in sublist]
bsrcs = ssrcs[38:39] + gsrcs[38:39] bidx = np.concatenate([sidx[38:39], gidx[38:39]]) # breadcrumbs - make sure we can examine which source corresponds to # which catalog entry blocs = np.array([s.params.u for s in bsrcs]) plocs = primary_field_df[['ra', 'dec']].values[bidx,:] assert np.allclose(blocs, plocs), "not the same location! noooo" ###################################### # gibbs step on a handful of sources # ###################################### print "======= running celeste sampler ========" # do some resampling, each source keeps each sample Nsamps = 10 for i in pyprind.prog_bar(xrange(Nsamps)): # resample photon images model.field_list[0].resample_photons(bsrcs, verbose=True) # resample source params for s in pyprind.prog_bar(bsrcs): s.resample() s.store_sample() s.store_loglike() # global/local update #for s in bsrcs: # s.sample_type() # global updates #model.sample_birth() #model.sample_death() ########################################
def __init__(self, items):
    from pyprind import prog_bar
    self.bar = prog_bar(items)