Code example #1
def generate_inverse_strategy_data(strategy_lists, ef_input_keys,
                                   ef_output_keys, techno_keys_waste,
                                   techno_keys_product,
                                   unit_scaling_techno_product,
                                   unit_scaling_techno_waste, sacrificial_lca,
                                   water_dir):
    initial_ratios_inverse = {}
    print("Calculate initial in/out ratios for inverse strategy activities")
    for act in pyprind.prog_bar(strategy_lists['inverse']):
        initial_ratios_inverse[act] = 1 / initial_in_over_out(
            act, ef_input_keys, ef_output_keys, techno_keys_waste,
            techno_keys_product, unit_scaling_techno_product,
            unit_scaling_techno_waste)

    print("getting row incides for inverse strategy")
    rows_of_interest_inverse = {}
    for act in pyprind.prog_bar(strategy_lists['inverse']):
        rows_of_interest_inverse[act] = identify_rows_of_interest_inverse(
            sacrificial_lca, act, ef_input_keys, ef_output_keys,
            techno_keys_waste, techno_keys_product)

    with open(os.path.join(water_dir, "initial_ratios_inverse.pickle"),
              "wb") as f:
        pickle.dump(initial_ratios_inverse, f)
    with open(os.path.join(water_dir, "rows_of_interest_inverse.pickle"),
              "wb") as f:
        pickle.dump(rows_of_interest_inverse, f)
Code example #2
def generate_default_strategy_data(strategy_lists, transformation_from,
                                   transformation_to, sacrificial_lca,
                                   land_use_dir):
    if strategy_lists['default']:
        initial_ratios_default = {}
        print(
            "Calculate initial in/out ratios for default strategy activities")
        for act in pyprind.prog_bar(strategy_lists['default']):
            initial_ratios_default[act] = initial_in_over_out(
                act,
                transformation_from,
                transformation_to,
            )

        rows_of_interest_default = {}
        print("getting rows of interest for default strategy")
        for act in pyprind.prog_bar(strategy_lists['default']):
            rows_of_interest_default[act] = identify_rows_of_interest_default(
                sacrificial_lca, act, transformation_from, transformation_to)

        with open(os.path.join(land_use_dir, "initial_ratios_default.pickle"),
                  "wb") as f:
            pickle.dump(initial_ratios_default, f)
        with open(
                os.path.join(land_use_dir, "rows_of_interest_default.pickle"),
                "wb") as f:
            pickle.dump(rows_of_interest_default, f)
Code example #3
def generate_inverse_strategy_data(strategy_lists, transformation_from,
                                   transformation_to, sacrificial_lca,
                                   land_use_dir):

    if strategy_lists['inverse']:
        initial_ratios_inverse = {}
        print(
            "Calculate initial in/out ratios for inverse strategy activities")
        for act in pyprind.prog_bar(strategy_lists['inverse']):
            initial_ratios_inverse[act] = 1 / initial_in_over_out(
                act,
                transformation_from,
                transformation_to,
            )

        print("getting keys for inverse strategy")
        rows_of_interest_inverse = {}
        for act in pyprind.prog_bar(strategy_lists['inverse']):
            rows_of_interest_inverse[act] = identify_rows_of_interest_inverse(
                sacrificial_lca,
                act,
                transformation_from,
                transformation_to,
            )

        with open(os.path.join(land_use_dir, "initial_ratios_inverse.pickle"),
                  "wb") as f:
            pickle.dump(initial_ratios_inverse, f)
        with open(
                os.path.join(land_use_dir, "rows_of_interest_inverse.pickle"),
                "wb") as f:
            pickle.dump(rows_of_interest_inverse, f)
Code example #4
def generate_default_strategy_data(strategy_lists, ef_input_keys,
                                   ef_output_keys, techno_keys_waste,
                                   techno_keys_product,
                                   unit_scaling_techno_product,
                                   unit_scaling_techno_waste, sacrificial_lca,
                                   water_dir):
    initial_ratios_default = {}
    print("Calculate initial in/out ratios for default strategy activities")
    for act in pyprind.prog_bar(strategy_lists['default']):
        initial_ratios_default[act] = initial_in_over_out(
            act, ef_input_keys, ef_output_keys, techno_keys_waste,
            techno_keys_product, unit_scaling_techno_product,
            unit_scaling_techno_waste)
    rows_of_interest_default = {}

    print("getting rows of interest for default strategy")
    for act in pyprind.prog_bar(strategy_lists['default']):
        rows_of_interest_default[act] = identify_rows_of_interest_default(
            sacrificial_lca, act, ef_input_keys, ef_output_keys,
            techno_keys_waste, techno_keys_product)

    with open(os.path.join(water_dir, "initial_ratios_default.pickle"),
              "wb") as f:
        pickle.dump(initial_ratios_default, f)
    with open(os.path.join(water_dir, "rows_of_interest_default.pickle"),
              "wb") as f:
        pickle.dump(rows_of_interest_default, f)
Code example #5
def main(args):
    path = args.path

    filenames = os.listdir(path)
    filenames = [n for n in filenames if n.endswith(".edus.arcs")]
    filenames.sort()

    for filename in pyprind.prog_bar(filenames):
        edus_arcs = utils.read_lines(os.path.join(path, filename),
                                     process=lambda line: line.split())

        edus_deprels = []
        for arcs in edus_arcs:
            arcs = treetk.hyphens2arcs(arcs)
            deprels = [l for h, d, l in arcs]
            edus_deprels.append(deprels)

        # Write
        with open(
                os.path.join(path,
                             filename.replace(".edus.arcs", ".edus.deprels")),
                "w") as f:
            for deprels in edus_deprels:
                deprels = " ".join(deprels)
                f.write("%s\n" % deprels)
Code example #6
File: dataset.py Project: aehm03/deepmatcher
    def read_examples_from_file(fields, format: str, path):
        make_example = {
            'json': Example.fromJSON,
            'dict': Example.fromdict,
            'tsv': Example.fromCSV,
            'csv': Example.fromCSV
        }[format.lower()]
        lines = 0
        with open(os.path.expanduser(path), encoding="utf8") as f:
            for line in f:
                lines += 1
        with open(os.path.expanduser(path), encoding="utf8") as f:
            if format == 'csv':
                reader = unicode_csv_reader(f)
            elif format == 'tsv':
                reader = unicode_csv_reader(f, delimiter='\t')
            else:
                reader = f

            next(reader)

            examples = [
                make_example(line, fields) for line in pyprind.prog_bar(
                    reader,
                    iterations=lines,
                    title='\nReading and processing data from "' + path + '"')
            ]
        return examples
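One detail worth noting in the example above: `prog_bar` can size its bar from `len()` when it wraps a list, but a csv reader is a generator, so the file is read once up front purely to count lines and the count is passed as `iterations=`. A minimal stand-alone sketch of that pattern (the file name `records.txt` and the per-line work are hypothetical):

import pyprind

# A plain file handle has no len(), so count the records in a cheap first pass
# and hand the total to prog_bar via iterations=.
with open("records.txt", encoding="utf8") as f:
    n_lines = sum(1 for _ in f)

lengths = []
with open("records.txt", encoding="utf8") as f:
    for line in pyprind.prog_bar(f, iterations=n_lines, title="Reading records"):
        lengths.append(len(line.rstrip("\n")))  # stand-in for real per-record work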
Code example #7
File: expose.py Project: nrrd/expose.py
def media_jobs(cfg, dry_run, is_video):
    """Generate either all image or all video jobs for a given config."""
    if is_video:
        media_lc = 'video'
        media_uc = 'Video'
        src_media = src_videos
        media_targets = vid_targets
    else:
        media_lc = 'image'
        media_uc = 'Image'
        src_media = src_images
        media_targets = img_targets

    l.info('Generating {} jobs...'.format(media_lc))
    jobs = []
    skipped = 0

    si = src_media(cfg)
    if not si:
        l.debug('No source {}s'.format(media_lc))
        return

    for src in pyprind.prog_bar(si):
        j, s = media_targets(cfg, src, dry_run)
        jobs.extend(j)
        skipped += s

    l.info('{} jobs: running {}, skipped {}, total {}'.format(
        media_uc, len(jobs), skipped,
        len(jobs) + skipped))

    return jobs
Code example #8
def find_optimal_gamma(horizon=15, n_traj=1000, map_name="5x5"):
    w_env = FrozenLakeEnv(map_name="9x9",
                          horizon=horizon,
                          theta_dist="hypercube")
    for gamma in candidate_gammas:
        test_pi_H = EpsOptimalMDPPolicy(w_env, discount=gamma)
        logger.log("-------------------")
        logger.log("Evaluating gamma={} for {} timesteps".format(
            gamma, horizon))
        logger.log("-------------------")
        test_env = HumanCRLWrapper(w_env, test_pi_H, 0)
        logger.log("Obtaining Samples...")
        # Alas, the rllab samplers don't support hot swapping envs and batch sizes
        # TODO: write a new parallel sampler, instead of sampling manually
        rewards = []
        regrets = []
        for i in pyprind.prog_bar(range(n_traj)):
            observation = test_env.reset()
            for t in range(horizon):
                action = test_env.nA - 1
                observation, reward, done, info = test_env.step(action)
                if done:
                    rewards.append(info["accumulated rewards"])
                    regrets.append(info["accumulated regret"])
                    break
        #feel free to add more data
        logger.log("NumTrajs {}".format(n_traj))
        logger.log("AverageReturn {}".format(np.mean(rewards)))
        logger.log("StdReturn {}".format(np.std(rewards)))
        logger.log("MaxReturn {}".format(np.max(rewards)))
        logger.log("MinReturn {}".format(np.min(rewards)))
        logger.log("AverageRegret {}".format(np.mean(regrets)))
        logger.log("MaxRegret {}".format(np.max(regrets)))
        logger.log("MinRegret {}".format(np.min(regrets)))
Code example #9
def eval_mdp_policies(horizon=15, n_traj=100000, log_dir=None):
    text_output_file = None if log_dir is None else osp.join(log_dir, "text")
    w_env = FrozenLakeEnv(horizon=horizon)
    if text_output_file is not None:
        logger.add_text_output(text_output_file)
    for human_policy in human_mdp_policies.values():
        logger.log("-------------------")
        logger.log("Evaluating {} for {} timesteps".format(
            human_policy.__name__, horizon))
        logger.log("-------------------")

        test_pi_H = human_policy(w_env)
        test_env = HumanCRLWrapper(w_env, test_pi_H)
        logger.log("Obtaining Samples...")
        rewards = []
        for i in pyprind.prog_bar(range(n_traj)):
            observation = test_env.reset()
            for t in range(horizon):
                # _, action = observation
                # if action == test_env.nA:
                action = test_env.nA - 1
                observation, reward, done, info = test_env.step(action)
                if done:
                    rewards.append(info["accumulated rewards"])
                    break
        #feel free to add more data
        logger.log("NumTrajs {}".format(n_traj))
        logger.log("AverageReturn {}".format(np.mean(rewards)))
        logger.log("StdReturn {}".format(np.std(rewards)))
        logger.log("MaxReturn {}".format(np.max(rewards)))
        logger.log("MinReturn {}".format(np.min(rewards)))
Code example #10
def crawl_songs(area_list, save_path):
    singer_id_done = []
    for root, dirs, files in os.walk(save_path):
        for file_name in files:
            singer_id = re.search("song_list_.*_(.*).json", file_name).group(1)
            singer_id_done.append(int(singer_id))

    area_2_singers = json.load(
        open("../Sources/qq_music_yield/area_2_singers.json",
             "r",
             encoding="utf-8"))

    for area in area_list:
        singer_list = area_2_singers[area]
        bar = pyprind.ProgBar(
            len(singer_list),
            title="process of crawling songs of singers of {}".format(area))
        for singer in pyprind.prog_bar(singer_list):
            singer_name = singer[settings.KEY_SINGER_NAME]
            singer_id = singer[settings.KEY_SINGER_ID]
            if singer_id in singer_id_done:
                continue
            song_list = crawl_song_list(singer)
            json.dump(
                song_list,
                open("%s/song_list_%s_%s.json" %
                     (save_path, singer_name, singer_id),
                     "w",
                     encoding="utf-8"))
            bar.update()
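Note that the snippet above drives a manual `ProgBar` (via `bar.update()`) and at the same time wraps the loop in `pyprind.prog_bar`, so two bars advance at once; either form alone is enough. A minimal sketch of the manual form, which also lets each update carry an item label (the `singers` list below is a hypothetical stand-in for the crawled data):

import pyprind

singers = ["singer_a", "singer_b", "singer_c"]  # hypothetical stand-in data

# Manual form: build ProgBar with the total, then advance it once per item.
bar = pyprind.ProgBar(len(singers), title="crawling songs for each singer")
for singer in singers:
    # crawl_song_list(singer) and json.dump(...) would go here
    bar.update(item_id=singer)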
Code example #11
    def train(self, sess=None):

        if sess is None:
            sess = tf.Session()

        sess.run(tf.global_variables_initializer())

        replay_buffer = SimpleReplayBuffer(env_spec=self._env.spec, max_replay_buffer_size=self._max_pool_size)

        path_length = 0
        episode_rewards = 0
        observation = self._env.reset()

        with sess.as_default():
            self._update_target()

            for ep in range(self._n_epochs):
                mean_loss = 0
                trained_iter = 0
                epoch_rewards = list()
                episode_lengths = list()
                with logger.prefix('Epoch #%d | ' % ep):
                    for ep_iter in pyprind.prog_bar(range(self._epoch_length)):
                        self._env.render()
                        action, _ = self._es.get_action(observation)
                        next_observation, reward, terminal, _ = self._env.step(action)

                        replay_buffer.add_sample(
                            observation=observation,
                            next_observation=next_observation,
                            action=action,
                            terminal=terminal,
                            reward=reward,
                        )

                        episode_rewards += reward
                        path_length += 1

                        observation = next_observation

                        if terminal or path_length >= self._max_path_length:
                            observation = self._env.reset()
                            epoch_rewards.append(episode_rewards)
                            episode_lengths.append(path_length)
                            path_length = 0
                            episode_rewards = 0

                        iter = ep * self._epoch_length + ep_iter
                        if replay_buffer.size > self._min_pool_size:
                            batch = replay_buffer.random_batch(self._batch_size)
                            loss = self._do_training(iter, batch)
                            mean_loss += loss
                            trained_iter += 1

                        if iter % self._target_update_period == 0 and replay_buffer.size > self._min_pool_size:
                            self._update_target()
                    logger.record_tabular('mean-td-error', (mean_loss/self._epoch_length))
                    logger.record_tabular('mean-episode-reward', np.mean(epoch_rewards))
                    logger.record_tabular('mean-episode-length', np.mean(episode_lengths))
                    logger.dump_tabular()
Code example #12
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "segmented"))
    filenames = [n for n in filenames if n.endswith(".txt")]
    filenames.sort()

    utils.mkdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                     "preprocessed"))

    for filename in pyprind.prog_bar(filenames):
        path_seg = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "segmented", filename)
        path_raw = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "raw", filename)
        path_dst = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "preprocessed",
                                filename.replace(".txt", ".edus"))
        # Input
        edus = utils.read_lines(path_seg, process=lambda line: line)
        edus = remove_empty_lines(filename, edus)
        raw_lines = utils.read_lines(path_raw, process=lambda line: line)
        raw_lines = remove_empty_lines(filename, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)
        # Processing
        edus = convert_edus(edus, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)
        # Output
        utils.write_lines(path_dst, edus)
Code example #13
    def _identify_techno_keys(self):
        """Identify keys of activities with water production exchanges

         These should be considered in balancing. Keys are grouped by activities
         associated with input exchanges (e.g. wastewater treatment) and
         output exchanges (e.g. potable water)
         """
        names_file = Path(__file__).parents[0]/'data'/'water_intermediary_exchange_names.json'
        if not names_file.is_file():
            raise FileNotFoundError("Could not find file water_intermediary_exchange_names.json in expected location")
        with open(names_file, "rb") as f:
            techno_product_names_dict = json.load(f)
        techno_product_names = techno_product_names_dict[self.ecoinvent_version]
        techno_treat_keys = []
        techno_transfo_keys = []
        db_loaded = Database(self.database_name).load()
        for act_key, act in pyprind.prog_bar(db_loaded.items()):
            if act['reference product'] in techno_product_names:
                if act['production amount']<0:
                    techno_treat_keys.append(act_key)
                elif act['production amount']>0:
                    techno_transfo_keys.append(act_key)
                else:
                    warnings.warn("Activity {} has a product exchange {} with "
                                  "an amount of 0: skipped".format(
                        act_key,
                        act['reference product']
                    ))
        return techno_transfo_keys, techno_treat_keys
Code example #14
def subject_verify(new_arxiv):
    if new_arxiv.count > 0:
        subject_list = copy.copy(new_arxiv.subject)
        remove_list = []
        new_ver = arxiv(new_arxiv.author)
        new_ver.parse()
        for count in pyprind.prog_bar(range(len(new_ver.title))):
            if len(set(subject_list) & set(new_ver.category[count])) == 0:
                remove_list.append(count)
        new_ver.arxiv_id = (np.delete(np.array(new_ver.arxiv_id),
                                      remove_list,
                                      axis=0)).tolist()
        new_ver.time = (np.delete(np.array(new_ver.time), remove_list,
                                  axis=0)).tolist()
        new_ver.title = (np.delete(np.array(new_ver.title),
                                   remove_list,
                                   axis=0)).tolist()
        new_ver.category = (np.delete(np.array(new_ver.category),
                                      remove_list,
                                      axis=0)).tolist()
        new_ver.pdf = (np.delete(np.array(new_ver.pdf), remove_list,
                                 axis=0)).tolist()
        new_ver.contributor = (np.delete(np.array(new_ver.contributor),
                                         remove_list,
                                         axis=0)).tolist()
        new_ver.count = len(new_ver.title)
        new_ver.subject = combine_subject(new_ver.category)
        print('Remove %d articles' % len(remove_list))
        return new_ver
    else:
        return new_arxiv
Code example #15
def main():
    dataset_path = "/path/to/Caltech-101"
    modelzoo_path = "/path/to/VGG16"
    
    # create an instance
    convnet = FeatureExtractor(
            prototxt_path=os.path.join(modelzoo_path, "vgg16_deploy.prototxt"),
            caffemodel_path=os.path.join(modelzoo_path, "vgg16.caffemodel"),
            target_layer_name="fc7",
            image_size=224,
            mean_values=[103.939, 116.779, 123.68])
    
    # header
    f = open("caltech101_vggnet_fc7_features.csv", "w")
    header = ["filepath"]
    for i in xrange(4096):
        header.append("feat%d" % (i+1))
    header = ",".join(header) + "\n"
    f.write(header)
    
    # extract features
    categories = os.listdir(dataset_path)
    for category in pyprind.prog_bar(categories):
        file_names = os.listdir(os.path.join(dataset_path, category))
        for file_name in file_names:
            img = cv2.imread(os.path.join(dataset_path, category, file_name))
            feat = convnet.transform(img)
            feat_str = [os.path.join(category, file_name)]
            for value in feat:
                feat_str.append(str(value))
            row = ",".join(feat_str)
            f.write("%s\n" % row)
            f.flush()

    f.close()
Code example #16
def split_by_id(beatdf, id_field='ptid', frac_train=.6, frac_val=.15):
    """ Deterministically splits the beatdf by _patient_ """
    empis = np.sort(beatdf[id_field].unique())
    print("Splitting %d unique patients" % len(empis))

    # deterministic split
    rs = np.random.RandomState(0)
    perm_idx = rs.permutation(len(empis))
    num_train = int(frac_train * len(empis))
    num_val = int(frac_val * len(empis))
    train_idx = perm_idx[:num_train]
    val_idx = perm_idx[num_train:(num_train + num_val)]
    test_idx = perm_idx[(num_train + num_val):]
    empis_train = empis[train_idx]
    empis_val = empis[val_idx]
    empis_test = empis[test_idx]
    print(" ... patient splits: %d train, %d val, %d test " %
          (len(empis_train), len(empis_val), len(empis_test)))

    # make dictionaries
    train_dict = {e: "train" for e in empis_train}
    val_dict = {e: "val" for e in empis_val}
    test_dict = {e: "test" for e in empis_test}
    split_dict = {**train_dict, **val_dict, **test_dict}

    # add train/val test split to each
    split = []
    for e in pyprind.prog_bar(beatdf[id_field]):
        split.append(split_dict[e])

    beatdf['split'] = split
    return beatdf
Code example #17
File: expose.py Project: tlvince/expose.py
def media_jobs(cfg, dry_run, is_video):
    if is_video:
        media_lc = 'video'
        media_uc = 'Video'
        src_media = src_videos
        media_targets = vid_targets
    else:
        media_lc = 'image'
        media_uc = 'Image'
        src_media = src_images
        media_targets = img_targets

    l.info('Generating {} jobs...'.format(media_lc))
    jobs = []
    skipped = 0

    si = src_media(cfg)
    if not si:
        l.debug('No source {}s'.format(media_lc))
        return

    for src in pyprind.prog_bar(si):
        j, s = media_targets(cfg, src, dry_run)
        jobs.extend(j)
        skipped += s

    l.info('{} jobs: running {}, skipped {}, total {}'
           .format(media_uc, len(jobs), skipped, len(jobs) + skipped))

    return jobs
Code example #18
def Train_Eval_Process_Layer_v2(train_X,train_Y,test_X,test_Y):
    # LSTM
    epoch_num = 10
    #model = LSTM_model(input_dim=8,hidden_dim=8)
    model = One_Sent2Other_Sent()
    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCELoss()
    for epoch_  in pyprind.prog_bar(range(epoch_num)):
        model.train()
        for i in range(len(train_X)):
            X = torch.tensor(train_X[i])#.cuda()
            pred_train_Y = model(X)
            Y = torch.tensor([train_Y[i]])#.cuda()
            true_train_Y = Y.squeeze(dim=-1)
            loss = criterion(pred_train_Y, true_train_Y.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #print('loss:',loss)
        model.eval()
        pred_test_Y = list()
        for i in range(len(test_X)):
            X = torch.tensor(test_X[i])#.cuda()
            pred_test_Y_i = model(X).cpu().data.numpy().reshape(1,1)
            pred_test_Y.append(pred_test_Y_i)
        test_Y_hat = np.concatenate(pred_test_Y,0)
        test_Y_hat_list = list()
        for i in range(test_Y_hat.shape[0]):
            if test_Y_hat[i,0] >= 0.5:
                test_Y_hat_list.append(1)
            else:
                test_Y_hat_list.append(0)
        Evaluation(test_Y_hat_list,test_Y)
Code example #19
    def write_db_to_brightway(self):
        for s in pyprind.prog_bar(self.scenarios.items()):
            scenario, year = s

            print('Write new database to Brightway2.')
            wurst.write_brightway2_database(
                self.db, "ecoinvent_" + scenario + "_" + str(year))
Code example #20
def extract_ecospold2_directory(dirpath, use_mp=True):
    """Extract all the ``.spold`` files in the directory ``dirpath``.

    Use a multiprocessing pool if ``use_mp``, which is the default."""
    if os.name == 'nt':
        use_mp = False

    assert os.path.isdir(dirpath), "Can't find directory {}".format(dirpath)
    filelist = [os.path.join(dirpath, filename)
                for filename in os.listdir(dirpath)
                if filename.lower().endswith(".spold")
                ]

    print("Extracting {} undefined datasets".format(len(filelist)))

    if use_mp:
        start = time()
        # With code from
        # http://jtushman.github.io/blog/2014/01/14/python-%7C-multiprocessing-and-interrupts/
        with multiprocessing.Pool(
                processes=multiprocessing.cpu_count(),
                initializer=lambda : signal.signal(signal.SIGINT, signal.SIG_IGN)
            ) as pool:
            try:
                data = pool.map(generic_extractor, filelist)
            except KeyboardInterrupt:
                pool.terminate()
                raise KeyboardInterrupt
        print("Extracted {} undefined datasets in {:.1f} seconds".format(len(data), time() - start))
    else:
        data = [generic_extractor(fp)
                for fp in pyprind.prog_bar(filelist)]

    # Unroll lists of lists
    return [y for x in data for y in x]
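In the example above the progress bar only appears in the serial fallback: `pool.map` gives no per-file feedback, so `prog_bar` is reserved for the single-process path. A stripped-down sketch of that serial-or-parallel shape, with a trivial `work` function standing in for `generic_extractor`:

import multiprocessing
import pyprind

def work(x):
    return x * x  # stand-in for generic_extractor

def run(items, use_mp=True):
    if use_mp:
        with multiprocessing.Pool() as pool:
            return pool.map(work, items)
    # Serial path: wrap the loop so long runs still show progress.
    return [work(x) for x in pyprind.prog_bar(items)]

if __name__ == "__main__":
    print(len(run(list(range(100)))))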
Code example #21
def validate_directory_against_xsd(dirpath, schema):
    """Extract all the ``.spold`` files in the directory ``dirpath``.

    Use a multiprocessing pool if ``use_mp``, which is the default."""
    assert os.path.isdir(dirpath), "Can't find data directory {}".format(
        dirpath)
    assert os.path.isfile(schema), "Can't find schema file {}".format(schema)

    filelist = [
        os.path.join(dirpath, filename) for filename in os.listdir(dirpath)
        if filename.lower().endswith(".spold")
    ]

    print(("Validating {} undefined datasets".format(len(filelist))))

    errors = []
    ecospold2_schema = etree.XMLSchema(etree.parse(open(schema)))

    for fp in pyprind.prog_bar(filelist):
        file = etree.parse(open(fp))
        if not ecospold2_schema.validate(file):
            errors.append(os.path.basename(fp))

    if errors:
        print("The following files did not validate:")
        pprint.pprint(errors)
    else:
        print("All files valid")
Code example #22
def main():
    config = utils.Config()

    path_out = os.path.join(config.getpath("data"), "aarc_abst")
    utils.mkdir(path_out)

    filenames = os.listdir(config.getpath("aarc"))
    filenames = [n for n in filenames if n.endswith(".txt.utf8")]
    filenames.sort()

    nlp = spacy.load("en_core_web_sm",
                     disable=["tagger", "parser", "ner", "textcat"])

    cnt = 0
    for filename in pyprind.prog_bar(filenames):
        text = extract_abstract(os.path.join(config.getpath("aarc"), filename))
        if text == "":
            # print("No Abstract!: %s" % filename)
            continue
        with open(
                os.path.join(path_out,
                             filename.replace(".txt.utf8", ".doc.tokens")),
                "w") as f:
            doc = nlp(text)
            tokens = [token.text for token in doc]
            assert len(tokens) > 0
            tokens = " ".join(tokens)
            f.write("%s\n" % tokens)
        cnt += 1

    print("Processed %d/%d files" % (cnt, len(filenames)))
Code example #23
    def bulk_upload(self):
        items_to_upload = []
        append = items_to_upload.append

        credentials = get_db_credentials(self.settings)
        if 'sqlite3' in credentials['ENGINE']:
            db = dataset.connect("sqlite:///" + os.path.basename(credentials['NAME']))
        if 'postgresql' in credentials['ENGINE']:
            db = dataset.connect('postgresql://' +
                                 credentials['USER'] + ':' +
                                 credentials['PASSWORD'] + '@' +
                                 credentials['HOST'] + ':' +
                                 credentials['PORT'] + '/' +
                                 credentials['NAME'])
        table = db['visitors_visitor']

        print("Starting checks to see if we have this item in our database.")
        if len(self.items) == 0:
            print("Nothing to upload")
        else:
            for i in pyprind.prog_bar(range(len(self.items))):
                item = self.items[i]
                try:
                    item['date'] = datetime.datetime.strptime(
                        item['date'],
                        '%Y-%m-%d',
                        )
                except ValueError:
                    item['date'] = None

                append(item)

            print("uploading %i records for table %s" % (len(items_to_upload), self.mytable))

            table.insert_many(items_to_upload)
Code example #24
def Format_csv2XY(path):
    X, Y, title, self_contradictory_template, revision_id_list = \
        list(), list(), list(), list(), list()
    df = pd.read_csv(path)
    page_title = list(df['page_title'])
    revision_text = list(df['revision_text'])
    revision_id = list(df['revision_id'])
    for i in pyprind.prog_bar(range(len(revision_text))):
        self_contradictory_template_i = list()
        text = revision_text[i]
        title_i = page_title[i]
        revision_id_i = revision_id[i]
        if isinstance(text, str) is True and len(text.split()) != 0:
            wikicode = mwparserfromhell.parse(text)
            templates = wikicode.filter_templates()
            is_pos = False
            for j in range(len(templates)):
                if 'Self-contradictory' in templates[j]:
                    is_pos = True
                    self_contradictory_template_i.append(templates[j])
            if is_pos:
                X.append(str(text))
                title.append(title_i)
                Y.append(1)
            else:
                X.append(str(text))
                title.append(title_i)
                Y.append(0)
            self_contradictory_template.append(self_contradictory_template_i)
            revision_id_list.append(revision_id_i)
    return X, Y, title, self_contradictory_template, revision_id_list
Code example #25
def Train_Eval_Process_Layer(train_X, train_Y, test_X, test_Y):
    # RetaGNN + Self Attention
    import pyprind
    import pickle
    epoch_num = 10
    input_dim = 8
    hidden_dim = 8
    model = double_LSTM_model().cuda()
    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCELoss()
    for epoch_ in range(epoch_num):
        model.train()
        for i in pyprind.prog_bar(range(len(train_X))):
            batch_X, batch_Y = train_X[i], train_Y[i]  #(b,l,d) ,(b,)
            batch_Y_hat = model(batch_X).squeeze(dim=-1)
            loss = criterion(batch_Y_hat, batch_Y.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #print('loss:',loss)
        model.eval()
        pred_Y = list()
        for i in range(len(test_X)):
            pred_Y.append(model(test_X[i]).view(1, -1))
        test_Y_hat = torch.cat(pred_Y, 0).cpu().data.numpy()
        test_Y_hat_list = list()
        for i in range(test_Y_hat.shape[0]):
            if test_Y_hat[i, 0] >= 0.5:
                test_Y_hat_list.append(1)
            else:
                test_Y_hat_list.append(0)
        Evaluation(test_Y_hat_list, test_Y)
Code example #26
def extract_ecospold2_directory(dirpath, use_mp=True):
    """Extract all the ``.spold`` files in the directory ``dirpath``.

    Use a multiprocessing pool if ``use_mp``, which is the default."""
    if os.name == 'nt':
        use_mp = False

    assert os.path.isdir(dirpath), "Can't find directory {}".format(dirpath)
    filelist = [
        os.path.join(dirpath, filename) for filename in os.listdir(dirpath)
        if filename.lower().endswith(".spold")
    ]

    print(("Extracting {} undefined datasets".format(len(filelist))))

    if use_mp:
        start = time()
        # With code from
        # http://jtushman.github.io/blog/2014/01/14/python-%7C-multiprocessing-and-interrupts/
        with multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                  initializer=lambda: signal.signal(
                                      signal.SIGINT, signal.SIG_IGN)) as pool:
            try:
                data = pool.map(generic_extractor, filelist)
            except KeyboardInterrupt:
                pool.terminate()
                raise KeyboardInterrupt
        print(("Extracted {} undefined datasets in {:.1f} seconds".format(
            len(data),
            time() - start)))
    else:
        data = [generic_extractor(fp) for fp in pyprind.prog_bar(filelist)]

    # Unroll lists of lists
    return [y for x in data for y in x]
Code example #27
File: jobCrawler.py Project: Stufinite/arrogant
    def getJob(self):
        job = []
        for i in range(1, 1000):
            if requests.get(
                    'https://www.yourator.co/api/v2/jobs?page={}'.format(
                        i)).json()['jobs'] == []:
                break
            job += requests.get(
                'https://www.yourator.co/api/v2/jobs?page={}'.format(
                    i)).json()['jobs']

        for i in pyprind.prog_bar(job):
            res = requests.get('https://www.yourator.co/' + i['path']).text
            soup = BeautifulSoup(res, "html.parser")
            i['inside'] = {}
            i['inside']['description'] = soup.select(
                '.description')[0].text.strip() if len(
                    soup.select('.description')) else ''
            for j in soup.select('.basic-info'):
                key, value = j.text.strip().replace(' ',
                                                    '').replace('\n',
                                                                '').split(':')
                i['inside'][key] = value

            if i['has_salary_info']:
                for j in soup.select('h2'):
                    if j.text == '薪資範圍':
                        i['salary'] = j.findNext('article').text
        with open('job.json', 'w') as f:
            json.dump(self.testData(job), f)
Code example #28
def calc_features(net, n_images, blobs):
    n_images = int(0.6 * n_images)
    batchsize = net.blobs['data'].data.shape[0]
    feats = dict()
    for blob in blobs:
        out_shape = list(net.blobs[blob].data.shape)
        out_shape[0] = n_images
        print('Will allocate {:.2f} GiB of memory'.format(
            np.prod(out_shape) * 2 / 1024 / 1024 / 1024))
        feats[blob] = np.zeros(
            tuple(out_shape),
            dtype=np.float16 if not blob == 'label' else np.int32)
    print('Need %.3f GiB' %
          (np.sum([x.nbytes for x in feats.values()]) / 1024 / 1024 / 1024))

    for it in pyprind.prog_bar(range(0, n_images, batchsize),
                               update_interval=10,
                               stream=sys.stderr):
        net.forward()
        for blob in blobs:
            n = feats[blob][it:it + batchsize, ...].shape[0]
            feats[blob][it:it + batchsize, ...] = net.blobs[blob].data[:n, ...]

    return [feats[blob] for blob in blobs]
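Two keyword arguments in the call above are easy to miss: `update_interval` throttles how often the bar is redrawn (here at most every 10 seconds), and `stream` redirects the bar, in this case to `sys.stderr`, so it does not interleave with results printed on stdout. A minimal sketch using the same options:

import sys
import time
import pyprind

# Redraw at most once per second and write the bar to stderr so stdout stays clean.
for i in pyprind.prog_bar(range(200), update_interval=1, stream=sys.stderr):
    time.sleep(0.01)  # stand-in for the real per-batch work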
Code example #29
File: utils_conf.py Project: carlgogo/VIP
    def __new__(cls, iterable=None, desc=None, total=None, leave=True,
                backend=None, verbose=True):
        if backend is None:
            backend = Progressbar.backend

        if not verbose:
            backend = "hide"

        if backend == "tqdm":
            from tqdm import tqdm
            return tqdm(iterable=iterable, desc=desc, total=total, leave=leave,
                        ascii=True, ncols=80, file=sys.stdout,
                        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed"
                                   "}<{remaining}{postfix}]") # remove rate_fmt
        elif backend == "tqdm_notebook":
            from tqdm import tqdm_notebook
            return tqdm_notebook(iterable=iterable, desc=desc, total=total,
                                 leave=leave)
        elif backend == "pyprind":
            from pyprind import ProgBar, prog_bar
            ProgBar._adjust_width = lambda self: None  # keep constant width
            if iterable is None:
                return ProgBar(total, title=desc, stream=1)
            else:
                return prog_bar(iterable, title=desc, stream=1,
                                iterations=total)
        elif backend == "hide":
            return NoProgressbar(iterable=iterable)
        else:
            raise NotImplementedError("unknown backend")
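This `__new__` turns `Progressbar` into a small factory: the class attribute `Progressbar.backend` selects tqdm, tqdm_notebook, pyprind, or a no-op wrapper, and every call site stays the same. A hedged usage sketch (it assumes the `Progressbar` class defined above and that the chosen backend is installed):

# Hypothetical call site, not part of the original file.
Progressbar.backend = "pyprind"

for frame in Progressbar(iterable=range(500), desc="Processing frames"):
    pass  # per-frame work would go here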
Code example #30
def evaluate(model, model_name, sents, ivocab):
    train = False
    loss = 0.0
    acc = 0.0
    count = 0
    vocab_size = model.vocab_size
    for data_i in pyprind.prog_bar(xrange(len(sents))):
        words = sents[data_i:data_i + 1]

        if model_name == "bd_lstm":
            xs, ms = utils.make_batch(words,
                                      train=train,
                                      tail=False,
                                      mask=True)
            ys = model.forward(xs=xs, ms=ms, train=train)
        else:
            xs = utils.make_batch(words, train=train, tail=False)
            ys = model.forward(ts=xs, train=train)

        ys = F.concat(ys, axis=0)
        ts = F.concat(xs, axis=0)
        ys = F.reshape(ys, (-1, vocab_size))
        ts = F.reshape(ts, (-1, ))

        loss += F.softmax_cross_entropy(ys, ts) * len(words[0])
        acc += F.accuracy(ys, ts, ignore_label=-1) * len(words[0])
        count += len(words[0])

    loss_data = float(cuda.to_cpu(loss.data)) / count
    acc_data = float(cuda.to_cpu(acc.data)) / count

    return loss_data, acc_data
Code example #31
    def handle(self, *args, **options):
        if options['tsvfile'] is None or options['sheet'] is None:
            error_msg = 'Enter name of tsv file and sheet number as argument.' \
                        ' "python manage.py import_hojas_de_vida --tsvfile=hoja0.tsv --sheet=0 --settings=ventanita.settings.local'
            raise CommandError(error_msg)

        tsv_file = options['tsvfile']
        sheet = options['sheet']
        self.sheet = sheet

        with codecs.open(tsv_file, "r") as file_handle:
            dump = file_handle.readlines()

        if sheet == '0':
            items = []
            for line in pyprind.prog_bar(dump):
                item = self.parse_line(line)
                if item is not None:
                    items.append(Candidato(**item))
            Candidato.objects.bulk_create(items)
        elif sheet == '1':
            self.import_institucion_educativa(dump)
            self.import_education_for_candidate(dump)
        elif sheet == '2':
            self.import_institucion_educativa_superior(dump)
            self.import_education_for_candidate(dump)
Code example #32
def validate_directory_against_xsd(dirpath, schema):
    """Extract all the ``.spold`` files in the directory ``dirpath``.

    Use a multiprocessing pool if ``use_mp``, which is the default."""
    assert os.path.isdir(dirpath), "Can't find data directory {}".format(dirpath)
    assert os.path.isfile(schema), "Can't find schema file {}".format(schema)

    filelist = [os.path.join(dirpath, filename)
                for filename in os.listdir(dirpath)
                if filename.lower().endswith(".spold")
                ]

    print("Validating {} undefined datasets".format(len(filelist)))

    errors = []
    ecospold2_schema = etree.XMLSchema(etree.parse(open(schema)))

    for fp in pyprind.prog_bar(filelist):
        file = etree.parse(open(fp))
        if not ecospold2_schema.validate(file):
            errors.append(os.path.basename(fp))

    if errors:
        print("The following files did not validate:")
        pprint.pprint(errors)
    else:
        print("All files valid")
Code example #33
def count_sentence_length(corpus, count):
    for s in pyprind.prog_bar(corpus):
        length = len(s)
        if length >= len(count):
            continue
        count[length] += 1
    return count
Code example #34
    def track_progress(self, noisy_grad, filtered_grad):

        # if function passed in --- save values
        if self.fun is not None:
            self.fun_vals.append(self.fun(self.params, self.t))

        # report on gradient
        if self.callback is not None:
            self.callback(self.params, self.t, noisy_grad)

        # update object attributes
        if self.save_params:
            self.param_trace.append(self.params.copy())

        if self.save_grads:
            self.grad_trace.append(noisy_grad)

        if self.save_filtered_grads:
            self.filtered_grad_trace.append(filtered_grad)

        if self.true_grad_fun is not None:
            true_grad = self.true_grad_fun(self.params, self.t)
            self.true_grad_trace.append(true_grad)

        if (self.num_marginal_samples_to_save > 0) and \
           (self.t % self.marginal_sample_skip == 0):
            nms = self.num_marginal_samples_to_save
            print "  ... saving %d marginal samples (iter %d)" % (nms, self.t)
            msamps = np.array([
                self.grad_fun(self.params, self.t)
                for _ in pyprind.prog_bar(xrange(nms))
            ])
            self.marginal_samples[self.t] = msamps
Code example #35
File: utils_conf.py Project: r4lv/VIP
    def __new__(cls, iterable=None, desc=None, total=None, leave=True,
                backend=None, verbose=True):
        if backend is None:
            backend = Progressbar.backend

        if not verbose:
            backend = "hide"

        if backend == "tqdm":
            from tqdm import tqdm
            return tqdm(iterable=iterable, desc=desc, total=total, leave=leave,
                        ascii=True, ncols=80, file=sys.stdout,
                        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed"
                                   "}<{remaining}{postfix}]") # remove rate_fmt
        elif backend == "tqdm_notebook":
            from tqdm import tqdm_notebook
            return tqdm_notebook(iterable=iterable, desc=desc, total=total,
                                 leave=leave)
        elif backend == "pyprind":
            from pyprind import ProgBar, prog_bar
            ProgBar._adjust_width = lambda self: None  # keep constant width
            if iterable is None:
                return ProgBar(total, title=desc, stream=1)
            else:
                return prog_bar(iterable, title=desc, stream=1,
                                iterations=total)
        elif backend == "hide":
            return NoProgressbar(iterable=iterable)
        else:
            raise NotImplementedError("unknown backend")
Code example #36
def parse(model, decoder, dataset, path_pred):
    """
    :type model: SpanBasedModel
    :type decoder: IncrementalCKYDecoder
    :type dataset: numpy.ndarray
    :type path_pred: str
    :rtype: None
    """
    with open(path_pred, "w") as f:

        for data in pyprind.prog_bar(dataset):
            edu_ids = data.edu_ids
            edus = data.edus
            edus_postag = data.edus_postag
            edus_head = data.edus_head
            sbnds = data.sbnds
            pbnds = data.pbnds

            # Feature extraction
            edu_vectors = model.forward_edus(edus, edus_postag,
                                             edus_head)  # (n_edus, bilstm_dim)
            padded_edu_vectors = model.pad_edu_vectors(
                edu_vectors)  # (n_edus+2, bilstm_dim)
            mask_bwd, mask_fwd = model.make_masks(
            )  # (1, bilstm_dim), (1, bilstm_dim)

            # Parsing (bracketing)
            span_scores = precompute_all_span_scores(
                model=model,
                edus=edus,
                edus_postag=edus_postag,
                sbnds=sbnds,
                pbnds=pbnds,
                padded_edu_vectors=padded_edu_vectors,
                mask_bwd=mask_bwd,
                mask_fwd=mask_fwd)
            unlabeled_sexp = decoder.decode(span_scores=span_scores,
                                            inputs=edu_ids,
                                            sbnds=sbnds,
                                            pbnds=pbnds,
                                            use_sbnds=True,
                                            use_pbnds=True)  # list of str
            unlabeled_tree = treetk.sexp2tree(unlabeled_sexp,
                                              with_nonterminal_labels=False,
                                              with_terminal_labels=False)
            unlabeled_tree.calc_spans()
            unlabeled_spans = treetk.aggregate_spans(
                unlabeled_tree, include_terminal=False,
                order="pre-order")  # list of (int, int)

            # Parsing (assigning majority labels to the unlabeled tree)
            span2label = {(b, e): "<ELABORATION,N/S>"
                          for (b, e) in unlabeled_spans}
            labeled_tree = treetk.assign_labels(unlabeled_tree,
                                                span2label,
                                                with_terminal_labels=False)
            labeled_sexp = treetk.tree2sexp(labeled_tree)

            f.write("%s\n" % " ".join(labeled_sexp))
Code example #37
def crawler(url, start_page, end_page):
    with open("output.json", "w") as f:
        # open the browser
        browser = webdriver.Firefox()
        # open the target URL
        browser.get(url)
        # find the "product overview" (產品總覽) link
        res = browser.find_element_by_id('ContentPlaceHolder1_LinkButton11')
        # click it
        res.click()
        # crawl from start_page to end_page
        for i in pyprind.prog_bar(range(start_page, end_page + 1)):
            # the first page is already shown, so no pagination click is needed
            if (i != 1):
                # find the button for the next page
                res = browser.find_element_by_link_text(str(i))
                # click it
                res.click()
            #get the source of page
            pagesource = browser.page_source
            #get the contain of website
            soup = BeautifulSoup(pagesource, "lxml")
            #get the table
            table = soup.find('table',
                              attrs={'id': 'ContentPlaceHolder1_GVTABPRO'})
            #get the rows of table
            rows = table.find_all('tr')
            index = 0
            for row in rows:
                #index == 1 means it's the first col
                if (index == 0):
                    cols = row.find_all('th')
                    colname = [element.text.strip() for element in cols]
                    index = index + 1
                else:
                    #get the cols from rows
                    cols = row.find_all('td')
                    #the elements of table is stored in cols now
                    cols = [element.text.strip() for element in cols]
                    #the row of pages
                    if (cols[0] == '12345678910'):
                        break
                    #store the cols into data
                    #data is the type of dict
                    data = {
                        str(colname[0]): cols[0],
                        str(colname[1]): cols[1],
                        str(colname[2]): cols[2],
                        str(colname[3]): cols[3],
                        str(colname[4]): cols[4],
                        str(colname[5]): cols[5],
                        str(colname[6]): cols[6],
                        str(colname[7]): cols[7],
                        str(colname[8]): cols[8]
                    }
                    #store into dataout
                    dataout.append(data)
        browser.close()
        f.write(json.dumps(dataout))
Code example #38
File: fb.py Project: david30907d/Python_Crawler
def crawl(i):
	info = graph.get_object(i)
	print(info)
	posts = graph.get_connections(i, 'posts')
	for p in pyprind.prog_bar(posts['data']):
		p['reactions'] = graph.get_connections(p['id'], 'reactions')
		p['comments'] = graph.get_connections(p['id'], 'comments')
	json.dump(posts, open('facebook.json', 'w'))
Code example #39
File: misc.py Project: andim/mise
def progressbar(iterator):
    # if available add progress indicator
    try:
        import pyprind
        iterator = pyprind.prog_bar(iterator)
    except:
        pass
    return iterator
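This helper degrades gracefully: if pyprind cannot be imported, the iterator is returned untouched and the calling loop runs without a bar. (The bare `except` keeps the fallback robust but also hides unrelated import-time errors; catching `ImportError` would be stricter.) A usage sketch, assuming the `progressbar` helper defined above:

# Works the same whether or not pyprind is installed.
total = 0
for value in progressbar(range(10000)):
    total += value
print(total)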
Code example #40
    def import_institucion_educativa_superior(self, dump):
        instituciones = []
        lines = self.convert_to_lines(dump)
        for line in pyprind.prog_bar(
                lines, monitor=True, title="Importing high studies for candidate"):
            this_inst_edu = get_institucion_superior(line)
            if this_inst_edu not in instituciones:
                instituciones.append(this_inst_edu)
        upload_instituciones(instituciones)
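Here `prog_bar` is given a `title`, printed before the bar starts, and `monitor=True`, which makes pyprind track CPU and memory usage (this requires the `psutil` package). A minimal sketch of the same options with hypothetical stand-in data:

import pyprind

lines = ["row %d" % i for i in range(2000)]  # hypothetical stand-in data

# monitor=True tracks CPU/memory usage (requires psutil to be installed).
for line in pyprind.prog_bar(lines, monitor=True,
                             title="Importing high studies for candidate"):
    _ = line.split()  # stand-in for the real parsing work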
Code example #41
File: inspect_parks.py Project: mplewis/park-ratings
def inspect_parks(parks, output_dir):
    """Request data for each park, process it, and write it to disk."""
    bar = pyprind.ProgBar(len(parks))
    for park in pyprind.prog_bar(parks):
        data = inspect_park(park)
        fn = join(output_dir, '{}.json'.format(park.id))
        with open(fn, 'w') as f:
            json.dump(data, f)
        bar.update(item_id=park.name[:20])
Code example #42
File: ddpg.py Project: CoderHHX/incubator-mxnet
    def train(self):

        memory = ReplayMem(
            obs_dim=self.env.observation_space.flat_dim,
            act_dim=self.env.action_space.flat_dim,
            memory_size=self.memory_size)

        itr = 0
        path_length = 0
        path_return = 0
        end = False
        obs = self.env.reset()

        for epoch in xrange(self.n_epochs):
            logger.push_prefix("epoch #%d | " % epoch)
            logger.log("Training started")
            for epoch_itr in pyprind.prog_bar(range(self.epoch_length)):
                # run the policy
                if end:
                    # reset the environment and strategy when an episode ends
                    obs = self.env.reset()
                    self.strategy.reset()
                    # self.policy.reset()
                    self.strategy_path_returns.append(path_return)
                    path_length = 0
                    path_return = 0
                # note action is sampled from the policy not the target policy
                act = self.strategy.get_action(obs, self.policy)
                nxt, rwd, end, _ = self.env.step(act)

                path_length += 1
                path_return += rwd

                if not end and path_length >= self.max_path_length:
                    end = True
                    if self.include_horizon_terminal:
                        memory.add_sample(obs, act, rwd, end)
                else:
                    memory.add_sample(obs, act, rwd, end)

                obs = nxt

                if memory.size >= self.memory_start_size:
                    for update_time in xrange(self.n_updates_per_sample):
                        batch = memory.get_batch(self.batch_size)
                        self.do_update(itr, batch)

                itr += 1

            logger.log("Training finished")
            if memory.size >= self.memory_start_size:
                self.evaluate(epoch, memory)
            logger.dump_tabular(with_prefix=False)
            logger.pop_prefix()
Code example #43
def create_final_image_barcode(pieces_width, final_width, height, fname, images):
    bc = Image.new('RGB', (pieces_width, height))
    
    posx = 0
    for img in pyprind.prog_bar(images):
        bc.paste(img[0], (posx, 0))
        posx += img[1]

    os.chdir('..')
    bc = bc.resize((final_width, height), Image.ANTIALIAS)
    bc.save(fname, 'PNG')
Code example #44
    def optimize_gen(self, inputs, extra_inputs=None, callback=None, yield_itr=None):

        if len(inputs) == 0:
            # Assumes that we should always sample mini-batches
            raise NotImplementedError

        f_opt = self._opt_fun["f_opt"]
        f_loss = self._opt_fun["f_loss"]

        if extra_inputs is None:
            extra_inputs = tuple()

        last_loss = f_loss(*(tuple(inputs) + extra_inputs))

        start_time = time.time()

        dataset = BatchDataset(
            inputs, self._batch_size,
            extra_inputs=extra_inputs
            #, randomized=self._randomized
        )

        itr = 0
        for epoch in pyprind.prog_bar(list(range(self._max_epochs))):
            for batch in dataset.iterate(update=True):
                f_opt(*batch)
                if yield_itr is not None and (itr % (yield_itr+1)) == 0:
                    yield
                itr += 1

            new_loss = f_loss(*(tuple(inputs) + extra_inputs))
            if self._verbose:
                logger.log("Epoch %d, loss %s" % (epoch, new_loss))

            if self._callback or callback:
                elapsed = time.time() - start_time
                callback_args = dict(
                    loss=new_loss,
                    params=self._target.get_param_values(trainable=True) if self._target else None,
                    itr=epoch,
                    elapsed=elapsed,
                )
                if self._callback:
                    self._callback(callback_args)
                if callback:
                    callback(**callback_args)

            if abs(last_loss - new_loss) < self._tolerance:
                break
            last_loss = new_loss
Code example #45
File: char_video.py Project: lamontu/starter
    def genCharVideo(self, filepath):
        self.charVideo = []
        cap = cv2.VideoCapture(filepath)
        self.timeInterval = round(1 / cap.get(5), 3)
        nf = int(cap.get(7))
        print("Generate char video, please wait...")
        if cap.isOpened():
            for i in pyprind.prog_bar(range(nf)):
                ret, vframe = cap.read()
                if ret:
                    rawFrame = cv2.cvtColor(vframe, cv2.COLOR_BGR2GRAY)
                    frame = self.convert(rawFrame, os.get_terminal_size(), fill=True)
                    self.charVideo.append(frame)
            cap.release()
Code example #46
File: primerfinder.py Project: manutamminen/epride
def epic_ixs(primers, interval=80, search_range=30):
    """ Find triplets of indices among primer candidates that are on the average 80 bases apart
        with flexibility of 30 bases.
    """
    starts = list(map(list, zip(*primers)))[0]
    for start1 in pyprind.prog_bar(starts):
        start2 = start1 + interval
        start3 = start2 + interval
        for ix1 in range(-search_range, search_range):
            str2 = start2 + ix1
            for ix2 in range(-search_range, search_range):
                str3 = start3 + ix2
                if str2 in starts and str3 in starts:
                    yield(start1, str2, str3)
Code example #47
    def import_institucion_educativa(self, dump):
        instituciones = []
        for line in pyprind.prog_bar(dump):
            fields = line.strip().split('\t')

            this_inst_edu = get_institucion_primaria(fields)
            if this_inst_edu not in instituciones:
                instituciones.append(this_inst_edu)

            this_inst_edu = get_institucion_secundaria(fields)
            if this_inst_edu not in instituciones:
                instituciones.append(this_inst_edu)

        upload_instituciones(instituciones)
Code example #48
File: models.py Project: HIPS/DESI-MCMC
    def render_model_image(self, fimg, xlim=None, ylim=None, exclude=None):
        # create model image, and add each patch in - init with sky noise
        mod_img     = np.ones(fimg.nelec.shape) * fimg.epsilon
        source_list = [s for s in self.srcs if s is not exclude]

        if not len(source_list) == 0:
            # add each source's model patch
            for s in pyprind.prog_bar(source_list):
                patch, ylim, xlim = s.compute_model_patch(fits_image=fimg, xlim=xlim, ylim=ylim)
                mod_img[ylim[0]:ylim[1], xlim[0]:xlim[1]] += patch

        if xlim is not None and ylim is not None:
            mod_img = mod_img[ylim[0]:ylim[1], xlim[0]:xlim[1]]

        return mod_img
Code example #49
def create_color_barcode(colors, bar_width, height, width, fname):
    barcode_width = len(colors) * bar_width
    bc = Image.new('RGB', (barcode_width, height))
    draw = ImageDraw.Draw(bc)

    # draw the new barcode
    posx = 0
    print('Generating barcode...')
    for color in pyprind.prog_bar(colors):
        draw.rectangle([posx, 0, posx + bar_width, height], fill=color)
        posx += bar_width

    del draw

    # Image.ANTIALIAS was removed in Pillow 10; on newer Pillow use Image.LANCZOS instead
    bc = bc.resize((width, height), Image.ANTIALIAS)
    bc.save(fname, 'PNG')
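
A minimal usage sketch with hypothetical values: three colours, one 10-pixel-wide bar each, resized to a 1920x1080 strip and written to barcode.png.

colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
create_color_barcode(colors, bar_width=10, height=1080, width=1920, fname="barcode.png")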
Code example #50
0
    def import_education_for_candidate(self, dump):
        estudios = []
        lines = self.convert_to_lines(dump)
        for line in pyprind.prog_bar(
                lines, monitor=True, title="Importing studies for candidate"):
            if self.sheet == '2':
                e = self.construct_education_obj(line, 'superior')
                estudios.append(e)
            elif self.sheet == '1':
                e = self.construct_education_obj(line, 'primaria')
                if e.inicio != '0':
                    estudios.append(e)

                e = self.construct_education_obj(line, 'secundaria')
                if e.inicio != '0':
                    estudios.append(e)

        Estudio.objects.bulk_create(estudios)
Code example #51
0
def spawn_image_threads(num_threads, fname, bar_width, height, width):
    # change directories if it already isn't in frames
    if 'frames' not in os.getcwd():
        os.chdir('frames')

    q = queue.Queue()

    # get a distributed list of images for the threads
    images = helpers.distribute_frame_lists(num_threads)
    
    threads = []
    for i in range(num_threads):
        t_fname = 'thread_{}_barcode.png'.format(i)
        thread = threading.Thread(target=create_thread_barcode, 
                                  args=(bar_width, height, t_fname, images[i], i, q))
        threads.append(thread)


    # stitch together several smaller barcodes on separate threads
    # to speed up the process
    print('{} threads creating barcodes with {} frames each...'.format(num_threads, len(images[0])))
    print('Progress bar may take a while to start moving if there are a lot of frames.')
    for thread in threads:
        # thread.daemon = True
        thread.start()

    pieces_width = 0
    # a list to put the thread results in the correct order
    thread_results = [None] * num_threads 
    for i in pyprind.prog_bar(range(num_threads)):
        result = q.get()
        thread_results[result[0]] = [result[1], result[2]]
        pieces_width += result[2]

    # then finally stitch together all the pieces that the threads
    # generated
    print('Generating final barcode...')
    create_final_image_barcode(pieces_width, width, height, fname, thread_results)

    # delete thread pieces
    for i in range(num_threads):
        os.remove('frames/thread_{}_barcode.png'.format(i))

    return
Code example #52
0
def validate_directory(dirpath):
    data, errors = extract_directory(dirpath, False), {}
    print("Validating datasets:")
    for ds in pyprind.prog_bar(data):
        try:
            dataset_schema(ds)
        except Invalid as err:
            errors[err.msg] = {"path": err.path, "dataset": ds}
    if errors:
        logfile = "ocelot-validation-errors.log"
        errors = [(k, v['path'], v['dataset']) for k, v in errors.items()]
        print("{} errors found.\nSee error logfile {} for details.".format(
            len(errors), logfile)
        )
        with open(logfile, "w", encoding='utf-8') as f:
            f.write("Internal validation errors for extracted directory:\n{}\n".format(dirpath))
            f.write(pprint.pformat(errors, width=120, compact=True))
    else:
        print("No errors found")
Code example #53
0
File: vae.py Project: andymiller/vae-flow
    def fit(num_epochs, minibatch_size, L, optimizer, sess):
        # Python 2 / pre-1.0 TensorFlow code: tf.pack and tf.initialize_variables
        # were later renamed tf.stack and tf.variables_initializer.
        num_batches = N // minibatch_size

        # set up cost function and updates
        if load_data:
            idx      = tf.placeholder(tf.int32, name='idx')
            mbsize   = tf.constant(minibatch_size)
            xdimsize = tf.constant(xdim)
            x_batch  = tf.slice(X_all, tf.pack([idx*mbsize, 0]),
                                       tf.pack([mbsize,xdimsize]), name='x_batch')
        else:
            x_batch  = tf.placeholder(tf.float32, shape=[minibatch_size, xdim],
                                      name='X')
        cost = -tf.reduce_mean(vlb(x_batch, L)) * N
        train_step = optimizer.minimize(cost)

        sess.run(tf.initialize_variables(ut.nontrainable_variables()))

        def train(bidx):
            if load_data:
                train_step.run(feed_dict={idx:bidx}, session=sess)
                return cost.eval(feed_dict={idx:bidx}, session=sess)
            else:
                xb = X[bidx*minibatch_size:(bidx+1)*minibatch_size]
                train_step.run(feed_dict={x_batch: xb}, session=sess)
                return cost.eval(feed_dict={x_batch: xb}, session=sess)

        start = time()
        for i in xrange(num_epochs):
            bidxs = npr.permutation(num_batches)
            vals = [train(bidx) for bidx in pyprind.prog_bar(bidxs)]
            print 'epoch {:>4} of {:>4}: {:> .6}' . \
                    format(i+1, num_epochs, np.median(vals[-10:]))
            if callback:
                callback(i)

            # will tell you what nodes are being added
            #tf.get_default_graph().finalize()

        stop = time()
        print 'cost {}, {:>5} sec per update, {:>5} sec total\n'.format(
            np.median(vals[-10:]), (stop - start) / N, stop - start)
Code example #54
0
def main(args):
    path = args.path
    dim = args.dim
    topk = args.topk
    output = args.output

    word2vec = word_evaluation.load_word2vec(path=path, dim=dim)
    vocab = word2vec.keys()
    wrapper = word_evaluation.Wrapper(word2vec)

    with open(output, "w") as f:
        word_i = 0
        vocab_size = len(vocab)
        for word in pyprind.prog_bar(vocab):
            retrieved = wrapper.most_similar(positives=[word], negatives=[], K=topk)
            res = [w for w, s in retrieved]
            res = " ".join(res)
            f.write("[%d/%d: %s]: %s\n" % (word_i+1, vocab_size, word, res))
            f.flush()
            word_i += 1
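
A hypothetical driver sketch for main(): the original argument parser is not shown, so the flag names and defaults below are assumptions chosen only to match the attributes main() reads (path, dim, topk, output).

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--path", required=True, help="word2vec file to load")
parser.add_argument("--dim", type=int, default=300, help="embedding dimensionality")
parser.add_argument("--topk", type=int, default=10, help="neighbours to retrieve per word")
parser.add_argument("--output", required=True, help="file to write the neighbour lists to")
main(parser.parse_args())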
Code example #55
0
def subject_verify(new_arxiv):
    if new_arxiv.count > 0:
        subject_list = copy.copy(new_arxiv.subject)
        remove_list = []
        new_ver = arxiv(new_arxiv.author)
        new_ver.parse()
        for count in pyprind.prog_bar(range(len(new_ver.title))):
            if len(set(subject_list) & set(new_ver.category[count])) == 0:
                remove_list.append(count)
        new_ver.arxiv_id = (np.delete(np.array(new_ver.arxiv_id), remove_list, axis=0)).tolist()
        new_ver.time = (np.delete(np.array(new_ver.time), remove_list, axis=0)).tolist()
        new_ver.title = (np.delete(np.array(new_ver.title), remove_list, axis=0)).tolist()
        new_ver.category = (np.delete(np.array(new_ver.category), remove_list, axis=0)).tolist()
        new_ver.pdf = (np.delete(np.array(new_ver.pdf), remove_list, axis=0)).tolist()
        new_ver.contributor = (np.delete(np.array(new_ver.contributor), remove_list, axis=0)).tolist()
        new_ver.count = len(new_ver.title)
        new_ver.subject = combine_subject(new_ver.category)
        print('Remove %d articles' % len(remove_list))
        return new_ver
    else:
        return new_arxiv
Code example #56
0
 def institution_verify(self, save=False, institution=['nyu', 'new york university']):
     if self.count != 0:
         remove_list = []
         if save == True and not os.path.exists('./paper/%s/' %self.author):
             os.makedirs('./paper/%s/' %self.author)
         for count in pyprind.prog_bar(range(len(self.pdf))):
             os.system('wget -q -U "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2.3) Gecko/20100401 '
                       'Firefox/3.6.3" -O ./check.pdf %s' %self.pdf[count])
             if save == True:
                 #os.system('cp ./check.pdf ./paper/%s/%s.pdf' %(self.author, self.arxiv_id[count]))
                 if len(self.arxiv_id[count].split('/')) >1 :
                     temp_dir = self.arxiv_id[count].split('/')[0]
                     if not os.path.exists('./paper/%s/%s/' % (self.author, temp_dir)):
                         os.makedirs('./paper/%s/%s/' % (self.author, temp_dir))
                 shutil.copy('./check.pdf', './paper/%s/%s.pdf' %(self.author, self.arxiv_id[count]))
             try:
                 text = convert('./check.pdf', pages=[0,1,2]).lower()
                 match_flag = False
                 for match_text in institution:
                     if text.find(match_text) != -1:
                         match_flag = True
                         break
                 if match_flag == True:
                     continue
                 else:
                     remove_list.append(count)
             except:
                 print("Can not read file %s" % self.arxiv_id[count])
                 remove_list.append(count)
                 continue
         os.system("rm ./check.pdf")
         self.arxiv_id = (np.delete(np.array(self.arxiv_id), remove_list, axis=0)).tolist()
         self.time = (np.delete(np.array(self.time), remove_list, axis=0)).tolist()
         self.title = (np.delete(np.array(self.title), remove_list, axis=0)).tolist()
         self.category = (np.delete(np.array(self.category), remove_list, axis=0)).tolist()
         self.pdf = (np.delete(np.array(self.pdf), remove_list, axis=0)).tolist()
         self.contributor = (np.delete(np.array(self.contributor), remove_list, axis=0)).tolist()
         self.count = len(self.title)
         self.subject = combine_subject(self.category)
         print('Remove %d articles' % len(remove_list))
Code example #57
0
File: api.py Project: carlosp420/elum
def complete_me(content_as_list, output_filename, email):
    """
    Add metadata to the blast output file. Metadata is obtained by querying the
    NCBI database.

    :param content_as_list: blast output content (CSV file) as a list of lines.
    :param output_filename: output file, written line by line.
    :param email: e-mail address used to identify the client to NCBI Entrez.
    """
    Entrez.email = email

    for i in pyprind.prog_bar(range(len(content_as_list))):
        line = content_as_list[i]
        line = line.strip()
        if line.startswith('query'):
            with open(output_filename, 'w') as handle:
                handle.write(line + '\tGeneLength\tTitle\n')
            continue

        line_complement = _get_metadata_as_string(line)

        with open(output_filename, 'a') as handle:
            handle.write(line + '\t' + line_complement + '\n')
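
A minimal usage sketch with hypothetical file names: read an existing blast output, then annotate it line by line (each non-header line presumably triggers one Entrez lookup, so large files are slow).

with open("blast_output.csv") as handle:
    content_as_list = handle.readlines()
complete_me(content_as_list, "blast_output_with_metadata.csv", "you@example.org")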
Code example #58
0
def spawn_threads(threads, kmeans):
    # change directories if it already isn't in frames
    if 'frames' not in os.getcwd():
        os.chdir('frames')

    q = queue.Queue()
    num_threads = threads

    # get a distributed list of images for the threads
    images = helpers.distribute_frame_lists(num_threads)

    threads = []
    for i in range(num_threads):
        if kmeans:
            thread = threading.Thread(target=kc.get_image_colors,
                                      args=(i, q, images[i]))
        else:
            thread = threading.Thread(target=pc.get_image_colors,
                                      args=(i, q, images[i]))

        threads.append(thread)

    print('{} threads generating frame colors with {} frames each...'.format(num_threads, len(images[0])))
    for thread in threads:
        thread.daemon = True
        thread.start()

    thread_results = [None] * num_threads
    for i in pyprind.prog_bar(range(num_threads)):
        result = q.get()
        thread_results[result[0]] = result[1]

    # return to the original directory
    os.chdir('..')

    return [item for sublist in thread_results for item in sublist]
Code example #59
0
File: run_celeste.py Project: HIPS/DESI-MCMC
    bsrcs = ssrcs[38:39] + gsrcs[38:39]
    bidx  = np.concatenate([sidx[38:39], gidx[38:39]])

    # breadcrumbs - make sure we can examine which source corresponds to
    # which catalog entry
    blocs = np.array([s.params.u for s in bsrcs])
    plocs = primary_field_df[['ra', 'dec']].values[bidx,:]
    assert np.allclose(blocs, plocs), "not the same location! noooo"

    ######################################
    # gibbs step on a handful of sources #
    ######################################
    print "======= running celeste sampler ========"
    # do some resampling, each source keeps each sample
    Nsamps = 10
    for i in pyprind.prog_bar(xrange(Nsamps)):
        # resample photon images
        model.field_list[0].resample_photons(bsrcs, verbose=True)
        # resample source params
        for s in pyprind.prog_bar(bsrcs):
            s.resample()
            s.store_sample()
            s.store_loglike()
        # global/local update
        #for s in bsrcs:
        #    s.sample_type()
        # global updates
        #model.sample_birth()
        #model.sample_death()

    ########################################
Code example #60
0
File: util.py Project: proboscis/pyutil
 def __init__(self, items):
     from pyprind import prog_bar
     self.bar = prog_bar(items)
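
For reference, a minimal self-contained sketch of the pattern shared by all the examples above: pyprind.prog_bar wraps an iterable with a known length and prints a progress bar (to stderr by default) while the loop consumes it.

import time

import pyprind

for _ in pyprind.prog_bar(range(50)):
    time.sleep(0.05)  # stand-in for real per-item work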