def generate_inverse_strategy_data(strategy_lists, ef_input_keys,
                                   ef_output_keys, techno_keys_waste,
                                   techno_keys_product,
                                   unit_scaling_techno_product,
                                   unit_scaling_techno_waste, sacrificial_lca,
                                   water_dir):
    initial_ratios_inverse = {}
    print("Calculate initial in/out ratios for inverse strategy activities")
    for act in pyprind.prog_bar(strategy_lists['inverse']):
        initial_ratios_inverse[act] = 1 / initial_in_over_out(
            act, ef_input_keys, ef_output_keys, techno_keys_waste,
            techno_keys_product, unit_scaling_techno_product,
            unit_scaling_techno_waste)

    print("getting row incides for inverse strategy")
    rows_of_interest_inverse = {}
    for act in pyprind.prog_bar(strategy_lists['inverse']):
        rows_of_interest_inverse[act] = identify_rows_of_interest_inverse(
            sacrificial_lca, act, ef_input_keys, ef_output_keys,
            techno_keys_waste, techno_keys_product)

    with open(os.path.join(water_dir, "initial_ratios_inverse.pickle"),
              "wb") as f:
        pickle.dump(initial_ratios_inverse, f)
    with open(os.path.join(water_dir, "rows_of_interest_inverse.pickle"),
              "wb") as f:
        pickle.dump(rows_of_interest_inverse, f)
Example 2
def generate_default_strategy_data(strategy_lists, transformation_from,
                                   transformation_to, sacrificial_lca,
                                   land_use_dir):
    if strategy_lists['default']:
        initial_ratios_default = {}
        print(
            "Calculate initial in/out ratios for default strategy activities")
        for act in pyprind.prog_bar(strategy_lists['default']):
            initial_ratios_default[act] = initial_in_over_out(
                act,
                transformation_from,
                transformation_to,
            )

        rows_of_interest_default = {}
        print("getting rows of interest for default strategy")
        for act in pyprind.prog_bar(strategy_lists['default']):
            rows_of_interest_default[act] = identify_rows_of_interest_default(
                sacrificial_lca, act, transformation_from, transformation_to)

        with open(os.path.join(land_use_dir, "initial_ratios_default.pickle"),
                  "wb") as f:
            pickle.dump(initial_ratios_default, f)
        with open(
                os.path.join(land_use_dir, "rows_of_interest_default.pickle"),
                "wb") as f:
            pickle.dump(rows_of_interest_default, f)
Example 3
def generate_inverse_strategy_data(strategy_lists, transformation_from,
                                   transformation_to, sacrificial_lca,
                                   land_use_dir):

    if strategy_lists['inverse']:
        initial_ratios_inverse = {}
        print(
            "Calculate initial in/out ratios for inverse strategy activities")
        for act in pyprind.prog_bar(strategy_lists['inverse']):
            initial_ratios_inverse[act] = 1 / initial_in_over_out(
                act,
                transformation_from,
                transformation_to,
            )

        print("getting keys for inverse strategy")
        rows_of_interest_inverse = {}
        for act in pyprind.prog_bar(strategy_lists['inverse']):
            rows_of_interest_inverse[act] = identify_rows_of_interest_inverse(
                sacrificial_lca,
                act,
                transformation_from,
                transformation_to,
            )

        with open(os.path.join(land_use_dir, "initial_ratios_inverse.pickle"),
                  "wb") as f:
            pickle.dump(initial_ratios_inverse, f)
        with open(
                os.path.join(land_use_dir, "rows_of_interest_inverse.pickle"),
                "wb") as f:
            pickle.dump(rows_of_interest_inverse, f)
def generate_default_strategy_data(strategy_lists, ef_input_keys,
                                   ef_output_keys, techno_keys_waste,
                                   techno_keys_product,
                                   unit_scaling_techno_product,
                                   unit_scaling_techno_waste, sacrificial_lca,
                                   water_dir):
    initial_ratios_default = {}
    print("Calculate initial in/out ratios for default strategy activities")
    for act in pyprind.prog_bar(strategy_lists['default']):
        initial_ratios_default[act] = initial_in_over_out(
            act, ef_input_keys, ef_output_keys, techno_keys_waste,
            techno_keys_product, unit_scaling_techno_product,
            unit_scaling_techno_waste)
    rows_of_interest_default = {}

    print("getting rows of interest for default strategy")
    for act in pyprind.prog_bar(strategy_lists['default']):
        rows_of_interest_default[act] = identify_rows_of_interest_default(
            sacrificial_lca, act, ef_input_keys, ef_output_keys,
            techno_keys_waste, techno_keys_product)

    with open(os.path.join(water_dir, "initial_ratios_default.pickle"),
              "wb") as f:
        pickle.dump(initial_ratios_default, f)
    with open(os.path.join(water_dir, "rows_of_interest_default.pickle"),
              "wb") as f:
        pickle.dump(rows_of_interest_default, f)
Example 5
def main(args):
    path = args.path

    filenames = os.listdir(path)
    filenames = [n for n in filenames if n.endswith(".edus.arcs")]
    filenames.sort()

    for filename in pyprind.prog_bar(filenames):
        edus_arcs = utils.read_lines(os.path.join(path, filename),
                                     process=lambda line: line.split())

        edus_deprels = []
        for arcs in edus_arcs:
            arcs = treetk.hyphens2arcs(arcs)
            deprels = [l for h, d, l in arcs]
            edus_deprels.append(deprels)

        # Write
        with open(
                os.path.join(path,
                             filename.replace(".edus.arcs", ".edus.deprels")),
                "w") as f:
            for deprels in edus_deprels:
                deprels = " ".join(deprels)
                f.write("%s\n" % deprels)
Example 6
    def read_examples_from_file(fields, format: str, path):
        make_example = {
            'json': Example.fromJSON,
            'dict': Example.fromdict,
            'tsv': Example.fromCSV,
            'csv': Example.fromCSV
        }[format.lower()]
        lines = 0
        with open(os.path.expanduser(path), encoding="utf8") as f:
            for line in f:
                lines += 1
        with open(os.path.expanduser(path), encoding="utf8") as f:
            if format == 'csv':
                reader = unicode_csv_reader(f)
            elif format == 'tsv':
                reader = unicode_csv_reader(f, delimiter='\t')
            else:
                reader = f

            next(reader)

            examples = [
                make_example(line, fields) for line in pyprind.prog_bar(
                    reader,
                    iterations=lines,
                    title='\nReading and processing data from "' + path + '"')
            ]
        return examples
Example 7
def media_jobs(cfg, dry_run, is_video):
    """Generate either all image or all video jobs for a given config."""
    if is_video:
        media_lc = 'video'
        media_uc = 'Video'
        src_media = src_videos
        media_targets = vid_targets
    else:
        media_lc = 'image'
        media_uc = 'Image'
        src_media = src_images
        media_targets = img_targets

    l.info('Generating {} jobs...'.format(media_lc))
    jobs = []
    skipped = 0

    si = src_media(cfg)
    if not si:
        l.debug('No source {}s'.format(media_lc))
        return

    for src in pyprind.prog_bar(si):
        j, s = media_targets(cfg, src, dry_run)
        jobs.extend(j)
        skipped += s

    l.info('{} jobs: running {}, skipped {}, total {}'.format(
        media_uc, len(jobs), skipped,
        len(jobs) + skipped))

    return jobs
Example 8
def find_optimal_gamma(horizon=15, n_traj=1000, map_name="5x5"):
    w_env = FrozenLakeEnv(map_name="9x9",
                          horizon=horizon,
                          theta_dist="hypercube")
    for gamma in candidate_gammas:
        test_pi_H = EpsOptimalMDPPolicy(w_env, discount=gamma)
        logger.log("-------------------")
        logger.log("Evaluating gamma={} for {} timesteps".format(
            gamma, horizon))
        logger.log("-------------------")
        test_env = HumanCRLWrapper(w_env, test_pi_H, 0)
        logger.log("Obtaining Samples...")
        # Alas, the rllab samplers don't support hot swapping envs and batch sizes
        # TODO: write a new parallel sampler, instead of sampling manually
        rewards = []
        regrets = []
        for i in pyprind.prog_bar(range(n_traj)):
            observation = test_env.reset()
            for t in range(horizon):
                action = test_env.nA - 1
                observation, reward, done, info = test_env.step(action)
                if done:
                    rewards.append(info["accumulated rewards"])
                    regrets.append(info["accumulated regret"])
                    break
        #feel free to add more data
        logger.log("NumTrajs {}".format(n_traj))
        logger.log("AverageReturn {}".format(np.mean(rewards)))
        logger.log("StdReturn {}".format(np.std(rewards)))
        logger.log("MaxReturn {}".format(np.max(rewards)))
        logger.log("MinReturn {}".format(np.min(rewards)))
        logger.log("AverageRegret {}".format(np.mean(regrets)))
        logger.log("MaxRegret {}".format(np.max(regrets)))
        logger.log("MinRegret {}".format(np.min(regrets)))
Example 9
def eval_mdp_policies(horizon=15, n_traj=100000, log_dir=None):
    text_output_file = None if log_dir is None else osp.join(log_dir, "text")
    w_env = FrozenLakeEnv(horizon=horizon)
    if text_output_file is not None:
        logger.add_text_output(text_output_file)
    for human_policy in human_mdp_policies.values():
        logger.log("-------------------")
        logger.log("Evaluating {} for {} timesteps".format(
            human_policy.__name__, horizon))
        logger.log("-------------------")

        test_pi_H = human_policy(w_env)
        test_env = HumanCRLWrapper(w_env, test_pi_H)
        logger.log("Obtaining Samples...")
        rewards = []
        for i in pyprind.prog_bar(range(n_traj)):
            observation = test_env.reset()
            for t in range(horizon):
                # _, action = observation
                # if action == test_env.nA:
                action = test_env.nA - 1
                observation, reward, done, info = test_env.step(action)
                if done:
                    rewards.append(info["accumulated rewards"])
                    break
        #feel free to add more data
        logger.log("NumTrajs {}".format(n_traj))
        logger.log("AverageReturn {}".format(np.mean(rewards)))
        logger.log("StdReturn {}".format(np.std(rewards)))
        logger.log("MaxReturn {}".format(np.max(rewards)))
        logger.log("MinReturn {}".format(np.min(rewards)))
Example 10
def crawl_songs(area_list, save_path):
    singer_id_done = []
    for root, dirs, files in os.walk(save_path):
        for file_name in files:
            singer_id = re.search("song_list_.*_(.*).json", file_name).group(1)
            singer_id_done.append(int(singer_id))

    area_2_singers = json.load(
        open("../Sources/qq_music_yield/area_2_singers.json",
             "r",
             encoding="utf-8"))

    for area in area_list:
        singer_list = area_2_singers[area]
        bar = pyprind.ProgBar(
            len(singer_list),
            title="process of crawling songs of singers of {}".format(area))
        for singer in pyprind.prog_bar(singer_list):
            singer_name = singer[settings.KEY_SINGER_NAME]
            singer_id = singer[settings.KEY_SINGER_ID]
            if singer_id in singer_id_done:
                continue
            song_list = crawl_song_list(singer)
            json.dump(
                song_list,
                open("%s/song_list_%s_%s.json" %
                     (save_path, singer_name, singer_id),
                     "w",
                     encoding="utf-8"))
            bar.update()
Example 11
    def train(self, sess=None):

        if sess is None:
            sess = tf.Session()

        sess.run(tf.global_variables_initializer())

        replay_buffer = SimpleReplayBuffer(env_spec=self._env.spec, max_replay_buffer_size=self._max_pool_size)

        path_length = 0
        episode_rewards = 0
        observation = self._env.reset()

        with sess.as_default():
            self._update_target()

            for ep in range(self._n_epochs):
                mean_loss = 0
                trained_iter = 0
                epoch_rewards = list()
                episode_lengths = list()
                with logger.prefix('Epoch #%d | ' % ep):
                    for ep_iter in pyprind.prog_bar(range(self._epoch_length)):
                        self._env.render()
                        action, _ = self._es.get_action(observation)
                        next_observation, reward, terminal, _ = self._env.step(action)

                        replay_buffer.add_sample(
                            observation=observation,
                            next_observation=next_observation,
                            action=action,
                            terminal=terminal,
                            reward=reward,
                        )

                        episode_rewards += reward
                        path_length += 1

                        observation = next_observation

                        if terminal or path_length >= self._max_path_length:
                            observation = self._env.reset()
                            epoch_rewards.append(episode_rewards)
                            episode_lengths.append(path_length)
                            path_length = 0
                            episode_rewards = 0

                        iter = ep * self._epoch_length + ep_iter
                        if replay_buffer.size > self._min_pool_size:
                            batch = replay_buffer.random_batch(self._batch_size)
                            loss = self._do_training(iter, batch)
                            mean_loss += loss
                            trained_iter += 1

                        if iter % self._target_update_period == 0 and replay_buffer.size > self._min_pool_size:
                            self._update_target()
                    logger.record_tabular('mean-td-error', (mean_loss/self._epoch_length))
                    logger.record_tabular('mean-episode-reward', np.mean(epoch_rewards))
                    logger.record_tabular('mean-epsiode-length', np.mean(episode_lengths))
                    logger.dump_tabular()
Example 12
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "segmented"))
    filenames = [n for n in filenames if n.endswith(".txt")]
    filenames.sort()

    utils.mkdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                     "preprocessed"))

    for filename in pyprind.prog_bar(filenames):
        path_seg = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "segmented", filename)
        path_raw = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "raw", filename)
        path_dst = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "preprocessed",
                                filename.replace(".txt", ".edus"))
        # Input
        edus = utils.read_lines(path_seg, process=lambda line: line)
        edus = remove_empty_lines(filename, edus)
        raw_lines = utils.read_lines(path_raw, process=lambda line: line)
        raw_lines = remove_empty_lines(filename, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)
        # Processing
        edus = convert_edus(edus, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)
        # Output
        utils.write_lines(path_dst, edus)
Example 13
    def _identify_techno_keys(self):
        """Identify keys of activities with water production exchanges

         These should be considered in balancing. Keys are grouped by activities
         associated with input exchanges (e.g. wastewater treatment) and
         output exchanges (e.g. potable water)
         """
        names_file = Path(__file__).parents[0]/'data'/'water_intermediary_exchange_names.json'
        if not names_file.is_file():
            raise FileNotFoundError("Could not find file water_intermediary_exchange_names.json in expected location")
        with open(names_file, "rb") as f:
            techno_product_names_dict = json.load(f)
        techno_product_names = techno_product_names_dict[self.ecoinvent_version]
        techno_treat_keys = []
        techno_transfo_keys = []
        db_loaded = Database(self.database_name).load()
        for act_key, act in pyprind.prog_bar(db_loaded.items()):
            if act['reference product'] in techno_product_names:
                if act['production amount']<0:
                    techno_treat_keys.append(act_key)
                elif act['production amount']>0:
                    techno_transfo_keys.append(act_key)
                else:
                    warnings.warn("Activity {} has a product exchange {} with "
                                  "an amount of 0: skipped".format(
                        act_key,
                        act['reference product']
                    ))
        return techno_transfo_keys, techno_treat_keys
Example 14
def subject_verify(new_arxiv):
    if new_arxiv.count > 0:
        subject_list = copy.copy(new_arxiv.subject)
        remove_list = []
        new_ver = arxiv(new_arxiv.author)
        new_ver.parse()
        for count in pyprind.prog_bar(range(len(new_ver.title))):
            if len(set(subject_list) & set(new_ver.category[count])) == 0:
                remove_list.append(count)
        new_ver.arxiv_id = (np.delete(np.array(new_ver.arxiv_id),
                                      remove_list,
                                      axis=0)).tolist()
        new_ver.time = (np.delete(np.array(new_ver.time), remove_list,
                                  axis=0)).tolist()
        new_ver.title = (np.delete(np.array(new_ver.title),
                                   remove_list,
                                   axis=0)).tolist()
        new_ver.category = (np.delete(np.array(new_ver.category),
                                      remove_list,
                                      axis=0)).tolist()
        new_ver.pdf = (np.delete(np.array(new_ver.pdf), remove_list,
                                 axis=0)).tolist()
        new_ver.contributor = (np.delete(np.array(new_ver.contributor),
                                         remove_list,
                                         axis=0)).tolist()
        new_ver.count = len(new_ver.title)
        new_ver.subject = combine_subject(new_ver.category)
        print('Remove %d articles' % len(remove_list))
        return new_ver
    else:
        return new_arxiv
def main():
    dataset_path = "/path/to/Caltech-101"
    modelzoo_path = "/path/to/VGG16"
    
    # create an instance
    convnet = FeatureExtractor(
            prototxt_path=os.path.join(modelzoo_path, "vgg16_deploy.prototxt"),
            caffemodel_path=os.path.join(modelzoo_path, "vgg16.caffemodel"),
            target_layer_name="fc7",
            image_size=224,
            mean_values=[103.939, 116.779, 123.68])
    
    # header
    f = open("caltech101_vggnet_fc7_features.csv", "w")
    header = ["filepath"]
    for i in xrange(4096):
        header.append("feat%d" % (i+1))
    header = ",".join(header) + "\n"
    f.write(header)
    
    # extract features
    categories = os.listdir(dataset_path)
    for category in pyprind.prog_bar(categories):
        file_names = os.listdir(os.path.join(dataset_path, category))
        for file_name in file_names:
            img = cv2.imread(os.path.join(dataset_path, category, file_name))
            feat = convnet.transform(img)
            feat_str = [os.path.join(category, file_name)]
            for value in feat:
                feat_str.append(str(value))
            row = ",".join(feat_str)
            f.write("%s\n" % row)
            f.flush()

    f.close()
Example 16
def split_by_id(beatdf, id_field='ptid', frac_train=.6, frac_val=.15):
    """ Deterministically splits the beatdf by _patient_ """
    empis = np.sort(beatdf[id_field].unique())
    print("Splitting %d unique patients" % len(empis))

    # deterministic split
    rs = np.random.RandomState(0)
    perm_idx = rs.permutation(len(empis))
    num_train = int(frac_train * len(empis))
    num_val = int(frac_val * len(empis))
    train_idx = perm_idx[:num_train]
    val_idx = perm_idx[num_train:(num_train + num_val)]
    test_idx = perm_idx[(num_train + num_val):]
    empis_train = empis[train_idx]
    empis_val = empis[val_idx]
    empis_test = empis[test_idx]
    print(" ... patient splits: %d train, %d val, %d test " %
          (len(empis_train), len(empis_val), len(empis_test)))

    # make dictionaries
    train_dict = {e: "train" for e in empis_train}
    val_dict = {e: "val" for e in empis_val}
    test_dict = {e: "test" for e in empis_test}
    split_dict = {**train_dict, **val_dict, **test_dict}

    # add train/val test split to each
    split = []
    for e in pyprind.prog_bar(beatdf[id_field]):
        split.append(split_dict[e])

    beatdf['split'] = split
    return beatdf
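A minimal, hypothetical call of the split above (the toy DataFrame, column values and patient ids are illustrative only):

import numpy as np
import pandas as pd

# toy beat-level table: several rows per patient id
beatdf = pd.DataFrame({
    "ptid": ["a", "a", "b", "b", "c", "d", "e", "e"],
    "beat": np.arange(8),
})
beatdf = split_by_id(beatdf, id_field="ptid", frac_train=.6, frac_val=.15)
print(beatdf["split"].value_counts())  # every row of a patient shares one split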
Example 17
def media_jobs(cfg, dry_run, is_video):
    if is_video:
        media_lc = 'video'
        media_uc = 'Video'
        src_media = src_videos
        media_targets = vid_targets
    else:
        media_lc = 'image'
        media_uc = 'Image'
        src_media = src_images
        media_targets = img_targets

    l.info('Generating {} jobs...'.format(media_lc))
    jobs = []
    skipped = 0

    si = src_media(cfg)
    if not si:
        l.debug('No source {}s'.format(media_lc))
        return

    for src in pyprind.prog_bar(si):
        j, s = media_targets(cfg, src, dry_run)
        jobs.extend(j)
        skipped += s

    l.info('{} jobs: running {}, skipped {}, total {}'
           .format(media_uc, len(jobs), skipped, len(jobs) + skipped))

    return jobs
def Train_Eval_Process_Layer_v2(train_X,train_Y,test_X,test_Y):
    # LSTM
    epoch_num = 10
    #model = LSTM_model(input_dim=8,hidden_dim=8)
    model = One_Sent2Other_Sent()
    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCELoss()
    for epoch_  in pyprind.prog_bar(range(epoch_num)):
        model.train()
        for i in range(len(train_X)):
            X = torch.tensor(train_X[i])#.cuda()
            pred_train_Y = model(X)
            Y = torch.tensor([train_Y[i]])#.cuda()
            true_train_Y = Y.squeeze(dim=-1)
            loss = criterion(pred_train_Y, true_train_Y.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #print('loss:',loss)
        model.eval()
        pred_test_Y = list()
        for i in range(len(test_X)):
            X = torch.tensor(test_X[i])#.cuda()
            pred_test_Y_i = model(X).cpu().data.numpy().reshape(1,1)
            pred_test_Y.append(pred_test_Y_i)
        test_Y_hat = np.concatenate(pred_test_Y,0)
        test_Y_hat_list = list()
        for i in range(test_Y_hat.shape[0]):
            if test_Y_hat[i,0] >= 0.5:
                test_Y_hat_list.append(1)
            else:
                test_Y_hat_list.append(0)
        Evaluation(test_Y_hat_list,test_Y)
Example 19
    def write_db_to_brightway(self):
        for s in pyprind.prog_bar(self.scenarios.items()):
            scenario, year = s

            print('Write new database to Brightway2.')
            wurst.write_brightway2_database(
                self.db, "ecoinvent_" + scenario + "_" + str(year))
Example 20
def extract_ecospold2_directory(dirpath, use_mp=True):
    """Extract all the ``.spold`` files in the directory ``dirpath``.

    Use a multiprocessing pool if ``use_mp``, which is the default."""
    if os.name == 'nt':
        use_mp = False

    assert os.path.isdir(dirpath), "Can't find directory {}".format(dirpath)
    filelist = [os.path.join(dirpath, filename)
                for filename in os.listdir(dirpath)
                if filename.lower().endswith(".spold")
                ]

    print("Extracting {} undefined datasets".format(len(filelist)))

    if use_mp:
        start = time()
        # With code from
        # http://jtushman.github.io/blog/2014/01/14/python-%7C-multiprocessing-and-interrupts/
        with multiprocessing.Pool(
                processes=multiprocessing.cpu_count(),
                initializer=lambda : signal.signal(signal.SIGINT, signal.SIG_IGN)
            ) as pool:
            try:
                data = pool.map(generic_extractor, filelist)
            except KeyboardInterrupt:
                pool.terminate()
                raise KeyboardInterrupt
        print("Extracted {} undefined datasets in {:.1f} seconds".format(len(data), time() - start))
    else:
        data = [generic_extractor(fp)
                for fp in pyprind.prog_bar(filelist)]

    # Unroll lists of lists
    return [y for x in data for y in x]
Example 21
def validate_directory_against_xsd(dirpath, schema):
    """Extract all the ``.spold`` files in the directory ``dirpath``.

    Use a multiprocessing pool if ``use_mp``, which is the default."""
    assert os.path.isdir(dirpath), "Can't find data directory {}".format(
        dirpath)
    assert os.path.isfile(schema), "Can't find schema file {}".format(schema)

    filelist = [
        os.path.join(dirpath, filename) for filename in os.listdir(dirpath)
        if filename.lower().endswith(".spold")
    ]

    print(("Validating {} undefined datasets".format(len(filelist))))

    errors = []
    ecospold2_schema = etree.XMLSchema(etree.parse(open(schema)))

    for fp in pyprind.prog_bar(filelist):
        file = etree.parse(open(fp))
        if not ecospold2_schema.validate(file):
            errors.append(os.path.basename(fp))

    if errors:
        print("The following files did not validate:")
        pprint.pprint(errors)
    else:
        print("All files valid")
Example 22
def main():
    config = utils.Config()

    path_out = os.path.join(config.getpath("data"), "aarc_abst")
    utils.mkdir(path_out)

    filenames = os.listdir(config.getpath("aarc"))
    filenames = [n for n in filenames if n.endswith(".txt.utf8")]
    filenames.sort()

    nlp = spacy.load("en_core_web_sm",
                     disable=["tagger", "parser", "ner", "textcat"])

    cnt = 0
    for filename in pyprind.prog_bar(filenames):
        text = extract_abstract(os.path.join(config.getpath("aarc"), filename))
        if text == "":
            # print("No Abstract!: %s" % filename)
            continue
        with open(
                os.path.join(path_out,
                             filename.replace(".txt.utf8", ".doc.tokens")),
                "w") as f:
            doc = nlp(text)
            tokens = [token.text for token in doc]
            assert len(tokens) > 0
            tokens = " ".join(tokens)
            f.write("%s\n" % tokens)
        cnt += 1

    print("Processed %d/%d files" % (cnt, len(filenames)))
Example 23
    def bulk_upload(self):
        items_to_upload = []
        append = items_to_upload.append

        credentials = get_db_credentials(self.settings)
        if 'sqlite3' in credentials['ENGINE']:
            db = dataset.connect("sqlite:///" + os.path.basename(credentials['NAME']))
        if 'postgresql' in credentials['ENGINE']:
            db = dataset.connect('postgresql://' +
                                 credentials['USER'] + ':' +
                                 credentials['PASSWORD'] + '@' +
                                 credentials['HOST'] + ':' +
                                 credentials['PORT'] + '/' +
                                 credentials['NAME'])
        table = db['visitors_visitor']

        print("Starting checks to see if we have this item in our database.")
        if len(self.items) == 0:
            print("Nothing to upload")
        else:
            for i in pyprind.prog_bar(range(len(self.items))):
                item = self.items[i]
                try:
                    item['date'] = datetime.datetime.strptime(
                        item['date'],
                        '%Y-%m-%d',
                        )
                except ValueError:
                    item['date'] = None

                append(item)

            print("uploading %i records for table %s" % (len(items_to_upload), self.mytable))

            table.insert_many(items_to_upload)
def Format_csv2XY(path):
    X, Y, title, self_contradictory_template, revision_id_list = \
        list(), list(), list(), list(), list()
    df = pd.read_csv(path)
    page_title = list(df['page_title'])
    revision_text = list(df['revision_text'])
    revision_id = list(df['revision_id'])
    for i in pyprind.prog_bar(range(len(revision_text))):
        self_contradictory_template_i = list()
        text = revision_text[i]
        title_i = page_title[i]
        revision_id_i = revision_id[i]
        if isinstance(text, str) is True and len(text.split()) != 0:
            wikicode = mwparserfromhell.parse(text)
            templates = wikicode.filter_templates()
            is_pos = False
            for j in range(len(templates)):
                if 'Self-contradictory' in templates[j]:
                    is_pos = True
                    self_contradictory_template_i.append(templates[j])
            if is_pos:
                X.append(str(text))
                title.append(title_i)
                Y.append(1)
            else:
                X.append(str(text))
                title.append(title_i)
                Y.append(0)
            self_contradictory_template.append(self_contradictory_template_i)
            revision_id_list.append(revision_id_i)
    return X, Y, title, self_contradictory_template, revision_id_list
Example 25
def Train_Eval_Process_Layer(train_X, train_Y, test_X, test_Y):
    # RetaGNN + Self Attention
    import pyprind
    import pickle
    epoch_num = 10
    input_dim = 8
    hidden_dim = 8
    model = double_LSTM_model().cuda()
    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCELoss()
    for epoch_ in range(epoch_num):
        model.train()
        for i in pyprind.prog_bar(range(len(train_X))):
            batch_X, batch_Y = train_X[i], train_Y[i]  #(b,l,d) ,(b,)
            batch_Y_hat = model(batch_X).squeeze(dim=-1)
            loss = criterion(batch_Y_hat, batch_Y.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #print('loss:',loss)
        model.eval()
        pred_Y = list()
        for i in range(len(test_X)):
            pred_Y.append(model(test_X[i]).view(1, -1))
        test_Y_hat = torch.cat(pred_Y, 0).cpu().data.numpy()
        test_Y_hat_list = list()
        for i in range(test_Y_hat.shape[0]):
            if test_Y_hat[i, 0] >= 0.5:
                test_Y_hat_list.append(1)
            else:
                test_Y_hat_list.append(0)
        Evaluation(test_Y_hat_list, test_Y)
Example 26
def extract_ecospold2_directory(dirpath, use_mp=True):
    """Extract all the ``.spold`` files in the directory ``dirpath``.

    Use a multiprocessing pool if ``use_mp``, which is the default."""
    if os.name == 'nt':
        use_mp = False

    assert os.path.isdir(dirpath), "Can't find directory {}".format(dirpath)
    filelist = [
        os.path.join(dirpath, filename) for filename in os.listdir(dirpath)
        if filename.lower().endswith(".spold")
    ]

    print(("Extracting {} undefined datasets".format(len(filelist))))

    if use_mp:
        start = time()
        # With code from
        # http://jtushman.github.io/blog/2014/01/14/python-%7C-multiprocessing-and-interrupts/
        with multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                  initializer=lambda: signal.signal(
                                      signal.SIGINT, signal.SIG_IGN)) as pool:
            try:
                data = pool.map(generic_extractor, filelist)
            except KeyboardInterrupt:
                pool.terminate()
                raise KeyboardInterrupt
        print(("Extracted {} undefined datasets in {:.1f} seconds".format(
            len(data),
            time() - start)))
    else:
        data = [generic_extractor(fp) for fp in pyprind.prog_bar(filelist)]

    # Unroll lists of lists
    return [y for x in data for y in x]
Example 27
    def getJob(self):
        job = []
        for i in range(1, 1000):
            if requests.get(
                    'https://www.yourator.co/api/v2/jobs?page={}'.format(
                        i)).json()['jobs'] == []:
                break
            job += requests.get(
                'https://www.yourator.co/api/v2/jobs?page={}'.format(
                    i)).json()['jobs']

        for i in pyprind.prog_bar(job):
            res = requests.get('https://www.yourator.co/' + i['path']).text
            soup = BeautifulSoup(res, "html.parser")
            i['inside'] = {}
            i['inside']['description'] = soup.select(
                '.description')[0].text.strip() if len(
                    soup.select('.description')) else ''
            for j in soup.select('.basic-info'):
                key, value = j.text.strip().replace(' ',
                                                    '').replace('\n',
                                                                '').split(':')
                i['inside'][key] = value

            if i['has_salary_info']:
                for j in soup.select('h2'):
                    if j.text == '薪資範圍':  # '薪資範圍' means 'salary range'
                        i['salary'] = j.findNext('article').text
        with open('job.json', 'w') as f:
            json.dump(self.testData(job), f)
def calc_features(net, n_images, blobs):
    n_images = int(0.6 * n_images)
    batchsize = net.blobs['data'].data.shape[0]
    feats = dict()
    for blob in blobs:
        out_shape = list(net.blobs[blob].data.shape)
        out_shape[0] = n_images
        print('Will allocate {:.2f} GiB of memory'.format(
            np.prod(out_shape) * 2 / 1024 / 1024 / 1024))
        feats[blob] = np.zeros(
            tuple(out_shape),
            dtype=np.float16 if not blob == 'label' else np.int32)
    print('Need %.3f GiB' %
          (np.sum([x.nbytes for x in feats.values()]) / 1024 / 1024 / 1024))

    for it in pyprind.prog_bar(range(0, n_images, batchsize),
                               update_interval=10,
                               stream=sys.stderr):
        net.forward()
        for blob in blobs:
            feats[blob][it:it + batchsize,
                        ...] = net.blobs[blob].data[:feats[blob][it:it +
                                                                 batchsize,
                                                                 ...].shape[0],
                                                    ...]

    return [feats[blob] for blob in blobs]
Example 29
    def __new__(cls, iterable=None, desc=None, total=None, leave=True,
                backend=None, verbose=True):
        if backend is None:
            backend = Progressbar.backend

        if not verbose:
            backend = "hide"

        if backend == "tqdm":
            from tqdm import tqdm
            return tqdm(iterable=iterable, desc=desc, total=total, leave=leave,
                        ascii=True, ncols=80, file=sys.stdout,
                        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed"
                                   "}<{remaining}{postfix}]") # remove rate_fmt
        elif backend == "tqdm_notebook":
            from tqdm import tqdm_notebook
            return tqdm_notebook(iterable=iterable, desc=desc, total=total,
                                 leave=leave)
        elif backend == "pyprind":
            from pyprind import ProgBar, prog_bar
            ProgBar._adjust_width = lambda self: None  # keep constant width
            if iterable is None:
                return ProgBar(total, title=desc, stream=1)
            else:
                return prog_bar(iterable, title=desc, stream=1,
                                iterations=total)
        elif backend == "hide":
            return NoProgressbar(iterable=iterable)
        else:
            raise NotImplementedError("unknown backend")
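A short usage sketch for the wrapper above, assuming the surrounding Progressbar class (with its class-level backend default) and its NoProgressbar fallback are importable; the work inside the loop is a placeholder:

import time

Progressbar.backend = "pyprind"  # or "tqdm", "tqdm_notebook", "hide"
for item in Progressbar(range(50), desc="processing", total=50):
    time.sleep(0.01)  # placeholder for real work

Passing verbose=False routes the call to the "hide" backend and silences the bar entirely.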
Example 30
def evaluate(model, model_name, sents, ivocab):
    train = False
    loss = 0.0
    acc = 0.0
    count = 0
    vocab_size = model.vocab_size
    for data_i in pyprind.prog_bar(xrange(len(sents))):
        words = sents[data_i:data_i + 1]

        if model_name == "bd_lstm":
            xs, ms = utils.make_batch(words,
                                      train=train,
                                      tail=False,
                                      mask=True)
            ys = model.forward(xs=xs, ms=ms, train=train)
        else:
            xs = utils.make_batch(words, train=train, tail=False)
            ys = model.forward(ts=xs, train=train)

        ys = F.concat(ys, axis=0)
        ts = F.concat(xs, axis=0)
        ys = F.reshape(ys, (-1, vocab_size))
        ts = F.reshape(ts, (-1, ))

        loss += F.softmax_cross_entropy(ys, ts) * len(words[0])
        acc += F.accuracy(ys, ts, ignore_label=-1) * len(words[0])
        count += len(words[0])

    loss_data = float(cuda.to_cpu(loss.data)) / count
    acc_data = float(cuda.to_cpu(acc.data)) / count

    return loss_data, acc_data
    def handle(self, *args, **options):
        if options['tsvfile'] is None or options['sheet'] is None:
            error_msg = 'Enter name of tsv file and sheet number as argument.' \
                        ' "python manage.py import_hojas_de_vida --tsvfile=hoja0.tsv --sheet=0 --settings=ventanita.settings.local'
            raise CommandError(error_msg)

        tsv_file = options['tsvfile']
        sheet = options['sheet']
        self.sheet = sheet

        with codecs.open(tsv_file, "r") as file_handle:
            dump = file_handle.readlines()

        if sheet == '0':
            items = []
            for line in pyprind.prog_bar(dump):
                item = self.parse_line(line)
                if item is not None:
                    items.append(Candidato(**item))
            Candidato.objects.bulk_create(items)
        elif sheet == '1':
            self.import_institucion_educativa(dump)
            self.import_education_for_candidate(dump)
        elif sheet == '2':
            self.import_institucion_educativa_superior(dump)
            self.import_education_for_candidate(dump)
Example 32
def validate_directory_against_xsd(dirpath, schema):
    """Extract all the ``.spold`` files in the directory ``dirpath``.

    Use a multiprocessing pool if ``use_mp``, which is the default."""
    assert os.path.isdir(dirpath), "Can't find data directory {}".format(dirpath)
    assert os.path.isfile(schema), "Can't find schema file {}".format(schema)

    filelist = [os.path.join(dirpath, filename)
                for filename in os.listdir(dirpath)
                if filename.lower().endswith(".spold")
                ]

    print("Validating {} undefined datasets".format(len(filelist)))

    errors = []
    ecospold2_schema = etree.XMLSchema(etree.parse(open(schema)))

    for fp in pyprind.prog_bar(filelist):
        file = etree.parse(open(fp))
        if not ecospold2_schema.validate(file):
            errors.append(os.path.basename(fp))

    if errors:
        print("The following files did not validate:")
        pprint.pprint(errors)
    else:
        print("All files valid")
def count_sentence_length(corpus, count):
    for s in pyprind.prog_bar(corpus):
        length = len(s)
        if length >= len(count):
            continue
        count[length] += 1
    return count
Example 34
    def track_progress(self, noisy_grad, filtered_grad):

        # if function passed in --- save values
        if self.fun is not None:
            self.fun_vals.append(self.fun(self.params, self.t))

        # report on gradient
        if self.callback is not None:
            self.callback(self.params, self.t, noisy_grad)

        # update object attributes
        if self.save_params:
            self.param_trace.append(self.params.copy())

        if self.save_grads:
            self.grad_trace.append(noisy_grad)

        if self.save_filtered_grads:
            self.filtered_grad_trace.append(filtered_grad)

        if self.true_grad_fun is not None:
            true_grad = self.true_grad_fun(self.params, self.t)
            self.true_grad_trace.append(true_grad)

        if (self.num_marginal_samples_to_save > 0) and \
           (self.t % self.marginal_sample_skip == 0):
            nms = self.num_marginal_samples_to_save
            print "  ... saving %d marginal samples (iter %d)" % (nms, self.t)
            msamps = np.array([
                self.grad_fun(self.params, self.t)
                for _ in pyprind.prog_bar(xrange(nms))
            ])
            self.marginal_samples[self.t] = msamps
Example 36
def parse(model, decoder, dataset, path_pred):
    """
    :type model: SpanBasedModel
    :type decoder: IncrementalCKYDecoder
    :type dataset: numpy.ndarray
    :type path_pred: str
    :rtype: None
    """
    with open(path_pred, "w") as f:

        for data in pyprind.prog_bar(dataset):
            edu_ids = data.edu_ids
            edus = data.edus
            edus_postag = data.edus_postag
            edus_head = data.edus_head
            sbnds = data.sbnds
            pbnds = data.pbnds

            # Feature extraction
            edu_vectors = model.forward_edus(edus, edus_postag,
                                             edus_head)  # (n_edus, bilstm_dim)
            padded_edu_vectors = model.pad_edu_vectors(
                edu_vectors)  # (n_edus+2, bilstm_dim)
            mask_bwd, mask_fwd = model.make_masks(
            )  # (1, bilstm_dim), (1, bilstm_dim)

            # Parsing (bracketing)
            span_scores = precompute_all_span_scores(
                model=model,
                edus=edus,
                edus_postag=edus_postag,
                sbnds=sbnds,
                pbnds=pbnds,
                padded_edu_vectors=padded_edu_vectors,
                mask_bwd=mask_bwd,
                mask_fwd=mask_fwd)
            unlabeled_sexp = decoder.decode(span_scores=span_scores,
                                            inputs=edu_ids,
                                            sbnds=sbnds,
                                            pbnds=pbnds,
                                            use_sbnds=True,
                                            use_pbnds=True)  # list of str
            unlabeled_tree = treetk.sexp2tree(unlabeled_sexp,
                                              with_nonterminal_labels=False,
                                              with_terminal_labels=False)
            unlabeled_tree.calc_spans()
            unlabeled_spans = treetk.aggregate_spans(
                unlabeled_tree, include_terminal=False,
                order="pre-order")  # list of (int, int)

            # Parsing (assigning majority labels to the unlabeled tree)
            span2label = {(b, e): "<ELABORATION,N/S>"
                          for (b, e) in unlabeled_spans}
            labeled_tree = treetk.assign_labels(unlabeled_tree,
                                                span2label,
                                                with_terminal_labels=False)
            labeled_sexp = treetk.tree2sexp(labeled_tree)

            f.write("%s\n" % " ".join(labeled_sexp))
Example 37
def crawler(url, start_page, end_page):
    with open("output.json", "w") as f:
        # open the browser
        browser = webdriver.Firefox()
        # load the target URL
        browser.get(url)
        # get the 'product overview' (產品總覽) link
        res = browser.find_element_by_id('ContentPlaceHolder1_LinkButton11')
        # click it
        res.click()
        #crawl from start_page to end_page
        for i in pyprind.prog_bar(range(start_page, end_page + 1)):
            # no need to click for the first page, it is already shown
            if (i != 1):
                # find the link to page i
                res = browser.find_element_by_link_text(str(i))
                # click it
                res.click()
            #get the source of page
            pagesource = browser.page_source
            # parse the page content
            soup = BeautifulSoup(pagesource, "lxml")
            #get the table
            table = soup.find('table',
                              attrs={'id': 'ContentPlaceHolder1_GVTABPRO'})
            #get the rows of table
            rows = table.find_all('tr')
            index = 0
            for row in rows:
                # index == 0 means this is the header row
                if (index == 0):
                    cols = row.find_all('th')
                    colname = [element.text.strip() for element in cols]
                    index = index + 1
                else:
                    #get the cols from rows
                    cols = row.find_all('td')
                    # the cell texts of the row are now stored in cols
                    cols = [element.text.strip() for element in cols]
                    # this is the pagination row, so stop here
                    if (cols[0] == '12345678910'):
                        break
                    # store the cols into data
                    # data is a dict keyed by column name
                    data = {
                        str(colname[0]): cols[0],
                        str(colname[1]): cols[1],
                        str(colname[2]): cols[2],
                        str(colname[3]): cols[3],
                        str(colname[4]): cols[4],
                        str(colname[5]): cols[5],
                        str(colname[6]): cols[6],
                        str(colname[7]): cols[7],
                        str(colname[8]): cols[8]
                    }
                    #store into dataout
                    dataout.append(data)
        browser.close()
        f.write(json.dumps(dataout))
Example 38
def crawl(i):
	info = graph.get_object(i)
	print(info)
	posts = graph.get_connections(i, 'posts')
	for p in pyprind.prog_bar(posts['data']):
		p['reactions'] = graph.get_connections(p['id'], 'reactions')
		p['comments'] = graph.get_connections(p['id'], 'comments')
	json.dump(posts, open('facebook.json', 'w'))
Example 39
File: misc.py Project: andim/mise
def progressbar(iterator):
    # if available add progress indicator
    try:
        import pyprind
        iterator = pyprind.prog_bar(iterator)
    except:
        pass
    return iterator
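Because the wrapper falls back to the plain iterator when pyprind is missing, call sites can use it unconditionally; a hypothetical caller:

squares = []
for n in progressbar(range(10000)):
    squares.append(n * n)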
    def import_institucion_educativa_superior(self, dump):
        instituciones = []
        lines = self.convert_to_lines(dump)
        for line in pyprind.prog_bar(
                lines, monitor=True,
                title="Importing high studies for candidate"):
            this_inst_edu = get_institucion_superior(line)
            if this_inst_edu not in instituciones:
                instituciones.append(this_inst_edu)
        upload_instituciones(instituciones)
Example 41
def inspect_parks(parks, output_dir):
    """Request data for each park, process it, and write it to disk."""
    bar = pyprind.ProgBar(len(parks))
    for park in pyprind.prog_bar(parks):
        data = inspect_park(park)
        fn = join(output_dir, '{}.json'.format(park.id))
        with open(fn, 'w') as f:
            json.dump(data, f)
        bar.update(item_id=park.name[:20])
Example 42
    def train(self):

        memory = ReplayMem(
            obs_dim=self.env.observation_space.flat_dim,
            act_dim=self.env.action_space.flat_dim,
            memory_size=self.memory_size)

        itr = 0
        path_length = 0
        path_return = 0
        end = False
        obs = self.env.reset()

        for epoch in xrange(self.n_epochs):
            logger.push_prefix("epoch #%d | " % epoch)
            logger.log("Training started")
            for epoch_itr in pyprind.prog_bar(range(self.epoch_length)):
                # run the policy
                if end:
                    # reset the environment and strategy when an episode ends
                    obs = self.env.reset()
                    self.strategy.reset()
                    # self.policy.reset()
                    self.strategy_path_returns.append(path_return)
                    path_length = 0
                    path_return = 0
                # note action is sampled from the policy not the target policy
                act = self.strategy.get_action(obs, self.policy)
                nxt, rwd, end, _ = self.env.step(act)

                path_length += 1
                path_return += rwd

                if not end and path_length >= self.max_path_length:
                    end = True
                    if self.include_horizon_terminal:
                        memory.add_sample(obs, act, rwd, end)
                else:
                    memory.add_sample(obs, act, rwd, end)

                obs = nxt

                if memory.size >= self.memory_start_size:
                    for update_time in xrange(self.n_updates_per_sample):
                        batch = memory.get_batch(self.batch_size)
                        self.do_update(itr, batch)

                itr += 1

            logger.log("Training finished")
            if memory.size >= self.memory_start_size:
                self.evaluate(epoch, memory)
            logger.dump_tabular(with_prefix=False)
            logger.pop_prefix()
def create_final_image_barcode(pieces_width, final_width, height, fname, images):
    bc = Image.new('RGB', (pieces_width, height))
    
    posx = 0
    for img in pyprind.prog_bar(images):
        bc.paste(img[0], (posx, 0))
        posx += img[1]

    os.chdir('..')
    bc = bc.resize((final_width, height), Image.ANTIALIAS)
    bc.save(fname, 'PNG')
    def optimize_gen(self, inputs, extra_inputs=None, callback=None, yield_itr=None):

        if len(inputs) == 0:
            # Assumes that we should always sample mini-batches
            raise NotImplementedError

        f_opt = self._opt_fun["f_opt"]
        f_loss = self._opt_fun["f_loss"]

        if extra_inputs is None:
            extra_inputs = tuple()

        last_loss = f_loss(*(tuple(inputs) + extra_inputs))

        start_time = time.time()

        dataset = BatchDataset(
            inputs, self._batch_size,
            extra_inputs=extra_inputs
            #, randomized=self._randomized
        )

        itr = 0
        for epoch in pyprind.prog_bar(list(range(self._max_epochs))):
            for batch in dataset.iterate(update=True):
                f_opt(*batch)
                if yield_itr is not None and (itr % (yield_itr+1)) == 0:
                    yield
                itr += 1

            new_loss = f_loss(*(tuple(inputs) + extra_inputs))
            if self._verbose:
                logger.log("Epoch %d, loss %s" % (epoch, new_loss))

            if self._callback or callback:
                elapsed = time.time() - start_time
                callback_args = dict(
                    loss=new_loss,
                    params=self._target.get_param_values(trainable=True) if self._target else None,
                    itr=epoch,
                    elapsed=elapsed,
                )
                if self._callback:
                    self._callback(callback_args)
                if callback:
                    callback(**callback_args)

            if abs(last_loss - new_loss) < self._tolerance:
                break
            last_loss = new_loss
Example 45
    def genCharVideo(self, filepath):
        self.charVideo = []
        cap = cv2.VideoCapture(filepath)
        self.timeInterval = round(1 / cap.get(5), 3)  # cap.get(5): FPS
        nf = int(cap.get(7))  # cap.get(7): total frame count
        print("Generate char video, please wait...")
        if cap.isOpened():
            for i in pyprind.prog_bar(range(nf)):
                ret, vframe = cap.read()
                if ret:
                    rawFrame = cv2.cvtColor(vframe, cv2.COLOR_BGR2GRAY)
                    frame = self.convert(rawFrame, os.get_terminal_size(),
                                         fill=True)
                    self.charVideo.append(frame)
            cap.release()
Example 46
def epic_ixs(primers, interval=80, search_range=30):
    """ Find triplets of indices among primer candidates that are on the average 80 bases apart
        with flexibility of 30 bases.
    """
    starts = list(map(list, zip(*primers)))[0]
    for start1 in pyprind.prog_bar(starts):
        start2 = start1 + interval
        start3 = start2 + interval
        for ix1 in range(-search_range, search_range):
            str2 = start2 + ix1
            for ix2 in range(-search_range, search_range):
                str3 = start3 + ix2
                if str2 in starts and str3 in starts:
                    yield(start1, str2, str3)
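A hypothetical call, assuming primers is a sequence of (start, sequence) pairs as the zip(*primers) unpacking implies (positions and sequences below are made up):

primers = [(0, "ACGTACGT"), (78, "TTGCAATG"), (161, "GGATCCAA"), (300, "CCAATTGG")]
for triplet in epic_ixs(primers, interval=80, search_range=30):
    print(triplet)  # -> (0, 78, 161): starts roughly 80 bases apart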
    def import_institucion_educativa(self, dump):
        instituciones = []
        for line in pyprind.prog_bar(dump):
            fields = line.strip().split('\t')

            this_inst_edu = get_institucion_primaria(fields)
            if this_inst_edu not in instituciones:
                instituciones.append(this_inst_edu)

            this_inst_edu = get_institucion_secundaria(fields)
            if this_inst_edu not in instituciones:
                instituciones.append(this_inst_edu)

        upload_instituciones(instituciones)
Example 48
    def render_model_image(self, fimg, xlim=None, ylim=None, exclude=None):
        # create model image, and add each patch in - init with sky noise
        mod_img     = np.ones(fimg.nelec.shape) * fimg.epsilon
        source_list = [s for s in self.srcs if s is not exclude]

        if not len(source_list) == 0:
            # add each source's model patch
            for s in pyprind.prog_bar(source_list):
                patch, ylim, xlim = s.compute_model_patch(fits_image=fimg, xlim=xlim, ylim=ylim)
                mod_img[ylim[0]:ylim[1], xlim[0]:xlim[1]] += patch

        if xlim is not None and ylim is not None:
            mod_img = mod_img[ylim[0]:ylim[1], xlim[0]:xlim[1]]

        return mod_img
def create_color_barcode(colors, bar_width, height, width, fname):
    barcode_width = len(colors) * bar_width
    bc = Image.new('RGB', (barcode_width, height))
    draw = ImageDraw.Draw(bc)

    # draw the new barcode
    posx = 0
    print('Generating barcode...')
    for color in pyprind.prog_bar(colors):
        draw.rectangle([posx, 0, posx + bar_width, height], fill=color)
        posx += bar_width

    del draw

    bc = bc.resize((width, height), Image.ANTIALIAS)
    bc.save(fname, 'PNG')
    def import_education_for_candidate(self, dump):
        estudios = []
        lines = self.convert_to_lines(dump)
        for line in pyprind.prog_bar(
                lines, monitor=True, title="Importing studies for candidate"):
            if self.sheet == '2':
                e = self.construct_education_obj(line, 'superior')
                estudios.append(e)
            elif self.sheet == '1':
                e = self.construct_education_obj(line, 'primaria')
                if e.inicio != '0':
                    estudios.append(e)

                e = self.construct_education_obj(line, 'secundaria')
                if e.inicio != '0':
                    estudios.append(e)

        Estudio.objects.bulk_create(estudios)
def spawn_image_threads(num_threads, fname, bar_width, height, width):
    # change directories if it already isn't in frames
    if 'frames' not in os.getcwd():
        os.chdir('frames')

    q = queue.Queue()

    # get a distributed list of images for the threads
    images = helpers.distribute_frame_lists(num_threads)
    
    threads = []
    for i in range(num_threads):
        t_fname = 'thread_{}_barcode.png'.format(i)
        thread = threading.Thread(target=create_thread_barcode, 
                                  args=(bar_width, height, t_fname, images[i], i, q))
        threads.append(thread)


    # stitch together several smaller barcodes on separate threads
    # to speed up the process
    print('{} threads creating barcodes with {} frames each...'.format(num_threads, len(images[0])))
    print('Progress bar may take a while to start moving if there are a lot of frames.')
    for thread in threads:
        # thread.daemon = True
        thread.start()

    pieces_width = 0
    # a list to put the thread results in the correct order
    thread_results = [None] * num_threads 
    for i in pyprind.prog_bar(range(num_threads)):
        result = q.get()
        thread_results[result[0]] = [result[1], result[2]]
        pieces_width += result[2]

    # then finally stitch together all the pieces that the threads
    # generated
    print('Generating final barcode...')
    create_final_image_barcode(pieces_width, width, height, fname, thread_results)

    # delete thread pieces
    for i in range(num_threads):
        os.remove('frames/thread_{}_barcode.png'.format(i))

    return
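create_thread_barcode and create_final_image_barcode are not shown in this example. Judging from how the queue results are consumed above, each worker is expected to put a (thread_index, filename, piece_width) tuple on the queue; below is a hedged sketch of such a worker, with every name and the colour source assumed rather than taken from the original project.

from PIL import Image, ImageDraw

def create_thread_barcode(bar_width, height, fname, frame_files, index, q):
    # Hypothetical worker: one bar per assigned frame. The real project would
    # derive each bar's colour from the frame; a flat grey keeps this sketch
    # self-contained.
    piece_width = len(frame_files) * bar_width
    piece = Image.new('RGB', (piece_width, height))
    draw = ImageDraw.Draw(piece)
    for i, _frame in enumerate(frame_files):
        draw.rectangle([i * bar_width, 0, (i + 1) * bar_width, height],
                       fill=(128, 128, 128))
    piece.save(fname, 'PNG')
    # Report (index, filename, width) so the caller can stitch pieces in order.
    q.put((index, fname, piece_width))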
Example n. 52
def validate_directory(dirpath):
    data, errors = extract_directory(dirpath, False), {}
    print("Validating datasets:")
    for ds in pyprind.prog_bar(data):
        try:
            dataset_schema(ds)
        except Invalid as err:
            errors[err.msg] = {"path": err.path, "dataset": ds}
    if errors:
        logfile = "ocelot-validation-errors.log"
        errors = [(k, v['path'], v['dataset']) for k, v in errors.items()]
        print("{} errors found.\nSee error logfile {} for details.".format(
            len(errors), logfile)
        )
        with open(logfile, "w", encoding='utf-8') as f:
            f.write("Internal validation errors for extracted directory:\n{}\n".format(dirpath))
            f.write(pprint.pformat(errors, width=120, compact=True))
    else:
        print("No errors found")
Example n. 53
    def fit(num_epochs, minibatch_size, L, optimizer, sess):
        # Nested helper: N, xdim, X / X_all, load_data, vlb and callback are
        # closed over from the enclosing scope. The snippet targets Python 2
        # and pre-1.0 TensorFlow (tf.pack, tf.initialize_variables, xrange).
        num_batches = N // minibatch_size

        # set up cost function and updates
        if load_data:
            idx      = tf.placeholder(tf.int32, name='idx')
            mbsize   = tf.constant(minibatch_size)
            xdimsize = tf.constant(xdim)
            x_batch  = tf.slice(X_all, tf.pack([idx*mbsize, 0]),
                                       tf.pack([mbsize,xdimsize]), name='x_batch')
        else:
            x_batch  = tf.placeholder(tf.float32, shape=[minibatch_size, xdim],
                                      name='X')
        cost = -tf.reduce_mean(vlb(x_batch, L)) * N
        train_step = optimizer.minimize(cost)

        sess.run(tf.initialize_variables(ut.nontrainable_variables()))

        def train(bidx):
            if load_data:
                train_step.run(feed_dict={idx:bidx}, session=sess)
                return cost.eval(feed_dict={idx:bidx}, session=sess)
            else:
                xb = X[bidx*minibatch_size:(bidx+1)*minibatch_size]
                train_step.run(feed_dict={x_batch: xb}, session=sess)
                return cost.eval(feed_dict={x_batch: xb}, session=sess)

        start = time()
        for i in xrange(num_epochs):
            bidxs = npr.permutation(num_batches)
            vals = [train(bidx) for bidx in pyprind.prog_bar(bidxs)]
            print 'epoch {:>4} of {:>4}: {:> .6}'.format(
                i+1, num_epochs, np.median(vals[-10:]))
            if callback:
                callback(i)

            # will tell you what nodes are being added
            #tf.get_default_graph().finalize()

        stop = time()
        print 'cost {}, {:>5} sec per update, {:>5} sec total\n'.format(
            np.median(vals[-10:]), (stop - start) / N, stop - start)
def main(args):
    path = args.path
    dim = args.dim
    topk = args.topk
    output = args.output

    word2vec = word_evaluation.load_word2vec(path=path, dim=dim)
    vocab = word2vec.keys()
    wrapper = word_evaluation.Wrapper(word2vec)

    with open(output, "w") as f:
        word_i = 0
        vocab_size = len(vocab)
        for word in pyprind.prog_bar(vocab):
            retrieved = wrapper.most_similar(positives=[word], negatives=[], K=topk)
            res = [w for w, s in retrieved]
            res = " ".join(res)
            f.write("[%d/%d: %s]: %s\n" % (word_i+1, vocab_size, word, res))
            f.flush()
            word_i += 1
Example n. 55
def subject_verify(new_arxiv):
    if new_arxiv.count > 0:
        subject_list = copy.copy(new_arxiv.subject)
        remove_list = []
        new_ver = arxiv(new_arxiv.author)
        new_ver.parse()
        for count in pyprind.prog_bar(range(len(new_ver.title))):
            if len(set(subject_list) & set(new_ver.category[count])) == 0:
                remove_list.append(count)
        new_ver.arxiv_id = (np.delete(np.array(new_ver.arxiv_id), remove_list, axis=0)).tolist()
        new_ver.time = (np.delete(np.array(new_ver.time), remove_list, axis=0)).tolist()
        new_ver.title = (np.delete(np.array(new_ver.title), remove_list, axis=0)).tolist()
        new_ver.category = (np.delete(np.array(new_ver.category), remove_list, axis=0)).tolist()
        new_ver.pdf = (np.delete(np.array(new_ver.pdf), remove_list, axis=0)).tolist()
        new_ver.contributor = (np.delete(np.array(new_ver.contributor), remove_list, axis=0)).tolist()
        new_ver.count = len(new_ver.title)
        new_ver.subject = combine_subject(new_ver.category)
        print('Removed %d articles' % len(remove_list))
        return new_ver
    else:
        return new_arxiv
Example n. 56
    def institution_verify(self, save=False, institution=['nyu', 'new york university']):
        if self.count != 0:
            remove_list = []
            if save and not os.path.exists('./paper/%s/' % self.author):
                os.makedirs('./paper/%s/' % self.author)
            for count in pyprind.prog_bar(range(len(self.pdf))):
                os.system('wget -q -U "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2.3) Gecko/20100401 '
                          'Firefox/3.6.3" -O ./check.pdf %s' % self.pdf[count])
                if save:
                    #os.system('cp ./check.pdf ./paper/%s/%s.pdf' %(self.author, self.arxiv_id[count]))
                    if len(self.arxiv_id[count].split('/')) > 1:
                        temp_dir = self.arxiv_id[count].split('/')[0]
                        if not os.path.exists('./paper/%s/%s/' % (self.author, temp_dir)):
                            os.makedirs('./paper/%s/%s/' % (self.author, temp_dir))
                    shutil.copy('./check.pdf', './paper/%s/%s.pdf' % (self.author, self.arxiv_id[count]))
                try:
                    text = convert('./check.pdf', pages=[0, 1, 2]).lower()
                    match_flag = False
                    for match_text in institution:
                        if text.find(match_text) != -1:
                            match_flag = True
                            break
                    if match_flag:
                        continue
                    else:
                        remove_list.append(count)
                except Exception:
                    print("Cannot read file %s" % self.arxiv_id[count])
                    remove_list.append(count)
                    continue
            os.system("rm ./check.pdf")
            self.arxiv_id = (np.delete(np.array(self.arxiv_id), remove_list, axis=0)).tolist()
            self.time = (np.delete(np.array(self.time), remove_list, axis=0)).tolist()
            self.title = (np.delete(np.array(self.title), remove_list, axis=0)).tolist()
            self.category = (np.delete(np.array(self.category), remove_list, axis=0)).tolist()
            self.pdf = (np.delete(np.array(self.pdf), remove_list, axis=0)).tolist()
            self.contributor = (np.delete(np.array(self.contributor), remove_list, axis=0)).tolist()
            self.count = len(self.title)
            self.subject = combine_subject(self.category)
            print('Removed %d articles' % len(remove_list))
Example n. 57
def complete_me(content_as_list, output_filename, email):
    """
    Add metadata to the blast output file. Metadata is obtained by querying the
    NCBI database.

    :param content_as_list: blast output content (CSV file) as list of lines.
    :param output_filename: write line by line.
    """
    Entrez.email = email

    for i in pyprind.prog_bar(range(len(content_as_list))):
        line = content_as_list[i]
        line = line.strip()
        if line.startswith('query'):
            with open(output_filename, 'w') as handle:
                handle.write(line + '\tGeneLength\tTitle\n')
            continue

        line_complement = _get_metadata_as_string(line)

        with open(output_filename, 'a') as handle:
            handle.write(line + '\t' + line_complement + '\n')
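A usage sketch for complete_me; the file names are placeholders and _get_metadata_as_string is assumed to be defined elsewhere in the original module.

with open("blast_results.csv") as handle:
    lines = handle.readlines()

complete_me(lines, "blast_results_annotated.tsv", "you@example.org")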
Example n. 58
def spawn_threads(threads, kmeans):
    # change directories if it already isn't in frames
    if 'frames' not in os.getcwd():
        os.chdir('frames')

    q = queue.Queue()
    num_threads = threads

    # get a distributed list of images for the threads
    images = helpers.distribute_frame_lists(num_threads)

    threads = []
    for i in range(num_threads):
        if kmeans:
            thread = threading.Thread(target=kc.get_image_colors,
                                      args=(i, q, images[i]))
        else:
            thread = threading.Thread(target=pc.get_image_colors,
                                      args=(i, q, images[i]))

        threads.append(thread)

    print('{} threads generating frame colors with {} frames each...'.format(num_threads, len(images[0])))
    for thread in threads:
        thread.daemon = True
        thread.start()

    thread_results = [None] * num_threads
    for i in pyprind.prog_bar(range(num_threads)):
        result = q.get()
        thread_results[result[0]] = result[1]

    # return to the original directory
    os.chdir('..')

    return [item for sublist in thread_results for item in sublist]
Example n. 59
    bsrcs = ssrcs[38:39] + gsrcs[38:39]
    bidx  = np.concatenate([sidx[38:39], gidx[38:39]])

    # breadcrumbs - make sure we can examine which source corresponds to
    # which catalog entry
    blocs = np.array([s.params.u for s in bsrcs])
    plocs = primary_field_df[['ra', 'dec']].values[bidx,:]
    assert np.allclose(blocs, plocs), "not the same location! noooo"

    ######################################
    # gibbs step on a handful of sources #
    ######################################
    print "======= running celeste sampler ========"
    # do some resampling, each source keeps each sample
    Nsamps = 10
    for i in pyprind.prog_bar(xrange(Nsamps)):
        # resample photon images
        model.field_list[0].resample_photons(bsrcs, verbose=True)
        # resample source params
        for s in pyprind.prog_bar(bsrcs):
            s.resample()
            s.store_sample()
            s.store_loglike()
        # global/local update
        #for s in bsrcs:
        #    s.sample_type()
        # global updates
        #model.sample_birth()
        #model.sample_death()

    ########################################
Example n. 60
    def __init__(self, items):
        from pyprind import prog_bar
        self.bar = prog_bar(items)
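The rest of this wrapper class is not shown. Since pyprind.prog_bar wraps an iterable and advances the bar as items are consumed, a hedged sketch of how such a wrapper might be driven (the class name and the __iter__ method are assumptions):

from pyprind import prog_bar

class ProgressWrapper:
    def __init__(self, items):
        self.bar = prog_bar(items)

    def __iter__(self):
        # Iterating the stored generator yields the original items while
        # pyprind renders the progress bar.
        return iter(self.bar)

for item in ProgressWrapper(range(1000)):
    pass  # per-item work goes here; the bar advances automatically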