Example #1
def test_initial():
    """
    Make sure the initial count appears in the formatted message (the total
    is unknown here, so a question mark is shown)
    """
    file = cStringIO()
    prog = ProgIter(initial=9001, file=file, show_times=False, clearline=False)
    message = prog.format_message()
    assert strip_ansi(message) == ' 9001/?... \n'
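A minimal sketch of what the initial parameter is for, assuming the standard progiter import: it offsets the displayed count, e.g. when resuming a partially finished job. The loop body is a placeholder.

from progiter import ProgIter

already_done = 9001
total = 10000
# resume reporting at 9001/10000 instead of 0/10000
for item in ProgIter(range(already_done, total), initial=already_done, total=total):
    pass  # placeholder for real work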
Example #2
def test_disabled():
    prog = ProgIter(range(20), enabled=True)
    prog.begin()
    assert prog.started

    prog = ProgIter(range(20), enabled=False)
    prog.begin()
    prog.step()
    assert not prog.started
Example #3
def multiple(size,
             iters,
             pE=0,
             pX=0,
             pZ=0,
             plot_load=False,
             qres=None,
             worker=0,
             seeds=None,
             config=None,
             **kwargs):
    """
    Runs the peeling decoder for a number of iterations. The graph is reused for speedup.
    """
    # fall back to the default (union-find) decoder config if none is given
    if config is None:
        config = decoder_config()

    if seeds is None:
        seeds = [
            te.init_random_seed(worker=worker, iteration=iter)
            for iter in range(iters)
        ]

    graph = go.init_toric_graph(size)
    result = [
        single(size, pE, pX, pZ, plot_load, graph, worker, i, seed, config)
        for i, seed in ProgIter(zip(range(iters), seeds))
    ]

    N_success = sum(result)
    if qres is not None:
        qres.put(N_success)
    else:
        return N_success
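A side note on the ProgIter(zip(...)) pattern above: zip objects have no len(), so ProgIter cannot infer a total and will display a question mark. A minimal sketch, assuming the same loop shape, that restores the count by passing total explicitly:

from progiter import ProgIter

iters = 100
seeds = list(range(iters))
for i, seed in ProgIter(zip(range(iters), seeds), total=iters, desc='multiple'):
    pass  # one decoding iteration per (index, seed) pair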
Example #4
def mols2graphs(mols):
    """
    inputs
      mols: a list of molecules
    outputs
      graphs: a list of dgl graphs
    """
    graphs = []
    for mol in ProgIter(mols):
        n_atoms = mol.GetNumAtoms()
        g = DGLGraph()
        node_feats = []
        for i, atom in enumerate(mol.GetAtoms()):
            assert i == atom.GetIdx()
            node_feats.append(atom_features(atom))
        g.add_nodes(n_atoms)
        bond_src = []
        bond_dst = []
        for i, bond in enumerate(mol.GetBonds()):
            a1 = bond.GetBeginAtom()
            a2 = bond.GetEndAtom()
            begin_idx = a1.GetIdx()
            end_idx = a2.GetIdx()
            bond_src.append(begin_idx)
            bond_dst.append(end_idx)
            bond_src.append(end_idx)
            bond_dst.append(begin_idx)
        g.add_edges(bond_src, bond_dst)

        g.ndata['h'] = torch.Tensor([a.tolist() for a in node_feats])
        graphs.append(g)
    return graphs
Example #5
def multiple(iters, size, p, worker=0, qres=None):

    graph = go.init_toric_graph(size)
    results = [single(graph, p, it, worker) for it in ProgIter(range(iters))]

    if qres is not None:
        qres.put(results)
    else:
        return results
Example #6
def smiles2mols(smiles):
    mols = []
    for sm in ProgIter(smiles):
        mol = get_mol(sm)
        if mol is not None:
            mols.append(mol)
        else:
            print('Could not construct a molecule:', sm)
    return mols
Example #7
def test_rate_format():
    file = cStringIO()
    prog = ProgIter(file=file)
    prog.begin()

    prog._iters_per_second = .000001
    msg = prog.format_message()
    rate_part = msg.split('rate=')[1].split(' Hz')[0]
    assert rate_part == '1e-06'

    prog._iters_per_second = .1
    msg = prog.format_message()
    rate_part = msg.split('rate=')[1].split(' Hz')[0]
    assert rate_part == '0.10'

    prog._iters_per_second = 10000
    msg = prog.format_message()
    rate_part = msg.split('rate=')[1].split(' Hz')[0]
    assert rate_part == '10000.00'
Example #8
def test_unknown_total():
    """
    Make sure a question mark is printed if the total is unknown
    """
    iterable = (_ for _ in range(0, 10))
    file = cStringIO()
    prog = ProgIter(iterable,
                    desc='unknown seq',
                    file=file,
                    show_times=False,
                    verbose=1)
    for n in prog:
        pass
    file.seek(0)
    got = [line.strip() for line in file.readlines()]
    # prints an eroteme if total is unknown
    assert len(got) > 0, 'should have gotten something'
    assert all('?' in line for line in got), 'all lines should have an eroteme'
Example #9
def read_terrible_json(path):
    """ Reads a slightly malformed json file 
    where each line is a different json dict.
    
    Args:
        path (string): the filepath to read from
    
    Returns:
        [dict]: list of dictionaries
    """
    with open(path, "rt") as f:
        lines = []
        test_read_lines = f.readlines()
        for x in ProgIter(test_read_lines):
            if x:
                # "\/" is a valid JSON escape for "/", so this pre-escaping
                # survives json.loads unchanged
                x = x.replace("/", "\\/")
                x = json.loads(x)
                lines.append(x)
    return lines
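A minimal streaming variant of the same idea, sketched under the assumption that the input is ordinary JSON Lines; it wraps the file object directly so the whole line list is never materialized (the total is then unknown and shown as '?'):

import json
from progiter import ProgIter

def read_json_lines(path):
    records = []
    with open(path, 'rt') as f:
        for line in ProgIter(f, desc='parsing'):
            line = line.strip()
            if line:
                records.append(json.loads(line))
    return records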
Example #10
def test_clearline():
    """
    Make sure clearline=False ends the message with a newline and
    clearline=True begins it with a carriage return

    pytest tests/test_progiter.py::test_clearline
    """
    file = cStringIO()
    # Clearline=False version should simply have a newline at the end.
    prog = ProgIter(file=file, show_times=False, clearline=False)
    message = prog.format_message()
    assert strip_ansi(message).strip(' ') == '0/?... \n'
    # The clearline=True version should emit a carriage return at the
    # beginning and have no newline at the end.
    prog = ProgIter(file=file, show_times=False, clearline=True)
    message = prog.format_message()
    assert strip_ansi(message).strip(' ') == '\r    0/?...'
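A sketch of when each mode matters, assuming the standard import: clearline=True (the default) overwrites one terminal line via carriage returns, while clearline=False appends a fresh line per update, which is the right choice when output goes to a log file:

from progiter import ProgIter

with open('progress.log', 'w') as logfile:
    for _ in ProgIter(range(1000), clearline=False, file=logfile,
                      adjust=False, freq=100):
        pass  # every 100th update becomes its own line in progress.log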
Example #11
def test_progiter_offset_0():
    """
    pytest -s  ~/code/progiter/tests/test_progiter.py::test_progiter_offset_0
    """
    file = cStringIO()
    for _ in ProgIter(range(10),
                      total=20,
                      verbose=3,
                      start=0,
                      file=file,
                      freq=5,
                      show_times=False):
        pass
    file.seek(0)
    want = ['0/20...', '5/20...', '10/20...']
    got = [line.strip() for line in file.readlines()]
    if sys.platform.startswith('win32'):  # nocover
        # on windows \r seems to be mixed up with ansi sequences
        from xdoctest.utils import strip_ansi
        got = [strip_ansi(line).strip() for line in got]
    assert got == want
Example #12
def multiple(size, iters, pX=0, qres=None, worker=None):
    """
    Runs the peeling decoder for a number of iterations. The graph is reused for speedup.
    """
    if worker is None:
        print(f"L = {size}, p = {pX}")
        worker = 0

    graph0 = go.init_toric_graph(size)
    graph1 = go.init_toric_graph(size)
    result = [
        single(size, pX, graph0, graph1, worker, i)
        for i in ProgIter(range(iters))
    ]

    suc_count = dd(int)
    for key in result:
        suc_count[key] += 1

    if qres is not None:
        qres.put(suc_count)
    else:
        return suc_count
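Assuming dd above is collections.defaultdict imported under that alias, the tally loop can also be written with collections.Counter; a minimal equivalent sketch:

from collections import Counter

suc_count = Counter(result)  # identical tallies: key -> number of occurrences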
Example #13
def write_shards(tfrecord_path):
    global PREVIOUS_PROTEINS
    problems = []
    proteins = load_proteins(CSV_FILE_NAME)
    for dataset, type in [(proteins, "cif")]:
        shard_number = NUM_FILES - 1 if NUM_FILES > 0 else 0
        for dataset_item in ProgIter(dataset, verbose=1):
            if PREVIOUS_PROTEINS % ITEMS_PER_SHARD == 0:
                # on the first shard there is no previous writer to close yet
                try:
                    writer.close()
                except Exception as e:
                    print('writer.close() exception', e)
                shard_path = os.path.join(tfrecord_path, type)
                if not os.path.exists(shard_path):
                    os.makedirs(shard_path)
                shard_path = os.path.join(shard_path,
                                          str(shard_number) + '.tfrecord')
                writer = tf.io.TFRecordWriter(shard_path, 'ZLIB')
                shard_number += 1
                if shard_number > SHARDS_PER_DATASET:
                    break
            try:
                data = load(type, dataset_item.lower())
                if data:
                    writer.write(data)
                    print('wrote', dataset_item, 'to', shard_path)
                    PREVIOUS_PROTEINS += 1
                else:
                    print('skipped writing', dataset_item, 'to', shard_path)
            except Exception as e:
                print('failed on', shard_number, dataset_item, shard_path)
                print(e)
                problems.append([shard_number, dataset_item, e])
    print('problem children:')
    for problem in problems:
        print(problem)
    print('done!')
Example #14
def write_shards():
    problems = []
    qm9 = load_folder("xyz")
    rxns = load_folder("rxn")
    proteins = load_proteins()
    for dataset, type in [
        (rxns, 'rxn')
    ]:  #[(qm9, "xyz"), (rxns, "rxn"), (proteins, "cif")]:
        shard_number, item_number = 0, 0
        for dataset_item in ProgIter(dataset, verbose=1):
            if item_number % ITEMS_PER_SHARD == 0:
                # on the first shard there is no previous writer to close yet
                try:
                    writer.close()
                except Exception as e:
                    print('writer.close() exception', e)
                shard_path = os.path.join('.', 'datasets', 'tfrecord', type)
                if not os.path.exists(shard_path):
                    os.makedirs(shard_path)
                shard_path = os.path.join(shard_path,
                                          str(shard_number) + '.tfrecord')
                writer = tf.io.TFRecordWriter(shard_path, 'ZLIB')
                shard_number += 1
                if shard_number > SHARDS_PER_DATASET:
                    break
            try:
                data = load(type, dataset_item.lower())
                writer.write(data)
                print('wrote', dataset_item, 'to', shard_path)
            except Exception as e:
                print('failed on', shard_number, dataset_item, shard_path)
                print(e)
                problems.append([shard_number, dataset_item, e])
            item_number += 1
    print('problem children:')
    for problem in problems:
        print(problem)
    print('done!')
Example #15
def test_adjust_freq():
    # nothing to check (that I can think of); run the test for coverage
    prog = ProgIter(range(20),
                    enabled=True,
                    eta_window=None,
                    rel_adjust_limit=4.0)

    # Adjust frequency up to have each update happen every 1sec or so
    prog.freq = 1
    prog.time_thresh = 1.0
    prog._max_between_count = -1.0
    prog._max_between_time = -1.0
    prog._between_time = 1
    prog._between_count = 1000
    prog._adjust_frequency()
    assert prog.freq == 4

    # Adjust frequency down to have each update happen every 1sec or so
    prog.freq = 1000
    prog.time_thresh = 1.0
    prog._max_between_count = -1.0
    prog._max_between_time = -1.0
    prog._between_time = 1
    prog._between_count = 1
    prog._adjust_frequency()
    assert prog.freq == 250

    # No need to adjust frequency to have each update happen every 1sec or so
    prog.freq = 1
    prog.time_thresh = 1.0
    prog._max_between_count = -1.0
    prog._max_between_time = -1.0
    prog._between_time = 1
    prog._between_count = 1
    prog._adjust_frequency()
    assert prog.freq == 1
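A sketch of the behavior this test pokes at, assuming progiter's public constructor arguments: with adjust=True (the default) ProgIter retunes freq so updates land roughly every time_thresh seconds, and rel_adjust_limit caps how far freq may move in a single adjustment (hence 1 -> 4 and 1000 -> 250 above):

from progiter import ProgIter

# freq starts at 1 but is re-estimated so messages appear about once a second
for _ in ProgIter(range(10**7), adjust=True, time_thresh=1.0,
                  rel_adjust_limit=4.0, desc='auto-tuned'):
    pass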
Example #16
def multiple(size,
             config,
             iters,
             ltype="toric",
             paulix=0,
             pauliz=0,
             superoperator=None,
             networked_architecture=False,
             erasure=0,
             measurex=0,
             measurez=0,
             dec=None,
             go=None,
             graph=None,
             qres=None,
             worker=0,
             seeds=None,
             called=True,
             progressbar=True,
             debug=False,
             **kwargs):
    """
    Runs the peeling decoder for a number of iterations. The graph is reused for speedup.
    """

    if qres is None:
        GHZ_success = superoperator.GHZ_success if superoperator is not None else None
        pr.print_configuration(config,
                               iters,
                               size=size,
                               paulix=paulix,
                               pauliz=pauliz,
                               erasure=erasure,
                               measurex=measurex,
                               measurez=measurez,
                               superoperator=superoperator,
                               GHZ_success=GHZ_success,
                               networked_architecture=networked_architecture)
    if graph is None:
        graph = lattice_type(ltype, config, dec, go, size, **kwargs)

    if seeds is None and not config["seeds"]:
        seeds = [
            init_random_seed(worker=worker, iteration=iter)
            for iter in range(iters)
        ]
    elif seeds is None:
        # otherwise take the seeds provided via the config
        seeds = config["seeds"]

    options = dict(
        ltype=ltype,
        superoperator=superoperator,
        networked_architecture=networked_architecture,
        paulix=paulix,
        pauliz=pauliz,
        erasure=erasure,
        measurex=measurex,
        measurez=measurez,
        graph=graph,
        worker=worker,
        called=0,
        debug=debug,
    )

    zipped = zip(ProgIter(range(iters)), seeds) if progressbar else zip(
        range(iters), seeds)
    result = [
        single(size, config, iter=iter, seed=seed, **options, **kwargs)
        for iter, seed in zipped
    ]

    if called:
        output = dict(N=iters, success=sum(result))
        if debug:
            output.update(**get_mean_var(graph.matching_weight, "weight"))
            for key, value in graph.decoder.clist.items():
                output.update(**get_mean_var(value, key))
            db.reset_counters(graph)
        return output
    else:
        output = dict(
            N=iters,
            success=sum(result),
        )
        if debug:
            output.update(weight=graph.matching_weight)
            output.update(**graph.decoder.clist)
            db.reset_counters(graph)
        qres.put(output)
Example #17
def test_eta_window_None():
    # nothing to check (that I can think of); run the test for coverage
    prog = ProgIter(range(20), enabled=True, eta_window=None)
    for _ in prog:
        pass
Example #18
    def calc_synergy(self, agg_duplicate_smiles=True):
        a = self.df.shape[0]
        t1 = t()
        if agg_duplicate_smiles:
            temp = self.df['drug_row'] + " " + self.df[
                'drug_col']  # create a new string which combines drug_row and col
            self.df['drug_row_col'] = temp.transform(str.split).transform(
                frozenset
            )  # split by space, turn into set, make into a new column
            self.df2 = self.df.groupby(
                ["drug_row_col", 'cell_line_name'],
                as_index=False).agg(  #use new column to group
                    drug_row_id=('drug_row_id', lambda x: min(np.unique(x))),
                    drug_col_id=('drug_col_id', lambda x: max(np.unique(x))),
                    synergy_zip=('synergy_zip', np.mean),
                    synergy_bliss=('synergy_bliss', np.mean),
                    synergy_loewe=('synergy_loewe', np.mean),
                    synergy_hsa=('synergy_hsa', np.mean),
                    css_ri=('css_ri', np.mean))
            b = self.df2.shape[0]
        else:
            temp = self.df.groupby(['block_id'])[[
                'synergy_zip', 'synergy_bliss', 'synergy_loewe', 'synergy_hsa'
            ]].mean()
            self.df = self.df.drop_duplicates(subset=['block_id'],
                                              inplace=False)
            self.df = self.df.drop(columns=[
                'drug_col', 'drug_row', 'drug_row_cid', 'drug_col_cid',
                'synergy_zip', 'synergy_bliss', 'synergy_loewe', 'synergy_hsa'
            ],
                                   inplace=False)
            self.df.set_index('block_id', drop=True, inplace=True)
            self.df2 = pd.concat((self.df, temp), axis=1)
            b = self.df2.shape[0]

        print(
            f'shrank dataset from {a} to {b} rows in {round((t()-t1), 2)} seconds'
        )

        if self.bitaverage:
            self.holder = np.zeros(
                (len(self.df2), len(self.fingerprints[1][self.low:self.high])),
                dtype=np.float32)

            for e, i in enumerate(self.df2.itertuples()):
                self.holder[e] = np.mean([
                    self.fingerprints[i.drug_row_id][self.low:self.high],
                    self.fingerprints[i.drug_col_id][self.low:self.high]
                ],
                                         axis=0)

        if not self.bitaverage:
            self.holder = np.zeros((len(
                self.df2), 2 * len(self.fingerprints[1][self.low:self.high])),
                                   dtype=np.float32)

            for e, i in enumerate(self.df2.itertuples()):
                self.holder[e] = np.append(
                    self.fingerprints[i.drug_row_id][self.low:self.high],
                    self.fingerprints[i.drug_col_id][self.low:self.high])

        ready = dict()

        for x in ProgIter(
            [
                'css_ri', 'synergy_zip', 'synergy_bliss', 'synergy_loewe',
                'synergy_hsa'
            ],
                desc=
                'creating 5 datasets packed into dict with metrics as key names',
                show_times=False,
                total=5):
            if not self.bitaverage:
                half = self.holder.shape[1] // 2
                columns = [
                    f'{zz}_drugRow' if zz < half else f'{zz}_drugCol'
                    for zz in range(self.holder.shape[1])
                ]  # [f(x) if condition else g(x) for x in sequence]
            if self.bitaverage:
                columns = [
                    str(zz) + '_drugAveraged'
                    for zz in range(0, int(self.holder.shape[1]), 1)
                ]
            a = copy.deepcopy(self.holder)
            a = pd.DataFrame(a, columns=columns, index=self.df2.index)
            if agg_duplicate_smiles:
                b = self.df2.loc[:, ['cell_line_name', 'drug_row_col', x]]
            else:
                b = self.df2.loc[:, ['cell_line_name', x]]
            out = pd.concat((b, a), axis=1)

            ready[x] = out

        ready['name'] = self.fps_name[36:]
        return ready
Example #19
def run_train_all_sklearn(file, fp_name, cv=5, verbose=0, seed=1):

    np.random.seed(seed)
    c = defaultdict(list)

    for k in ProgIter([
            'synergy_zip', 'synergy_bliss', 'synergy_loewe', 'synergy_hsa',
            'css_ri', 'name'
    ],
                      verbose=verbose,
                      total=5):
        v = file[k]

        if k != 'name':
            temp = dict()  # for results storage; assumes "name" comes last

            if 'drug_row_col' in v.columns:
                v.drop(columns=['drug_row_col'], inplace=True)

            cat_cols = ['cell_line_name']
            categories = [
                v[column].unique() for column in v[cat_cols]
            ]  # manually find all available categories for one-hot

            # pipelines
            encode = Pipeline(steps=[('one-hot-encode',
                                      OneHotEncoder(categories=categories))])
            processor = ColumnTransformer(transformers=[
                ('cat_encoding', encode, cat_cols), ('dropping', 'drop', [k])
            ],
                                          remainder='passthrough')

            catbst = ColumnTransformer(transformers=[('dropping', 'drop', [k])
                                                     ],
                                       remainder='passthrough')

            # regressions
            lr = make_pipeline(processor, linear_model.LinearRegression())
            ridge = make_pipeline(processor, linear_model.Ridge())
            lasso = make_pipeline(processor, linear_model.Lasso())
            elastic = make_pipeline(processor, linear_model.ElasticNet())
            lassolars = make_pipeline(processor, linear_model.LassoLars())
            b_ridge = make_pipeline(processor, linear_model.BayesianRidge())
            kernel = DotProduct() + WhiteKernel()
            gpr = make_pipeline(processor,
                                GaussianProcessRegressor(kernel=kernel))
            linSVR = make_pipeline(processor, LinearSVR())
            hist_gbr = make_pipeline(
                processor,
                HistGradientBoostingRegressor(warm_start=True, max_depth=6))
            rfr = make_pipeline(
                processor,
                RandomForestRegressor(warm_start=True, max_depth=6, n_jobs=3))
            iso = make_pipeline(processor,
                                IsotonicRegression(increasing='auto'))
            xgb = make_pipeline(
                processor, XGBRegressor(tree_method='gpu_hist', max_depth=6))
            cbt = make_pipeline(
                catbst,
                CatBoostRegressor(task_type='GPU',
                                  depth=6,
                                  cat_features=np.array([0]),
                                  verbose=False))

            # note: gpr appears twice in mls, so its second CV run overwrites
            # the first "gpr" entry in temp
            mls = [
                cbt, rfr, gpr, hist_gbr, lr, ridge, lasso, elastic, lassolars,
                b_ridge, gpr, linSVR, iso
            ]
            mls_names = [
                "cbt", "rfr", "gpr", "hist_gbr", "lr", "ridge", "lasso",
                "elastic", "lassolars", "b_ridge", "gpr", "linSVR", "iso"
            ]

            # results
            start = time.time()
            for MODEL, name in zip(mls, mls_names):
                print(f'\n{name}')
                if 'cbt' == name:
                    n_jobs = 1
                else:
                    n_jobs = cv
                cv_dict = cross_validate(
                    MODEL,
                    v,
                    v[k],
                    cv=cv,
                    scoring={
                        "pearsonr": pearson,
                        "rmse": rmse
                    },
                    return_train_score=False,
                    verbose=verbose,
                    n_jobs=n_jobs,
                )
                temp[name] = {
                    'test_pearsonr': np.nanmean(cv_dict['test_pearsonr']),
                    'test_rmse': abs(np.nanmean(cv_dict['test_rmse']))
                }
                print(temp[name])
            print(f'{k} took {int(time.time()-start)/60} mins')

            c[k] = temp
        else:
            nm = f'/tf/notebooks/code_for_pub/_logs_as_python_files/{fp_name}_13models_5foldCV_{time.ctime()}.pickle'
            with open(nm, 'wb') as file:
                pickle.dump(c, file)
            print(f'saving complete to {nm}')
    return c
Example #20
def run_train(file,
              fp_name,
              cv=10,
              for_valid=0.4,
              ordered=False,
              ram_fraction=0.95,
              save=False,
              cv_params=None):
    cv_lower = 1
    cv_higher = 1 + cv
    if cv_params is None:
        cv_params = dict()
        cv_params['bootstrap_type'] = 'Poisson'
        cv_params['l2_leaf_reg'] = 9
        cv_params['learning_rate'] = 0.15
        cv_params['depth'] = 10
        cv_params['cat_features'] = ['cell_line_name']
        cv_params['use_best_model'] = True
        cv_params['early_stopping_rounds'] = 50
        cv_params['iterations'] = 5000
        cv_params['task_type'] = 'GPU'
    if ordered:
        cv_params['boosting_type'] = 'Ordered'

    cat_features = cv_params['cat_features']
    cv_params['gpu_ram_part'] = ram_fraction

    f = for_valid
    c = defaultdict(list)

    for k in ProgIter([
            'synergy_zip', 'synergy_bliss', 'synergy_loewe', 'synergy_hsa',
            'css_ri', 'name'
    ],
                      total=5,
                      verbose=1):
        v_temp = file[k]
        if k != 'name':
            if 'drug_row_col' in v_temp.columns:
                v = v_temp.drop(columns=['drug_row_col'], inplace=False)
            else:
                v = v_temp
            size = int(v.shape[0] * f)  # hold out fraction f (default 40%) for validation
            a = []
            for i in range(cv_lower, cv_higher, 1):
                print(k)
                # sampling
                np.random.seed(i)
                idx_valid = pd.Index(
                    np.random.choice(v.index, size, replace=False))
                idx_test = v.index.difference(idx_valid)
                train = v.loc[
                    idx_test, :]  # returns df without the dropped idx
                valid = v.loc[idx_valid, :]

                #prep datasets
                true_labels = valid.pop(k)
                y = train.pop(k)
                eval_dataset = Pool(valid,
                                    true_labels,
                                    cat_features=cat_features)

                #create a model
                model = CatBoostRegressor(**cv_params)
                model.fit(train,
                          y,
                          eval_set=eval_dataset,
                          plot=False,
                          verbose=1000)

                # get stats
                preds = model.predict(valid)
                corr = pearsonr(true_labels, preds)
                rmse = np.sqrt(mean_squared_error(true_labels, preds))
                if save:
                    print(f'iteration: {i}, pearson: {corr}, rmse: {rmse}'
                          )  #,file=f, flush=True)
                    a.append([corr, rmse, true_labels, preds])
                else:
                    a.append([corr, rmse])
                    print(f'iteration: {i}, pearson: {corr}, rmse: {rmse}'
                          )  #,file=f, flush=True)
            c[k].append(a)
        else:
            # name of the fingerprint, validation fraction, number of CV folds
            c['name'].append([v_temp, for_valid, cv])
            if save:
                nm = f'/tf/notebooks/code_for_pub/_logs_as_python_files/{fp_name}_noreplicates_{for_valid}_{time.ctime()}.pickle'
                with open(nm, 'wb') as file:
                    pickle.dump(c, file)
    return c
Example #21
def test_tqdm_compatibility():
    prog = ProgIter(range(20), total=20, miniters=17, show_times=False)
    assert prog.pos == 0
    assert prog.freq == 17
    for _ in prog:
        pass

    with CaptureStdout() as cap:
        ProgIter.write('foo')
    assert cap.text.strip() == 'foo'

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=False)
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=True)
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description_str('new desc')
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_postfix({'foo': 'bar'}, baz='biz', x=object(), y=2)
        prog.begin()
    assert prog.length is None
    assert 'foo=bar' in cap.text.strip()
    assert 'baz=biz' in cap.text.strip()
    assert 'y=2' in cap.text.strip()
    assert 'x=<object' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_postfix_str('bar baz', refresh=False)
    assert 'bar baz' not in cap.text.strip()
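A usage sketch of the tqdm-flavored surface this test exercises, assuming only the methods shown above (set_description, set_postfix, the classmethod write, and the miniters alias for freq):

from progiter import ProgIter

prog = ProgIter(range(100), miniters=10, show_times=False)
prog.set_description('working')
for i in prog:
    if i % 25 == 0:
        prog.set_postfix(checkpoint=i)
ProgIter.write('finished')  # prints without clobbering the progress line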
Example #22
def time_progiter_overhead():
    # Time the overhead of this function
    import timeit
    import textwrap
    setup = textwrap.dedent('''
        from sklearn.externals.progiter import ProgIter
        import numpy as np
        import time
        from six.moves import cStringIO, range
        import utool as ut
        N = 500
        file = cStringIO()
        rng = np.random.RandomState(42)
        ndims = 2
        vec1 = rng.rand(113, ndims)
        vec2 = rng.rand(71, ndims)

        def minimal_wrapper1(sequence):
            for item in sequence:
                yield item

        def minimal_wrapper2(sequence):
            for count, item in enumerate(sequence, start=1):
                yield item

        def minimal_wrapper3(sequence):
            count = 0
            for item in sequence:
                yield item
                count += 1

        def minwrap4(sequence):
            for count, item in enumerate(sequence, start=1):
                yield item
                if count % 100:
                    pass

        def minwrap5(sequence):
            for count, item in enumerate(sequence, start=1):
                yield item
                if time.time() < 100:
                    pass
        ''')
    statements = {
        'baseline': '[{work} for n in range(N)]',
        'creation': 'ProgIter(range(N))',
        'minwrap1': '[{work} for n in minimal_wrapper1(range(N))]',
        'minwrap2': '[{work} for n in minimal_wrapper2(range(N))]',
        'minwrap3': '[{work} for n in minimal_wrapper3(range(N))]',
        'minwrap4': '[{work} for n in minwrap4(range(N))]',
        'minwrap5': '[{work} for n in minwrap5(range(N))]',
        '(sk-disabled)': '[{work} for n in ProgIter(range(N), enabled=False, file=file)]',  # NOQA
        '(sk-plain)': '[{work} for n in ProgIter(range(N), file=file)]',  # NOQA
        '(sk-freq)': '[{work} for n in ProgIter(range(N), file=file, freq=100)]',  # NOQA
        '(sk-no-adjust)': '[{work} for n in ProgIter(range(N), file=file, adjust=False, freq=200)]',  # NOQA
        '(sk-high-freq)': '[{work} for n in ProgIter(range(N), file=file, adjust=False, freq=200)]',  # NOQA

        # '(ut-disabled)'  : '[{work} for n in ut.ProgIter(range(N), enabled=False, file=file)]',    # NOQA
        # '(ut-plain)'     : '[{work} for n in ut.ProgIter(range(N), file=file)]',  # NOQA
        # '(ut-freq)'      : '[{work} for n in ut.ProgIter(range(N), freq=100, file=file)]',  # NOQA
        # '(ut-no-adjust)' : '[{work} for n in ut.ProgIter(range(N), freq=200, adjust=False, file=file)]',  # NOQA
        # '(ut-high-freq)' : '[{work} for n in ut.ProgIter(range(N), file=file, adjust=False, freq=200)]',  # NOQA
    }
    # statements = {
    #     'calc_baseline': '[vec1.dot(vec2.T) for n in range(M)]',  # NOQA
    #     'calc_plain': '[vec1.dot(vec2.T) for n in ProgIter(range(M), file=file)]',  # NOQA
    #     'calc_plain_ut': '[vec1.dot(vec2.T) for n in ut.ProgIter(range(M), file=file)]',  # NOQA
    # }
    timings = {}

    work_strs = [
        'None',
        'vec1.dot(vec2.T)',
        'n % 10 == 0',
    ]
    work = work_strs[0]
    # work = work_strs[1]

    number = 10000
    prog = ProgIter(desc='timing', adjust=True)
    for key, stmt in prog(statements.items()):
        prog.set_extra(key)
        secs = timeit.timeit(stmt.format(work=work), setup, number=number)
        timings[key] = secs / number
Example #23
def test_progiter():
    # Define a function that takes some time
    def is_prime(n):
        return n >= 2 and not any(n % i == 0 for i in range(2, n))

    N = 500

    if False:
        file = cStringIO()
        prog = ProgIter(range(N),
                        clearline=False,
                        file=file,
                        freq=N // 10,
                        adjust=False)
        file.seek(0)
        print(file.read())

        prog = ProgIter(range(N), clearline=False)
        for n in prog:
            was_prime = is_prime(n)
            prog.set_extra('n=%r, was_prime=%r' % (
                n,
                was_prime,
            ))
            if (n + 1) % 128 == 0 and was_prime:
                prog.set_extra('n=%r, was_prime=%r EXTRA' % (
                    n,
                    was_prime,
                ))
        file.seek(0)
        print(file.read())

    total = 200
    N = 5000
    N0 = N - total
    print('N = %r' % (N, ))
    print('N0 = %r' % (N0, ))

    print('\n-----')
    print('Demo #0: progress can be disabled and incur essentially 0 overhead')
    print('However, the overhead of enabled progress is minimal and typically '
          'insignificant')
    print('this is verbosity mode verbose=0')
    sequence = (is_prime(n) for n in range(N0, N))
    # with ub.Timer('demo0'):
    if True:
        psequence = ProgIter(sequence,
                             total=total,
                             desc='demo0',
                             enabled=False)
        list(psequence)

    print('\n-----')
    print('Demo #1: progress is shown by default in the same line')
    print('this is verbosity mode verbose=1')
    sequence = (is_prime(n) for n in range(N0, N))
    # with ub.Timer('demo1'):
    if True:
        psequence = ProgIter(sequence, total=total, desc='demo1')
        list(psequence)

    # Default behavior adjusts frequency of progress reporting so
    # the performance of the loop is minimally impacted
    print('\n-----')
    print('Demo #2: clearline=False prints multiple lines.')
    print('Progress is only printed as needed')
    print('Notice the adjustment behavior of the print frequency')
    print('this is verbosity mode verbose=2')
    # with ub.Timer('demo2'):
    if True:
        sequence = (is_prime(n) for n in range(N0, N))
        psequence = ProgIter(sequence,
                             total=total,
                             clearline=False,
                             desc='demo2')
        list(psequence)
        # import utool as ut
        # print(ut.repr4(psequence.__dict__))

    print('\n-----')
    print('Demo #3: Adjustments can be turned off to give constant feedback')
    print('this is verbosity mode verbose=3')
    sequence = (is_prime(n) for n in range(N0, N))
    # with ub.Timer('demo3'):
    if True:
        psequence = ProgIter(sequence,
                             total=total,
                             adjust=False,
                             clearline=False,
                             freq=100,
                             desc='demo3')
        list(psequence)
Example #24
        if not isinstance(SL, complex):
            count = count_B(SL, count)
            count = count_B(SL, count)

    return count


def count(S, counter):
    N = math.log(S) / math.log(3) // 1  # floor of the base-3 logarithm of S
    return counter(S, N)


Mmax = 1000

M = [4 + i**2 for i in range(Mmax)]
cb = [count(m, count_B) for m in ProgIter(M)]
cc = [count(m, count_C) for m in ProgIter(M)]

f0 = plt.figure()
plt.plot(M, cb, color="C0", label='Numeric data B: no bound')
plt.plot(M, cc, color="C2", label='Numeric data C: with bound')
plt.plot(M, [2 * x * math.log(x) / math.log(27) - 2 / 3 * x + 1 for x in M],
         'r:',
         label="Analytic function $O(N\log_{27}N$)")
plt.legend()
plt.ylabel("operations")
plt.xlabel("N")
plt.title("Complexity of algorithm")
f0.savefig("../figures/complexity_numeric_vs_analytic.pdf",
           transparent=True,
           format="pdf")