def test_initial():
    """
    Make sure a question mark is printed if the total is unknown
    """
    file = cStringIO()
    prog = ProgIter(initial=9001, file=file, show_times=False,
                    clearline=False)
    message = prog.format_message()
    assert strip_ansi(message) == ' 9001/?... \n'
def test_disabled():
    prog = ProgIter(range(20), enabled=True)
    prog.begin()
    assert prog.started

    prog = ProgIter(range(20), enabled=False)
    prog.begin()
    prog.step()
    assert not prog.started
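# A minimal usage sketch of the ProgIter pattern the tests above exercise:
# wrapping an iterable versus driving the progress meter manually. Only the
# names already used in these tests (desc, total, enabled, begin, step, end)
# appear here; the loop bodies are illustrative.
def _demo_basic_usage():
    # Automatic mode: wrap any iterable and iterate as usual.
    for _ in ProgIter(range(100), desc='processing', total=100):
        pass

    # Manual mode: begin/step/end, as in test_disabled above.
    prog = ProgIter(desc='manual', total=100, enabled=True)
    prog.begin()
    for _ in range(100):
        prog.step()
    prog.end()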
def multiple(size, iters, pE=0, pX=0, pZ=0, plot_load=False, qres=None,
             worker=0, seeds=None, config=None, **kwargs):
    """
    Runs the peeling decoder for a number of iterations. The graph is reused
    for speedup.
    """
    # import uf config
    if config is None:
        config = decoder_config()

    if seeds is None:
        seeds = [
            te.init_random_seed(worker=worker, iteration=iter)
            for iter in range(iters)
        ]

    graph = go.init_toric_graph(size)

    result = [
        single(size, pE, pX, pZ, plot_load, graph, worker, i, seed, config)
        for i, seed in ProgIter(zip(range(iters), seeds))
    ]

    N_succes = sum(result)

    if qres is not None:
        qres.put(N_succes)
    else:
        return N_succes
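# A hedged sketch of how the qres/worker arguments above are typically used:
# several workers run multiple() in parallel and put their success counts on
# a shared queue. The sizes, error rate, and worker count are illustrative
# assumptions, not values from the source.
def _demo_multiple_parallel(size=8, iters=250, pX=0.05, n_workers=4):
    import multiprocessing as mp
    qres = mp.Queue()
    workers = [
        mp.Process(target=multiple, args=(size, iters),
                   kwargs=dict(pX=pX, qres=qres, worker=w))
        for w in range(n_workers)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    # each worker put one integer success count on the queue
    total_success = sum(qres.get() for _ in range(n_workers))
    return total_success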
def mols2graphs(mols):
    """
    inputs
        mols: a list of molecules
    outputs
        cand_graphs: a list of dgl graphs
    """
    graphs = []
    for mol in ProgIter(mols):
        n_atoms = mol.GetNumAtoms()
        g = DGLGraph()
        node_feats = []
        for i, atom in enumerate(mol.GetAtoms()):
            assert i == atom.GetIdx()
            node_feats.append(atom_features(atom))
        g.add_nodes(n_atoms)
        bond_src = []
        bond_dst = []
        for i, bond in enumerate(mol.GetBonds()):
            a1 = bond.GetBeginAtom()
            a2 = bond.GetEndAtom()
            begin_idx = a1.GetIdx()
            end_idx = a2.GetIdx()
            bond_src.append(begin_idx)
            bond_dst.append(end_idx)
            bond_src.append(end_idx)
            bond_dst.append(begin_idx)
        g.add_edges(bond_src, bond_dst)
        g.ndata['h'] = torch.Tensor([a.tolist() for a in node_feats])
        graphs.append(g)
    return graphs
def multiple(iters, size, p, worker=0, qres=None):
    graph = go.init_toric_graph(size)
    results = [single(graph, p, it, worker) for it in ProgIter(range(iters))]
    if qres is not None:
        qres.put(results)
    else:
        return results
def smiles2mols(smiles):
    mols = []
    for sm in ProgIter(smiles):
        mol = get_mol(sm)
        if mol is not None:
            mols.append(mol)
        else:
            print('Could not construct a molecule:', sm)
    return mols
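# A hedged usage sketch for smiles2mols. It assumes get_mol wraps something
# like rdkit.Chem.MolFromSmiles and returns None on parse failure; the SMILES
# strings below are only illustrative.
def _demo_smiles2mols():
    smiles = ['CCO', 'c1ccccc1', 'not-a-smiles']
    mols = smiles2mols(smiles)  # the invalid entry is reported and skipped
    print(len(mols), 'molecules parsed out of', len(smiles))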
def test_rate_format():
    # Define a function that takes some time
    file = cStringIO()
    prog = ProgIter(file=file)
    prog.begin()

    prog._iters_per_second = .000001
    msg = prog.format_message()
    rate_part = msg.split('rate=')[1].split(' Hz')[0]
    assert rate_part == '1e-06'

    prog._iters_per_second = .1
    msg = prog.format_message()
    rate_part = msg.split('rate=')[1].split(' Hz')[0]
    assert rate_part == '0.10'

    prog._iters_per_second = 10000
    msg = prog.format_message()
    rate_part = msg.split('rate=')[1].split(' Hz')[0]
    assert rate_part == '10000.00'
def test_unknown_total():
    """
    Make sure a question mark is printed if the total is unknown
    """
    iterable = (_ for _ in range(0, 10))
    file = cStringIO()
    prog = ProgIter(iterable, desc='unknown seq', file=file,
                    show_times=False, verbose=1)
    for n in prog:
        pass
    file.seek(0)
    got = [line.strip() for line in file.readlines()]
    # prints an eroteme if total is unknown
    assert len(got) > 0, 'should have gotten something'
    assert all('?' in line for line in got), 'all lines should have an eroteme'
def read_terrible_json(path):
    """
    Reads a slightly malformed json file where each line is a different
    json dict.

    Args:
        path (string): the filepath to read from

    Returns:
        [dict]: list of dictionaries
    """
    with open(path, "rt") as f:
        lines = []
        test_read_lines = [x for x in f.readlines()]
        for x in ProgIter(test_read_lines):
            if x:
                # escape forward slashes; "\/" is a legal JSON escape for "/"
                x = x.replace("/", "\\/")
                x = json.loads(x)
                lines.append(x)
    return lines
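# A hedged usage sketch for read_terrible_json: write a small line-delimited
# JSON file and read it back. The file name and records are hypothetical.
def _demo_read_terrible_json():
    import json
    with open('example_lines.json', 'wt') as f:
        f.write(json.dumps({'id': 1, 'path': 'a/b'}) + '\n')
        f.write(json.dumps({'id': 2, 'path': 'c/d'}) + '\n')
    records = read_terrible_json('example_lines.json')
    assert [r['id'] for r in records] == [1, 2]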
def test_clearline():
    """
    Make sure a question mark is printed if the total is unknown

    pytest tests/test_progiter.py::test_clearline
    """
    file = cStringIO()
    # Clearline=False version should simply have a newline at the end.
    prog = ProgIter(file=file, show_times=False, clearline=False)
    message = prog.format_message()
    assert strip_ansi(message).strip(' ') == '0/?... \n'
    # Clearline=True version should carriage return at the beginning and have
    # no newline at the end.
    prog = ProgIter(file=file, show_times=False, clearline=True)
    message = prog.format_message()
    assert strip_ansi(message).strip(' ') == '\r 0/?...'
def test_progiter_offset_0():
    """
    pytest -s ~/code/progiter/tests/test_progiter.py::test_progiter_offset_0
    """
    # Define a function that takes some time
    file = cStringIO()
    for _ in ProgIter(range(10), total=20, verbose=3, start=0, file=file,
                      freq=5, show_times=False):
        pass
    file.seek(0)
    want = ['0/20...', '5/20...', '10/20...']
    got = [line.strip() for line in file.readlines()]
    if sys.platform.startswith('win32'):  # nocover
        # on windows \r seems to be mixed up with ansi sequences
        from xdoctest.utils import strip_ansi
        got = [strip_ansi(line).strip() for line in got]
    assert got == want
def multiple(size, iters, pX=0, qres=None, worker=None):
    """
    Runs the peeling decoder for a number of iterations. The graph is reused
    for speedup.
    """
    if worker is None:
        print(f"L = {size}, p = {pX}")
        worker = 0

    graph0 = go.init_toric_graph(size)
    graph1 = go.init_toric_graph(size)

    result = [
        single(size, pX, graph0, graph1, worker, i)
        for i in ProgIter(range(iters))
    ]

    suc_count = dd(int)
    for key in result:
        suc_count[key] += 1

    if qres is not None:
        qres.put(suc_count)
    else:
        return suc_count
def write_shards(tfrecord_path):
    global PREVIOUS_PROTEINS
    problems = []
    proteins = load_proteins(CSV_FILE_NAME)
    for dataset, type in [(proteins, "cif")]:
        shard_number = NUM_FILES - 1 if NUM_FILES > 0 else 0
        for dataset_item in ProgIter(dataset, verbose=1):
            if PREVIOUS_PROTEINS % ITEMS_PER_SHARD == 0:
                try:
                    writer.close()
                except Exception as e:
                    print('writer.close() exception', e)
                shard_path = os.path.join(tfrecord_path, type)
                if not os.path.exists(shard_path):
                    os.makedirs(shard_path)
                shard_path = os.path.join(shard_path,
                                          str(shard_number) + '.tfrecord')
                writer = tf.io.TFRecordWriter(shard_path, 'ZLIB')
                shard_number += 1
                if shard_number > SHARDS_PER_DATASET:
                    break
            try:
                data = load(type, dataset_item.lower())
                if data:
                    writer.write(data)
                    print('wrote', dataset_item, 'to', shard_path)
                    PREVIOUS_PROTEINS += 1
                else:
                    print('skipped writing', dataset_item, 'to', shard_path)
            except Exception as e:
                print('failed on', shard_number, dataset_item, shard_path)
                print(e)
                problems.append([shard_number, dataset_item, e])
    print('problem children:')
    [print(problem) for problem in problems]
    print('done!')
def write_shards():
    problems = []
    qm9 = load_folder("xyz")
    rxns = load_folder("rxn")
    proteins = load_proteins()
    for dataset, type in [(rxns, 'rxn')]:  # [(qm9, "xyz"), (rxns, "rxn"), (proteins, "cif")]:
        shard_number, item_number = 0, 0
        for dataset_item in ProgIter(dataset, verbose=1):
            if item_number % ITEMS_PER_SHARD == 0:
                try:
                    writer.close()
                except Exception as e:
                    print('writer.close() exception', e)
                shard_path = os.path.join('.', 'datasets', 'tfrecord', type)
                if not os.path.exists(shard_path):
                    os.makedirs(shard_path)
                shard_path = os.path.join(shard_path,
                                          str(shard_number) + '.tfrecord')
                writer = tf.io.TFRecordWriter(shard_path, 'ZLIB')
                shard_number += 1
                if shard_number > SHARDS_PER_DATASET:
                    break
            try:
                data = load(type, dataset_item.lower())
                writer.write(data)
                print('wrote', dataset_item, 'to', shard_path)
            except Exception as e:
                print('failed on', shard_number, dataset_item, shard_path)
                print(e)
                problems.append([shard_number, dataset_item, e])
            item_number += 1
    print('problem children:')
    [print(problem) for problem in problems]
    print('done!')
def test_adjust_freq():
    # nothing to check (that I can think of) run test for coverage
    prog = ProgIter(range(20), enabled=True, eta_window=None,
                    rel_adjust_limit=4.0)

    # Adjust frequency up to have each update happen every 1sec or so
    prog.freq = 1
    prog.time_thresh = 1.0
    prog._max_between_count = -1.0
    prog._max_between_time = -1.0
    prog._between_time = 1
    prog._between_count = 1000
    prog._adjust_frequency()
    assert prog.freq == 4

    # Adjust frequency down to have each update happen every 1sec or so
    prog.freq = 1000
    prog.time_thresh = 1.0
    prog._max_between_count = -1.0
    prog._max_between_time = -1.0
    prog._between_time = 1
    prog._between_count = 1
    prog._adjust_frequency()
    assert prog.freq == 250

    # No need to adjust frequency to have each update happen every 1sec or so
    prog.freq = 1
    prog.time_thresh = 1.0
    prog._max_between_count = -1.0
    prog._max_between_time = -1.0
    prog._between_time = 1
    prog._between_count = 1
    prog._adjust_frequency()
    assert prog.freq == 1
def multiple(size, config, iters, ltype="toric", paulix=0, pauliz=0,
             superoperator=None, networked_architecture=False, erasure=0,
             measurex=0, measurez=0, dec=None, go=None, graph=None, qres=None,
             worker=0, seeds=None, called=True, progressbar=True, debug=False,
             **kwargs):
    """
    Runs the peeling decoder for a number of iterations. The graph is reused
    for speedup.
    """
    if qres is None:
        GHZ_success = superoperator.GHZ_success if superoperator is not None else None
        pr.print_configuration(config, iters, size=size, paulix=paulix,
                               pauliz=pauliz, erasure=erasure,
                               measurex=measurex, measurez=measurez,
                               superoperator=superoperator,
                               GHZ_success=GHZ_success,
                               networked_architecture=networked_architecture)
    if graph is None:
        graph = lattice_type(ltype, config, dec, go, size, **kwargs)

    if seeds is None and not config["seeds"]:
        seeds = [
            init_random_seed(worker=worker, iteration=iter)
            for iter in range(iters)
        ]
    elif not config["seeds"]:
        seeds = config["seeds"]

    options = dict(
        ltype=ltype,
        superoperator=superoperator,
        networked_architecture=networked_architecture,
        paulix=paulix,
        pauliz=pauliz,
        erasure=erasure,
        measurex=measurex,
        measurez=measurez,
        graph=graph,
        worker=worker,
        called=0,
        debug=debug,
    )

    zipped = zip(ProgIter(range(iters)), seeds) if progressbar else zip(
        range(iters), seeds)
    result = [
        single(size, config, iter=iter, seed=seed, **options, **kwargs)
        for iter, seed in zipped
    ]

    if called:
        output = dict(N=iters, success=sum(result))
        if debug:
            output.update(**get_mean_var(graph.matching_weight, "weight"))
            for key, value in graph.decoder.clist.items():
                output.update(**get_mean_var(value, key))
            db.reset_counters(graph)
        return output
    else:
        output = dict(
            N=iters,
            success=sum(result),
        )
        if debug:
            output.update(weight=graph.matching_weight)
            output.update(**graph.decoder.clist)
            db.reset_counters(graph)
        qres.put(output)
def test_eta_window_None():
    # nothing to check (that I can think of) run test for coverage
    prog = ProgIter(range(20), enabled=True, eta_window=None)
    for _ in prog:
        pass
def calc_synergy(self, agg_duplicate_smiles=True):
    a = self.df.shape[0]
    t1 = t()
    if agg_duplicate_smiles == True:
        # create a new string which combines drug_row and drug_col
        temp = self.df['drug_row'] + " " + self.df['drug_col']
        # split by space, turn into a frozenset, make into a new column
        self.df['drug_row_col'] = temp.transform(str.split).transform(frozenset)
        # use the new column to group
        self.df2 = self.df.groupby(
            ["drug_row_col", 'cell_line_name'], as_index=False).agg(
                drug_row_id=('drug_row_id', lambda x: min(np.unique(x))),
                drug_col_id=('drug_col_id', lambda x: max(np.unique(x))),
                synergy_zip=('synergy_zip', np.mean),
                synergy_bliss=('synergy_bliss', np.mean),
                synergy_loewe=('synergy_loewe', np.mean),
                synergy_hsa=('synergy_hsa', np.mean),
                css_ri=('css_ri', np.mean))
        b = self.df2.shape[0]
    else:
        temp = self.df.groupby(['block_id'])[[
            'synergy_zip', 'synergy_bliss', 'synergy_loewe', 'synergy_hsa'
        ]].mean()
        self.df = self.df.drop_duplicates(subset=['block_id'], inplace=False)
        self.df = self.df.drop(columns=[
            'drug_col', 'drug_row', 'drug_row_cid', 'drug_col_cid',
            'synergy_zip', 'synergy_bliss', 'synergy_loewe', 'synergy_hsa'
        ], inplace=False)
        self.df.set_index('block_id', drop=True, inplace=True)
        self.df2 = pd.concat((self.df, temp), axis=1)
        b = self.df2.shape[0]

    print(f'shrank dataset from {a} to {b} rows in {round((t()-t1), 2)} seconds')

    if self.bitaverage:
        self.holder = np.zeros(
            (len(self.df2), len(self.fingerprints[1][self.low:self.high])),
            dtype=np.float32)
        for e, i in enumerate(self.df2.itertuples()):
            self.holder[e] = np.mean([
                self.fingerprints[i.drug_row_id][self.low:self.high],
                self.fingerprints[i.drug_col_id][self.low:self.high]
            ], axis=0)
    if not self.bitaverage:
        self.holder = np.zeros(
            (len(self.df2), 2 * len(self.fingerprints[1][self.low:self.high])),
            dtype=np.float32)
        for e, i in enumerate(self.df2.itertuples()):
            self.holder[e] = np.append(
                self.fingerprints[i.drug_row_id][self.low:self.high],
                self.fingerprints[i.drug_col_id][self.low:self.high])

    ready = dict()
    for x in ProgIter(
            ['css_ri', 'synergy_zip', 'synergy_bliss', 'synergy_loewe',
             'synergy_hsa'],
            desc='creating 5 datasets packed into dict with metrics as key names',
            show_times=False,
            total=5):
        if not self.bitaverage:
            # [f(x) if condition else g(x) for x in sequence]
            columns = [
                str(zz) + '_drugRow'
                if zz in range(0, int(self.holder.shape[1] / 2), 1)
                else str(zz) + '_drugCol'
                for zz in range(int(self.holder.shape[1]))
            ]
        if self.bitaverage:
            columns = [
                str(zz) + '_drugAveraged'
                for zz in range(0, int(self.holder.shape[1]), 1)
            ]
        a = copy.deepcopy(self.holder)
        a = pd.DataFrame(a, columns=columns, index=self.df2.index)
        if agg_duplicate_smiles:
            b = self.df2.loc[:, ['cell_line_name', 'drug_row_col', x]]
        else:
            b = self.df2.loc[:, ['cell_line_name', x]]
        out = pd.concat((b, a), axis=1)
        ready[x] = out
    ready['name'] = self.fps_name[36:]
    return ready
def run_train_all_sklearn(file, fp_name, cv=5, verbose=0, seed=1):
    np.random.seed(seed)
    c = defaultdict(list)
    for k in ProgIter([
            'synergy_zip', 'synergy_bliss', 'synergy_loewe', 'synergy_hsa',
            'css_ri', 'name'
    ], verbose=verbose, total=5):
        v = file[k]
        if k != 'name':
            # for results storage. Assuming that "name" comes last
            temp = dict()
            if 'drug_row_col' in v.columns:
                v.drop(columns=['drug_row_col'], inplace=True)
            cat_cols = ['cell_line_name']
            # manually find all available categories for one-hot
            categories = [v[column].unique() for column in v[cat_cols]]

            # pipelines
            encode = Pipeline(steps=[
                ('one-hot-encode', OneHotEncoder(categories=categories))
            ])
            processor = ColumnTransformer(transformers=[
                ('cat_encoding', encode, cat_cols),
                ('dropping', 'drop', [k])
            ], remainder='passthrough')
            catbst = ColumnTransformer(transformers=[
                ('dropping', 'drop', [k])
            ], remainder='passthrough')

            # regressions
            lr = make_pipeline(processor, linear_model.LinearRegression())
            ridge = make_pipeline(processor, linear_model.Ridge())
            lasso = make_pipeline(processor, linear_model.Lasso())
            elastic = make_pipeline(processor, linear_model.ElasticNet())
            lassolars = make_pipeline(processor, linear_model.LassoLars())
            b_ridge = make_pipeline(processor, linear_model.BayesianRidge())
            kernel = DotProduct() + WhiteKernel()
            gpr = make_pipeline(processor, GaussianProcessRegressor(kernel=kernel))
            linSVR = make_pipeline(processor, LinearSVR())
            hist_gbr = make_pipeline(
                processor,
                HistGradientBoostingRegressor(warm_start=True, max_depth=6))
            rfr = make_pipeline(
                processor,
                RandomForestRegressor(warm_start=True, max_depth=6, n_jobs=3))
            iso = make_pipeline(processor, IsotonicRegression(increasing='auto'))
            xgb = make_pipeline(
                processor, XGBRegressor(tree_method='gpu_hist', max_depth=6))
            cbt = make_pipeline(
                catbst,
                CatBoostRegressor(task_type='GPU', depth=6,
                                  cat_features=np.array([0]), verbose=False))

            mls = [cbt, rfr, gpr, hist_gbr, lr, ridge, lasso, elastic,
                   lassolars, b_ridge, gpr, linSVR, iso]
            mls_names = ["cbt", "rfr", "gpr", "hist_gbr", "lr", "ridge",
                         "lasso", "elastic", "lassolars", "b_ridge", "gpr",
                         "linSVR", "iso"]

            # results
            start = time.time()
            for MODEL, name in zip(mls, mls_names):
                print(f'\n{name}')
                if 'cbt' == name:
                    n_jobs = 1
                else:
                    n_jobs = cv
                cv_dict = cross_validate(
                    MODEL,
                    v,
                    v[k],
                    cv=cv,
                    scoring={"pearsonr": pearson, "rmse": rmse},
                    return_train_score=False,
                    verbose=verbose,
                    n_jobs=n_jobs,
                )
                temp[name] = {
                    'test_pearsonr': np.nanmean(cv_dict['test_pearsonr']),
                    'test_rmse': abs(np.nanmean(cv_dict['test_rmse']))
                }
                print(temp[name])
            print(f'{k} took {int(time.time()-start)/60} mins')
            c[k] = temp
        else:
            nm = f'/tf/notebooks/code_for_pub/_logs_as_python_files/{fp_name}_13models_5foldCV_{time.ctime()}.pickle'
            with open(nm, 'wb') as file:
                pickle.dump(c, file)
            print(f'saving complete to {nm}')
    return c
def run_train(file, fp_name, cv=10, for_valid=0.4, ordered=False,
              ram_fraction=0.95, save=False, cv_params=None):
    cv_lower = 1
    cv_higher = 1 + cv
    if cv_params is None:
        cv_params = dict()
        cv_params['bootstrap_type'] = 'Poisson'
        cv_params['l2_leaf_reg'] = 9
        cv_params['learning_rate'] = 0.15
        cv_params['depth'] = 10
        cv_params['cat_features'] = ['cell_line_name']
        cv_params['use_best_model'] = True
        cv_params['early_stopping_rounds'] = 50
        cv_params['iterations'] = 5000
        cv_params['task_type'] = 'GPU'
    if ordered:
        cv_params['boosting_type'] = 'Ordered'
    cat_features = cv_params['cat_features']
    cv_params['gpu_ram_part'] = ram_fraction
    f = for_valid
    c = defaultdict(list)
    for k in ProgIter([
            'synergy_zip', 'synergy_bliss', 'synergy_loewe', 'synergy_hsa',
            'css_ri', 'name'
    ], total=5, verbose=1):
        v_temp = file[k]
        if k != 'name':
            if 'drug_row_col' in v_temp.columns:
                v = v_temp.drop(columns=['drug_row_col'], inplace=False)
            else:
                v = v_temp
            size = int(v.shape[0] * f)  # 40% for valid
            a = []
            for i in range(cv_lower, cv_higher, 1):
                print(k)
                # sampling
                np.random.seed(i)
                idx_valid = pd.Index(
                    np.random.choice(v.index, size, replace=False))
                idx_test = v.index.difference(idx_valid)
                # returns df without the dropped idx
                train = v.loc[idx_test, :]
                valid = v.loc[idx_valid, :]

                # prep datasets
                true_labels = valid.pop(k)
                y = train.pop(k)
                eval_dataset = Pool(valid, true_labels, cat_features=cat_features)

                # create a model
                model = CatBoostRegressor(**cv_params)
                model.fit(train, y, eval_set=eval_dataset, plot=False,
                          verbose=1000)

                # get stats
                preds = model.predict(valid)
                corr = pearsonr(true_labels, preds)
                rmse = np.sqrt(mean_squared_error(true_labels, preds))
                if save:
                    print(f'iteration: {i}, pearson: {corr}, rmse: {rmse}')  # ,file=f, flush=True)
                    a.append([corr, rmse, true_labels, preds])
                else:
                    a.append([corr, rmse])
                    print(f'iteration: {i}, pearson: {corr}, rmse: {rmse}')  # ,file=f, flush=True)
            c[k].append(a)
        else:
            # name of the fp, valid percentage, number of cv folds
            c['name'].append([v, for_valid, cv])
    if save:
        nm = f'/tf/notebooks/code_for_pub/_logs_as_python_files/{fp_name}_noreplicates_{for_valid}_{time.ctime()}.pickle'
        with open(nm, 'wb') as file:
            pickle.dump(c, file)
    return c
def test_tqdm_compatibility():
    prog = ProgIter(range(20), total=20, miniters=17, show_times=False)
    assert prog.pos == 0
    assert prog.freq == 17
    for _ in prog:
        pass

    with CaptureStdout() as cap:
        ProgIter.write('foo')
    assert cap.text.strip() == 'foo'

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=False)
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=True)
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description_str('new desc')
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_postfix({'foo': 'bar'}, baz='biz', x=object(), y=2)
        prog.begin()
    assert prog.length is None
    assert 'foo=bar' in cap.text.strip()
    assert 'baz=biz' in cap.text.strip()
    assert 'y=2' in cap.text.strip()
    assert 'x=<object' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_postfix_str('bar baz', refresh=False)
    assert 'bar baz' not in cap.text.strip()
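# A hedged sketch of the tqdm-style API exercised by test_tqdm_compatibility
# above (write, set_description, set_postfix). Only calls that appear in the
# test are used; the loop body and values are illustrative.
def _demo_tqdm_style_usage():
    prog = ProgIter(range(100), desc='tqdm-style', show_times=False)
    for i in prog:
        if i == 50:
            ProgIter.write('halfway there')      # print without breaking the bar
            prog.set_description('second half')  # rename the progress label
            prog.set_postfix(stage=2)            # append extra key=value info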
def time_progiter_overhead():
    # Time the overhead of this function
    import timeit
    import textwrap
    setup = textwrap.dedent(
        '''
        from sklearn.externals.progiter import ProgIter
        import numpy as np
        import time
        from six.moves import cStringIO, range
        import utool as ut
        N = 500
        file = cStringIO()
        rng = np.random.RandomState(42)
        ndims = 2
        vec1 = rng.rand(113, ndims)
        vec2 = rng.rand(71, ndims)

        def minimal_wraper1(sequence):
            for item in sequence:
                yield item

        def minimal_wraper2(sequence):
            for count, item in enumerate(sequence, start=1):
                yield item

        def minimal_wraper3(sequence):
            count = 0
            for item in sequence:
                yield item
                count += 1

        def minwrap4(sequence):
            for count, item in enumerate(sequence, start=1):
                yield item
                if count % 100:
                    pass

        def minwrap5(sequence):
            for count, item in enumerate(sequence, start=1):
                yield item
                if time.time() < 100:
                    pass
        '''
    )
    statements = {
        'baseline': '[{work} for n in range(N)]',
        'creation': 'ProgIter(range(N))',
        'minwrap1': '[{work} for n in minimal_wraper1(range(N))]',
        'minwrap2': '[{work} for n in minimal_wraper2(range(N))]',
        'minwrap3': '[{work} for n in minimal_wraper3(range(N))]',
        'minwrap4': '[{work} for n in minwrap4(range(N))]',
        'minwrap5': '[{work} for n in minwrap5(range(N))]',
        '(sk-disabled)': '[{work} for n in ProgIter(range(N), enabled=False, file=file)]',  # NOQA
        '(sk-plain)': '[{work} for n in ProgIter(range(N), file=file)]',  # NOQA
        '(sk-freq)': '[{work} for n in ProgIter(range(N), file=file, freq=100)]',  # NOQA
        '(sk-no-adjust)': '[{work} for n in ProgIter(range(N), file=file, adjust=False, freq=200)]',  # NOQA
        '(sk-high-freq)': '[{work} for n in ProgIter(range(N), file=file, adjust=False, freq=200)]',  # NOQA
        # '(ut-disabled)': '[{work} for n in ut.ProgIter(range(N), enabled=False, file=file)]',  # NOQA
        # '(ut-plain)': '[{work} for n in ut.ProgIter(range(N), file=file)]',  # NOQA
        # '(ut-freq)': '[{work} for n in ut.ProgIter(range(N), freq=100, file=file)]',  # NOQA
        # '(ut-no-adjust)': '[{work} for n in ut.ProgIter(range(N), freq=200, adjust=False, file=file)]',  # NOQA
        # '(ut-high-freq)': '[{work} for n in ut.ProgIter(range(N), file=file, adjust=False, freq=200)]',  # NOQA
    }
    # statements = {
    #     'calc_baseline': '[vec1.dot(vec2.T) for n in range(M)]',  # NOQA
    #     'calc_plain': '[vec1.dot(vec2.T) for n in ProgIter(range(M), file=file)]',  # NOQA
    #     'calc_plain_ut': '[vec1.dot(vec2.T) for n in ut.ProgIter(range(M), file=file)]',  # NOQA
    # }
    timeings = {}

    work_strs = [
        'None',
        'vec1.dot(vec2.T)',
        'n % 10 == 0',
    ]
    work = work_strs[0]
    # work = work_strs[1]

    number = 10000
    prog = ProgIter(desc='timing', adjust=True)
    for key, stmt in prog(statements.items()):
        prog.set_extra(key)
        secs = timeit.timeit(stmt.format(work=work), setup, number=number)
        timeings[key] = secs / number
def test_progiter():
    # Define a function that takes some time
    def is_prime(n):
        return n >= 2 and not any(n % i == 0 for i in range(2, n))
    N = 500

    if False:
        file = cStringIO()
        prog = ProgIter(range(N), clearline=False, file=file, freq=N // 10,
                        adjust=False)
        file.seek(0)
        print(file.read())

        prog = ProgIter(range(N), clearline=False)
        for n in prog:
            was_prime = is_prime(n)
            prog.set_extra('n=%r, was_prime=%r' % (n, was_prime,))
            if (n + 1) % 128 == 0 and was_prime:
                prog.set_extra('n=%r, was_prime=%r EXTRA' % (n, was_prime,))
        file.seek(0)
        print(file.read())

    total = 200
    N = 5000
    N0 = N - total
    print('N = %r' % (N,))
    print('N0 = %r' % (N0,))

    print('\n-----')
    print('Demo #0: progress can be disabled and incur essentially 0 overhead')
    print('However, the overhead of enabled progress is minimal and typically '
          'insignificant')
    print('this is verbosity mode verbose=0')
    sequence = (is_prime(n) for n in range(N0, N))
    # with ub.Timer('demo0'):
    if True:
        psequence = ProgIter(sequence, total=total, desc='demo0',
                             enabled=False)
        list(psequence)

    print('\n-----')
    print('Demo #1: progress is shown by default in the same line')
    print('this is verbosity mode verbose=1')
    sequence = (is_prime(n) for n in range(N0, N))
    # with ub.Timer('demo1'):
    if True:
        psequence = ProgIter(sequence, total=total, desc='demo1')
        list(psequence)

    # Default behavior adjusts frequency of progress reporting so
    # the performance of the loop is minimally impacted
    print('\n-----')
    print('Demo #2: clearline=False prints multiple lines.')
    print('Progress is only printed as needed')
    print('Notice the adjustment behavior of the print frequency')
    print('this is verbosity mode verbose=2')
    # with ub.Timer('demo2'):
    if True:
        sequence = (is_prime(n) for n in range(N0, N))
        psequence = ProgIter(sequence, total=total, clearline=False,
                             desc='demo2')
        list(psequence)
        # import utool as ut
        # print(ut.repr4(psequence.__dict__))

    print('\n-----')
    print('Demo #3: Adjustments can be turned off to give constant feedback')
    print('this is verbosity mode verbose=3')
    sequence = (is_prime(n) for n in range(N0, N))
    # with ub.Timer('demo3'):
    if True:
        psequence = ProgIter(sequence, total=total, adjust=False,
                             clearline=False, freq=100, desc='demo3')
        list(psequence)
    if not isinstance(SL, complex):
        count = count_B(SL, count)
        count = count_B(SL, count)
    return count


def count(S, counter):
    N = math.log(S) / math.log(3) // 1
    return counter(S, N)


Mmax = 1000
M = [4 + i**2 for i in range(Mmax)]

cb = [count(m, count_B) for m in ProgIter(M)]
cc = [count(m, count_C) for m in ProgIter(M)]

f0 = plt.figure()
plt.plot(M, cb, color="C0", label='Numeric data B: no bound')
plt.plot(M, cc, color="C2", label='Numeric data C: with bound')
plt.plot(M, [2 * x * math.log(x) / math.log(27) - 2 / 3 * x + 1 for x in M],
         'r:', label=r"Analytic function $O(N\log_{27}N)$")
plt.legend()
plt.ylabel("operations")
plt.xlabel("N")
plt.title("Complexity of algorithm")
f0.savefig("../figures/complexity_numeric_vs_analytic.pdf", transparent=True,
           format="pdf")