def hyperpack(hyperpack_list):
    ''' Install Large Packs Of Applications And Packages '''
    os_bar = IncrementalBar('Getting Operating System...', max=1)
    os_bar.next()
    installer = Installer()
    updater = Updater()
    cleaner = Uninstaller()
    hyperpacks = hyperpack_list.split(',')
    password = ""
    if platform == 'linux' or platform == 'darwin':
        password = getpass('Enter your password: ')
        password_bar = IncrementalBar('Verifying Password...', max=1)
        exitcode = is_password_valid(password)
        if exitcode == 1:
            click.echo('Wrong Password Entered... Aborting Installation!')
            return
        password_bar.next()
        click.echo('\n')
    if platform == 'linux':
        for hyperpack in hyperpacks:
            hyper_pack = hyperpkgs[hyperpack]
            packages = hyper_pack.packages.split(',')
            apps = hyper_pack.applications.split(',')

            # Installing Required Packages
            for package in packages:
                installer.install_task(
                    devpackages_linux[package],
                    f'sudo -S apt-get install -y {package}',
                    password,
                    f'{package} --version',
                    [f'{devpackages_linux[package]} Version'])

            # Installing Required Applications
            for app in apps:
                installer.install_task(
                    applications_linux[app],
                    f'sudo -S snap install --classic {app}',
                    password, '', [])

            # Updating Required Packages
            for package in packages:
                updater.updatepack(package, password)
            for app in apps:
                updater.updateapp(app, password)
        cleaner.clean(password)
    elif platform == 'win32':
        for hyperpack in hyperpacks:
            hyper_pack = hyperpkgs[hyperpack]
            packages = hyper_pack.packages.split(',')
            apps = hyper_pack.applications.split(',')
            for package in packages:
                installer.install_task(
                    package_name=devpackages_windows[package],
                    script=f'choco install {package} -y',
                    password="",
                    test_script=f'{package} --version',
                    tests_passed=[f'{devpackages_windows[package]} Version'])
            for package in packages:
                updater.updatepack(package, password="")
            for app in apps:
                installer.install_task(package_name=applications_windows[app],
                                       script=f'choco install {app} -y',
                                       password="",
                                       test_script='',
                                       tests_passed=[])
            for app in apps:
                updater.updateapp(app, password="")
    elif platform == 'darwin':
        for hyperpack in hyperpacks:
            hyper_pack = hyperpkgs[hyperpack]
            packages = hyper_pack.packages.split(',')
            apps = hyper_pack.applications.split(',')
            for package in packages:
                installer.install_task(
                    package_name=devpackages_macos[package],
                    script=f'brew install {package}',
                    password="",
                    test_script=f'{package} --version',
                    tests_passed=[f'{devpackages_macos[package]} Version'])
            for package in packages:
                updater.updatepack(package, password="")
            for app in apps:
                installer.install_task(package_name=applications_macos[app],
                                       script=f'brew cask install {app}',
                                       password="",
                                       test_script='',
                                       tests_passed=[])
            for app in apps:
                updater.updateapp(app, password="")
def _quasi_public_meta_clonotypes(clone_df,
                                  pwmat,
                                  tcrsampler,
                                  cdr3_name='cdr3_d_aa',
                                  v_gene_name='v_d_gene',
                                  nr_filter=True,
                                  output_html_name="quasi_public_clones.html",
                                  sort_columns=['nsubject', 'K_neighbors'],
                                  sort_ascending=False,
                                  labels=[
                                      'clone_id',
                                      'cdr3_d_aa',
                                      'v_d_gene',
                                      'j_d_gene',
                                      'radius',
                                      'neighbors',
                                      'K_neighbors',
                                      #'cdr3s',
                                      'nsubject',
                                      'qpublic',
                                      'cdr3_d_aa.summary',
                                      'v_d_gene.summary',
                                      'j_d_gene.summary',
                                      'subject.summary'
                                  ],
                                  fixed_radius=False,
                                  radius=None,
                                  query_str='qpublic == True & K_neighbors > 1',
                                  kargs_member_summ={
                                      'key_col': 'neighbors',
                                      'count_col': 'count',
                                      'addl_cols': ['subject'],
                                      'addl_n': 4
                                  },
                                  kargs_motif={
                                      'pwmat_str': 'pw_delta',
                                      'cdr3_name': 'cdr3_d_aa',
                                      'v_name': 'v_d_gene',
                                      'gene_names': ['v_d_gene', 'j_d_gene']
                                  }):
    """
    _quasi_public_meta_clonotypes

    Parameters
    ----------
    clone_df : pd.DataFrame
        Clone information with standard tcrdist3 column names.
    pwmat : np.array
        Pairwise distances.
    tcrsampler : tcrsampler.TCRsampler
        TCRsampler instance initialized with an appropriate background set.
    cdr3_name : str
        Column name for the amino acid CDR3, e.g., 'cdr3_d_aa'.
    v_gene_name : str
        Column name for the TR[ABGD]V gene, e.g., 'v_d_gene'.
    nr_filter : bool
        If True, sequences with the exact same neighbors as another set will be dropped.
    output_html_name : str
        Filename for the html output.
    labels : list
        List of columns to display on the html output beneath each logo plot.
    fixed_radius : bool
        If False, clone_df must have a 'radius' column. If True, the <radius>
        argument will be used to define the maximum distance from a centroid to a
        neighboring TCR.
    radius : int or None
        Threshold distance (<=) for neighborhood membership. If int, all centroids
        are assigned the same radius. Alternatively, a radius can be provided for
        each centroid sequence by including 'radius' as a numeric column in clone_df.
    query_str : str
        The query string used to include sequences in the output. For instance,
        'qpublic == True and K_neighbors > 3' implies that only groupings of 4 or
        more TCRs from at least two individuals will be retained. Alternatively,
        'nsubject > 1' or 'qpublic == True' could be used as true minimum
        requirements for quasi-publicity.
    kargs_member_summ : dict
        kwargs passed to member_summ.
    kargs_motif : dict
        kwargs for the motif generation.

    Returns
    -------
    Returns DataFrames in a dictionary.

    nn_summary : pd.DataFrame
        DataFrame matching clone_df with summary measures added.
    quasi_public_df : pd.DataFrame
        DataFrame with only those rows that match the <query_str> and nr_filter.

    {'nn_summary': nn_summary, 'quasi_public_df': quasi_public_df}

    Notes
    -----
    Importantly, an html file is written displaying the quasi-public
    meta-clonotypes.

    The easiest way to integrate this with an existing neighbor_diff result is to
    add 'neighbors' and 'K_neighbors' to the clone_df. Other columns could be
    added as well, and then displayed if added to the list of labels.

    nn_clone_df = pd.concat([tr.clone_df, ndif[['neighbors', 'K_neighbors','val_0','ct_0']] ], axis = 1)

    Examples
    --------
    """
    if 'neighbors' not in clone_df.columns:
        if fixed_radius:
            clone_df['radius'] = radius
            clone_df['neighbors'] = _neighbors_fixed_radius(pwmat=pwmat, radius=radius)
        else:
            assert 'radius' in clone_df.columns, "if not using fixed_radius, the clone_df must have a numeric 'radius' column"
            clone_df['neighbors'] = _neighbors_variable_radius(
                pwmat=pwmat, radius_list=clone_df.radius)

    if 'K_neighbors' not in clone_df.columns:
        if fixed_radius:
            clone_df['K_neighbors'] = _K_neighbors_fixed_radius(pwmat=pwmat, radius=radius)
        else:
            clone_df['K_neighbors'] = _K_neighbors_variable_radius(
                pwmat=pwmat, radius_list=clone_df.radius)

    if 'nsubject' not in clone_df.columns:
        clone_df['nsubject'] = clone_df['neighbors'].\
            apply(lambda x: clone_df['subject'].iloc[x].nunique())

    if 'qpublic' not in clone_df.columns:
        clone_df['qpublic'] = clone_df['nsubject'].\
            apply(lambda x: x > 1)

    nn_summary = member_summ(res_df=clone_df, clone_df=clone_df, **kargs_member_summ)
    nn_summary = nn_summary.rename(
        columns={k: f'{k}.summary' for k in nn_summary.columns})

    clone_df['cdr3s'] = clone_df['neighbors'].apply(
        lambda x: clone_df[cdr3_name].iloc[x].to_list())

    clone_df = pd.concat([clone_df, nn_summary], axis=1)

    quasi_public_df = clone_df.query(query_str).\
        sort_values(sort_columns, ascending=sort_ascending).\
        reset_index(drop=True).\
        copy()

    if quasi_public_df.shape[0] == 0:
        raise ValueError(
            "UNFORTUNATELY NO QUASI PUBLIC CLONES WERE FOUND, CONSIDER YOUR QUERY STRINGENCY"
        )

    quasi_public_df['unique_set'] = test_for_subsets(quasi_public_df['neighbors'])
    if nr_filter:
        quasi_public_df = filter_is(quasi_public_df, 'unique_set', 1).reset_index(drop=True)

    print(
        f"GENERATING {quasi_public_df.shape[0]} QUASI-PUBLIC MOTIFS SATISFYING {query_str}"
    )

    bar = IncrementalBar('Processing', max=quasi_public_df.shape[0])
    svgs = list()
    svgs_raw = list()
    for i, r in quasi_public_df.iterrows():
        bar.next()
        centroid = r[cdr3_name]
        v_gene = r[v_gene_name]
        svg, svg_raw = make_motif_logo(tcrsampler=tcrsampler,
                                       pwmat=pwmat,
                                       clone_df=clone_df,
                                       centroid=centroid,
                                       v_gene=v_gene,
                                       radius=r['radius'],
                                       **kargs_motif)
        svgs.append(svg)
        svgs_raw.append(svg_raw)
    bar.next()
    bar.finish()

    quasi_public_df['svg'] = svgs
    quasi_public_df['svg_raw'] = svgs_raw

    def shrink(s):
        s = s.replace('height="100%"', 'height="20%"')
        s = s.replace('width="100%"', 'width="20%"')
        return s

    print(labels)

    with open(output_html_name, 'w') as output_handle:
        for i, r in quasi_public_df.iterrows():
            #import pdb; pdb.set_trace()
            svg, svg_raw = r['svg'], r['svg_raw']
            output_handle.write("<br></br>")
            output_handle.write(shrink(svg))
            output_handle.write(shrink(svg_raw))
            output_handle.write("<br></br>")
            output_handle.write(pd.DataFrame(r[labels]).transpose().to_html())
            output_handle.write("<br></br>")

    return {
        'nn_summary': nn_summary,
        'quasi_public_df': quasi_public_df,
        'clone_df': clone_df
    }
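# A minimal usage sketch for the function above, assuming a tcrdist3 TCRrep named
# <tr> with beta-chain distances in tr.pw_beta and a britanova background for the
# sampler; the column names and fixed radius below are illustrative, not the
# function's required values.
from tcrsampler.sampler import TCRsampler

ts = TCRsampler(default_background='britanova_human_beta_t_cb.tsv.sampler.tsv')
res = _quasi_public_meta_clonotypes(
    clone_df=tr.clone_df,
    pwmat=tr.pw_beta,
    tcrsampler=ts,
    cdr3_name='cdr3_b_aa',
    v_gene_name='v_b_gene',
    fixed_radius=True,
    radius=18,  # assumed fixed neighborhood radius
    labels=['cdr3_b_aa', 'v_b_gene', 'j_b_gene', 'radius', 'K_neighbors', 'nsubject'],
    kargs_motif={'pwmat_str': 'pw_beta', 'cdr3_name': 'cdr3_b_aa',
                 'v_name': 'v_b_gene', 'gene_names': ['v_b_gene', 'j_b_gene']})
res['quasi_public_df'].head()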
print('Title: ' + title)  # Prints 'title' variable.
print('Address: ' + address)  # Prints 'address' variable.
print('Website: ' + website)  # Prints 'website' variable.
print('Phone #: ' + phone)  # Prints 'phone' variable.
print('Hours of Operation: ')  # Prints heading for hours.
# Prints variable hours.
print(hours[:9] + ': ' + hours[10:28])
print(hours[29:32] + ': ' + hours[33:51])
print(hours[52:55] + ': ' + hours[56:74])
print(hours[75:84] + ': ' + hours[85:103])
print('Review Overview: ')  # Prints heading for Reviews.
# Calls 'circles' function and prints what it returns and the variable 'rating'.
print(circles(rating) + ': ' + str(rating) + ' Rating')

excellentBar = IncrementalBar('Excellent: ', max=total)  # Creates an IncrementalBar item.
for i in range(excellent):  # Iterates n times, n = number of excellent reviews.
    excellentBar.next()  # Updates bar length.
print(' : ' + str(int(round(excellent / total, 2) * 100)) + '%')  # Prints number of reviews and percentage.

verygoodBar = IncrementalBar('Very Good: ', max=total)  # Creates an IncrementalBar item.
for i in range(verygood):  # Iterates n times, n = number of very good reviews.
    verygoodBar.next()  # Updates bar length.
print(' : ' + str(int(round(verygood / total, 2) * 100)) + '%')  # Prints number of reviews and percentage.

averageBar = IncrementalBar('Average: ', max=total)  # Creates an IncrementalBar item.
''' Download a list of articles as specified by a CSV '''
import wikipedia_histories
import pandas as pd
from progress.bar import IncrementalBar

sample = pd.read_csv('./subsample_depth_3.csv')
sample = sample.loc[sample['Domain'] == 'politics'][387:]

bar = IncrementalBar('Downloading articles... ', max=len(sample))
for page, domain in zip(sample['Pages'], sample['Domain']):
    bar.next()
    try:
        cur = wikipedia_histories.get_history(page)
        df = wikipedia_histories.build_df(cur)
        df.to_csv("./out/" + domain + "/" + page + '.csv')
    except Exception as e:
        print(e)
bar.finish()
def laba3(db_file_name, count_range, schema, schema_data):
    results = {
        'linear': [],
        'binary': [],
        'binary+sort': [],
        'multimap': [],
        'hashtable_map_good': [],
        'hashtable_map_bad': [],
        'bad_collisions': [],
        'good_collisions': []
    }
    key = 'fio'
    max_count_iterations = 2
    iterations = len(count_range)
    bar = IncrementalBar('Countdown', max=iterations)
    bar.start()
    for count in count_range:
        bar.next()
        print('\n')
        for count_iterations in range(max_count_iterations):
            generate(db_file_name, count, schema, schema_data)
            fp_map = defaultdict(list)
            fp_list = load_fp_from_file(db_file_name)
            query_obj = random.choice(fp_list)
            query = getattr(query_obj, key)

            print('check lin')
            linear = check_time(linear_search)(fp_list, key, query)
            print('check sort+bin')
            sort_and_bin_search = check_time(sort_and_binary_seach)(fp_list, key, query)
            print('check bin')
            bin_search = check_time(binary_search)(fp_list, key, query)
            print('check multimap')
            map_search = check_time(fp_map.__getitem__)(query)

            print('check hashtable good')
            fp_custom_map_good = HashTable()
            for el in fp_list:
                el.set_hash_type('good')
                fp_map[getattr(el, key)].append(el)
                fp_custom_map_good.add(el)
            query_obj.set_hash_type('good')
            custom_map_good_search = check_time(fp_custom_map_good.get)(
                Hashes.good_hash(query))

            print('check hashtable bad')
            fp_custom_map_bad = HashTable()
            for el in fp_list:
                el.set_hash_type('bad')
                fp_custom_map_bad.add(el)
            query_obj.set_hash_type('bad')
            custom_map_bad_search = check_time(fp_custom_map_bad.get)(
                Hashes.bad_hash(query))

            results['linear'].append((count, linear))
            results['binary'].append((count, bin_search))
            results['binary+sort'].append((count, sort_and_bin_search))
            results['multimap'].append((count, map_search))
            results['hashtable_map_good'].append((count, custom_map_good_search))
            results['hashtable_map_bad'].append((count, custom_map_bad_search))
            results['bad_collisions'].append((count, fp_custom_map_bad.collision_count))
            results['good_collisions'].append((count, fp_custom_map_good.collision_count))

    plot_graph(results, count_range, max_count_iterations)
    print('bad_collisions: ', results['bad_collisions'])
    print('good_collisions: ', results['good_collisions'])
    bar.finish()
    return results
dyn_spectra_chA = np.zeros(
    (int(data_block_size / 2), no_of_bunches_per_file), float)
if Channel == 2:  # Two channels mode
    dyn_spectra_chB = np.zeros(
        (int(data_block_size / 2), no_of_bunches_per_file), float)

# !!! Fake timing. Real timing to be done!!!
TimeFigureScaleFig = np.linspace(0, no_of_bunches_per_file,
                                 no_of_bunches_per_file + 1)
for i in range(no_of_bunches_per_file):
    TimeFigureScaleFig[i] = str(TimeFigureScaleFig[i])

time_scale_bunch = []

bar = IncrementalBar(' File ' + str(fileNo + 1) + ' of ' + str(len(fileList)) +
                     ' reading: ',
                     max=no_of_bunches_per_file,
                     suffix='%(percent)d%%')

for bunch in range(no_of_bunches_per_file):
    bar.next()

    # Reading and reshaping all data with readers
    if Channel == 0 or Channel == 1:  # Single channel mode
        wf_data = np.fromfile(file,
                              dtype='i2',
                              count=no_of_spectra_in_bunch * data_block_size)
        wf_data = np.reshape(wf_data,
                             [data_block_size, no_of_spectra_in_bunch],
                             order='F')
def animate_pixels(imfile1, imfile2, outfile, color=False, verbose=False):
    """Animates a pixel-motion transition between two images.

    Images must have the exact same number of pixels.
    Animation is saved as "outfile".

    Parameters
    ----------
    imfile1 : str or file object
        The file name or file object for the first image
    imfile2 : str or file object
        The file name or file object for the second image
    outfile : str
        The output file name
    color : bool, optional
        If True, runs in color mode
    verbose : bool, optional
        If True, displays a progress bar in the console
    """
    # Read in images
    if color:
        img1 = np.array(imread(imfile1))/255
        img2 = np.array(imread(imfile2))/255
    else:
        img1 = np.array(imread(imfile1, as_gray=True))/255
        img2 = np.array(imread(imfile2, as_gray=True))/255

    # Check number of pixels
    if img1.shape[0]*img1.shape[1] != img2.shape[0]*img2.shape[1]:
        raise ValueError("Images must have the same number of pixels")

    # Sort pixels by saturation (if grayscale) or hue (if color)
    if verbose: bar1 = IncrementalBar("Sorting\t\t", max=2, suffix='%(percent)d%%')
    if color:
        rows1, cols1, colors1 = color_to_coords(img1)
    else:
        rows1, cols1, colors1 = grayscale_to_coords(img1)
    if verbose: bar1.next()
    if color:
        rows2, cols2, colors2 = color_to_coords(img2)
    else:
        rows2, cols2, colors2 = grayscale_to_coords(img2)
    if verbose: bar1.next(); bar1.finish()

    # n is number of frames of one-directional transition
    # buffer is number of stationary frames before and after the transitions
    # total is number of frames for two transitions with 2 buffer periods each
    n = 100
    buffer = 10
    total = 2*n + 4*buffer

    # np.linspace creates evenly spaced position and color arrays for transition
    if verbose: bar2 = IncrementalBar("Interpolating\t", max=4, suffix='%(percent)d%%')
    colors = np.linspace(colors1, colors2, n)
    if verbose: bar2.next()
    rows = np.linspace(rows1+.5, rows2+.5, n)
    if verbose: bar2.next()
    cols = np.linspace(cols1+.5, cols2+.5, n)
    if verbose: bar2.next()
    pos = np.dstack((rows, cols))
    if verbose: bar2.next(); bar2.finish()

    # Calculate the aspect ratio of the two images
    aspect_ratio1 = img1.shape[0]/img1.shape[1]
    aspect_ratio2 = img2.shape[0]/img2.shape[1]

    plt.ioff()
    # Figure will always have default matplotlib 6.4 inch width
    fig = plt.figure(figsize=(6.4, max(aspect_ratio1, aspect_ratio2)*6.4))
    ax = fig.add_subplot(111)
    ax.set_aspect("equal")
    plt.axis("off")
    plt.xlim((0, max(img1.shape[1], img2.shape[1])))
    plt.ylim((0, max(img1.shape[0], img2.shape[0])))

    # Markers are measured in points, which are 1/72nd of an inch. Calculates
    # pixel size in points
    pixels = max(img1.shape[1], img2.shape[1])
    pixels_per_inch = pixels/6.4
    size = 72/pixels_per_inch

    # core object is a scatter plot with square markers set to pixel size
    if color:
        points = ax.scatter(rows[0], cols[0], c=colors1, marker='s', s=size**2)
    else:
        points = ax.scatter(rows[0], cols[0], c=colors1, cmap="gray", marker='s',
                            s=size**2, vmin=0, vmax=1)

    # update function changes the scatter plot at each frame
    # set_color works for rgb, set_array works for grayscale
    def update(j):
        if j >= buffer and j < buffer+n:
            i = j-buffer
            points.set_offsets(pos[i])
            if color:
                points.set_color(colors[i])
            else:
                points.set_array(colors[i])
        elif j >= 3*buffer+n and j < 3*buffer+2*n:
            i = n-(j-(3*buffer+n))-1
            points.set_offsets(pos[i])
            if color:
                points.set_color(colors[i])
            else:
                points.set_array(colors[i])
        if verbose: bar3.next()

    if verbose: bar3 = IncrementalBar("Rendering\t", max=total, suffix='%(percent)d%%')

    # Create FuncAnimation with 60-millisecond interval between frames
    ani = animation.FuncAnimation(fig, update, frames=total, interval=60)

    # Save animation and close the figure
    ani.save(outfile)
    if verbose: bar3.next(); bar3.finish()
    plt.close(fig)
    plt.ion()
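# A short usage sketch of animate_pixels; the file names are placeholders and the
# two input images are assumed to contain the same number of pixels.
animate_pixels("before.png", "after.png", "transition.gif", color=True, verbose=True)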
def find_metaclonotypes( project_path = "tutorial48", source_path = os.path.join(path_to_base,'tcrdist','data','covid19'), antigen_enriched_file = 'mira_epitope_48_610_YLQPRTFL_YLQPRTFLL_YYVGYLQPRTF.tcrdist3.csv', ncpus = 4, seed = 3434): """ This functions encapsulates a complete workflow for finding meta-clonotypes in antigen-enriched data. """ np.random.seed(seed) if not os.path.isdir(project_path): os.mkdir(project_path) ############################################################################ # Step 1: Select and load a antigen-enriched (sub)repertoire. #### ############################################################################ print(f"INITIATING A TCRrep() with {antigen_enriched_file}") assert os.path.isfile(os.path.join(source_path, antigen_enriched_file)) # Read file into a Pandas DataFrame <df> df = pd.read_csv(os.path.join(source_path, antigen_enriched_file)) # Drop cells without any gene usage information df = df[( df['v_b_gene'].notna() ) & (df['j_b_gene'].notna()) ] # Initialize a TCRrep class, using ONLY columns that are complete and unique define a a clone. # Class provides a 'count' column if non is present # Counts of identical subject:VCDR3 'clones' will be aggregated into a TCRrep.clone_df. from tcrdist.repertoire import TCRrep tr = TCRrep(cell_df = df[['subject','cell_type','v_b_gene', 'j_b_gene', 'cdr3_b_aa']], organism = "human", chains = ['beta'], compute_distances = True) tr.cpus = ncpus ############################################################################ # Step 1.1: Estimate Probability of Generation #### ############################################################################ ### It will be useful later to know the pgen of each from tcrdist.automate import auto_pgen print(f"COMPUTING PGEN WITH OLGA (Sethna et al 2018)") print("FOR ANTIGEN-ENRICHED CLONES TO BE USED FOR SUBSEQUENT ANALYSES") auto_pgen(tr) # Tip: Users of tcrdist3 should be aware that by default a <TCRrep.clone_df> # DataFrame is created out of non-redundant cells in the cell_df, and # pairwise distance matrices automatically computed. # Notice that attributes <tr.clone_df> and <tr.pw_beta> , <tr.pw_cdr3_b_aa>, # are immediately accessible. # Attributes <tr.pw_pmhc_b_aa>, <tr.pw_cdr2_b_aa>, and <tr.pw_cdr1_b_aa> # are also available if <TCRrep.store_all_cdr> is set to True. # For large datasets, i.e., >15,000 clones, this approach may consume too much # memory so <TCRrep.compute_distances> is automatically set to False. ############################################################################ # Step 2: Synthesize an Inverse Probability Weighted VJ Matched Background # ############################################################################ # Generating an appropriate set of unenriched reference TCRs is important; for # each set of antigen-associated TCRs, discovered by MIRA, we created a two part # background. One part consists of 100,000 synthetic TCRs whose V-gene and J-gene # frequencies match those in the antigen-enriched repertoire, using the software # OLGA (Sethna et al. 2019; Marcou et al. 2018). The other part consists of # 100,000 umbilical cord blood TCRs sampled uniformly from 8 subjects (Britanova # et al., 2017). This mix balances dense sampling of sequences near the # biochemical neighborhoods of interest with broad sampling of TCRs from an # antigen-naive repertoire. 
Importantly, we adjust for the biased sampling by # using the V- and J-gene frequencies observed in the cord-blood data (see # Methods for details about inverse probability weighting adjustment). Using this # approach we are able to estimate the abundance of TCRs similar to a centroid # TCR in an unenriched background repertoire of ~1,000,000 TCRs, using a # comparatively modest background dataset of 200,000 TCRs. While this estimate # may underestimate the true specificity, since some of the neighborhood TCRs in # the unenriched background repertoire may in fact recognize the antigen of # interest, it is useful for prioritizing neighborhoods and selecting a radius # for each neighborhood that balances sensitivity and specificity. # Initialize a TCRsampler -- human, beta, umbilical cord blood from 8 people. print(f"USING tcrsampler TO CONSTRUCT A CUSTOM V-J MATCHED BACKGROUND") from tcrsampler.sampler import TCRsampler ts = TCRsampler(default_background = 'britanova_human_beta_t_cb.tsv.sampler.tsv') # Stratify sample so that each subject contributes similarly to estimate of # gene usage frequency from tcrdist.background import get_stratified_gene_usage_frequency ts = get_stratified_gene_usage_frequency(ts = ts, replace = True) # Synthesize an inverse probability weighted V,J gene background that matches # usage in your enriched repertoire df_vj_background = tr.synthesize_vj_matched_background(ts = ts, chain = 'beta') # Get a randomly drawn stratified sampler of beta, cord blood from # Britanova et al. 2016 # Dynamics of Individual T Cell Repertoires: From Cord Blood to Centenarians from tcrdist.background import sample_britanova df_britanova_100K = sample_britanova(size = 100000) # Append frequency columns using, using sampler above df_britanova_100K = get_gene_frequencies(ts = ts, df = df_britanova_100K) df_britanova_100K['weights'] = 1 df_britanova_100K['source'] = "stratified_random" # Combine the two parts of the background into a single DataFrame df_bkgd = pd.concat([df_vj_background.copy(), df_britanova_100K.copy()], axis = 0).\ reset_index(drop = True) # Assert that the backgrounds have the expected number of rows. assert df_bkgd.shape[0] == 200000 # Save the background for future use background_outfile = os.path.join(project_path, f"{antigen_enriched_file}.olga100K_brit100K_bkgd.csv") print(f'WRITING {background_outfile}') df_bkgd.to_csv(background_outfile, index = False) # Load the background to a TCRrep without computing pairwise distances # (i.e., compute_distances = False) tr_bkgd = TCRrep( cell_df = df_bkgd, organism = "human", chains = ['beta'], compute_distances = False) # Compute rectangular distances. Those are, distances between each clone in # the antigen-enriched repertoire and each TCR in the background. # With a single 1 CPU and < 10GB RAM, 5E2x2E5 = 100 million pairwise distances, # across CDR1, CDR2, CDR2.5, and CDR3 # 1min 34s ± 0 ns per loop (mean ± std. dev. 
of 1 run, 1 loop each) # %timeit -r 1 tr.compute_rect_distances(df = tr.clone_df, df2 = tr_bkdg.clone_df, store = False) ############################################################################ # Step 4: Calculate Distances ##### ############################################################################ print(f"COMPUTING RECTANGULARE DISTANCE") tr.compute_sparse_rect_distances( df = tr.clone_df, df2 = tr_bkgd.clone_df, radius=50, chunk_size = 100) scipy.sparse.save_npz(os.path.join(project_path, f"{antigen_enriched_file}.rw_beta.npz"), tr.rw_beta) # Tip: For larger dataset you can use a sparse implementation: # 30.8 s ± 0 ns per loop ; tr.cpus = 6 # %timeit -r tr.compute_sparse_rect_distances(df = tr.clone_df, df2 = tr_bkdg.clone_df,radius=50, chunk_size=85) ############################################################################ # Step 5: Examine Density ECDFS ##### ############################################################################ # Investigate the density of neighbors to each TCR, based on expanding # distance radius. from tcrdist.ecdf import distance_ecdf, _plot_manuscript_ecdfs import matplotlib.pyplot as plt # Compute empirical cumulative density function (ecdf) # Compare Antigen Enriched TCRs (against itself). thresholds, antigen_enriched_ecdf = distance_ecdf( tr.pw_beta, thresholds=range(0,50,2)) # Compute empirical cumulative density function (ecdf) # Compare Antigen Enriched TCRs (against) 200K probability # inverse weighted background thresholds, background_ecdf = distance_ecdf( tr.rw_beta, thresholds=range(0,50,2), weights= tr_bkgd.clone_df['weights'], absolute_weight = True) # plot_ecdf similar to tcrdist3 manuscript # antigen_enriched_ecdf[antigen_enriched_ecdf == antigen_enriched_ecdf.min()] = 1E-10 f1 = _plot_manuscript_ecdfs( thresholds, antigen_enriched_ecdf, ylab= 'Proportion of Antigen Enriched TCRs', cdr3_len=tr.clone_df.cdr3_b_aa.str.len(), min_freq=1E-10) f1.savefig(os.path.join(project_path, f'{antigen_enriched_file}.ecdf_AER_plot.png')) f2 = _plot_manuscript_ecdfs( thresholds, background_ecdf, ylab= 'Proportion of Reference TCRs', cdr3_len=tr.clone_df.cdr3_b_aa.str.len(), min_freq=1E-10) f2.savefig(os.path.join(project_path, f'{antigen_enriched_file}.ecdf_BUR_plot.png')) ############################################################################ # Step 6: Find optimal radii (theta = 1E5 ##### ############################################################################ # To ascertain which meta-clonotypes are likely to be most specific, # take advantage of an existing function <bkgd_cntrl_nn2>. # d888 .d8888b. 8888888888 888888888 # d8888 d88P Y88b 888 888 # 888 888 888 888 888 # 888 888 888 8888888 8888888b. 
# 888 888 888 888 "Y88b # 888 888 888 888 888888 888 # 888 Y88b d88P 888 Y88b d88P # 8888888 "Y8888P" 8888888888 "Y8888P" level_tag = '1E5' from tcrdist.neighbors import bkgd_cntl_nn2 centers_df = bkgd_cntl_nn2( tr = tr, tr_background = tr_bkgd, weights = tr_bkgd.clone_df.weights, ctrl_bkgd = 10**-5, col = 'cdr3_b_aa', add_cols = ['v_b_gene', 'j_b_gene'], ncpus = 4, include_seq_info = True, thresholds = [x for x in range(0,50,2)], generate_regex = True, test_regex = True, forced_max_radius = 36) ############################################################################ # Step 6.2: (theta = 1E5) ALL meta-clonotypes .tsv file ## ############################################################################ # save center to project_path for future use centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" ) # Many of meta-clonotypes contain redundant information. # We can winnow down to less-redundant list. We do this # by ranking clonotypes from most to least specific. # <min_nsubject> is minimum publicity of the meta-clonotype, # <min_nr> is minimum non-redundancy # Add neighbors, K_neighbors, and nsubject columns from tcrdist.public import _neighbors_variable_radius, _neighbors_sparse_variable_radius centers_df['neighbors'] = _neighbors_variable_radius(pwmat=tr.pw_beta, radius_list = centers_df['radius']) centers_df['K_neighbors'] = centers_df['neighbors'].apply(lambda x : len(x)) # We determine how many <nsubjects> are in the set of neighbors centers_df['nsubject'] = centers_df['neighbors'].\ apply(lambda x: tr.clone_df['subject'].iloc[x].nunique()) centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" ) from tcrdist.centers import rank_centers ranked_centers_df = rank_centers( centers_df = centers_df, rank_column = 'chi2joint', min_nsubject = 2, min_nr = 1) ############################################################################ # Step 6.3: (theta = 1E5) NR meta-clonotypes .tsv file ### ############################################################################ # Output, ready to search bulk data. 
ranked_centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" ) ############################################################################ # Step 6.4: (theta = 1E5) Output Meta-Clonotypes HTML Summary ### ############################################################################ # Here we can make a svg logo for each NR meta-clonotype if ranked_centers_df.shape[0] > 0: from progress.bar import IncrementalBar from tcrdist.public import make_motif_logo cdr3_name = 'cdr3_b_aa' v_gene_name = 'v_b_gene' svgs = list() svgs_raw = list() bar = IncrementalBar('Processing', max = ranked_centers_df.shape[0]) for i,r in ranked_centers_df.iterrows(): bar.next() centroid = r[cdr3_name] v_gene = r[v_gene_name] svg, svg_raw = make_motif_logo( tcrsampler = ts, pwmat = tr.pw_beta, clone_df = tr.clone_df, centroid = centroid , v_gene = v_gene , radius = r['radius'], pwmat_str = 'pw_beta', cdr3_name = 'cdr3_b_aa', v_name = 'v_b_gene', gene_names = ['v_b_gene','j_b_gene']) svgs.append(svg) svgs_raw.append(svg_raw) bar.next();bar.finish() ranked_centers_df['svg'] = svgs ranked_centers_df['svg_raw'] = svgs_raw def shrink(s): return s.replace('height="100%"', 'height="20%"').replace('width="100%"', 'width="20%"') labels =['cdr3_b_aa','v_b_gene', 'j_b_gene', 'pgen', 'radius', 'regex','nsubject','K_neighbors', 'bkgd_hits_weighted','chi2dist','chi2re','chi2joint'] output_html_name = os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.html') # 888 888 88888888888 888b d888 888 # 888 888 888 8888b d8888 888 # 888 888 888 88888b.d88888 888 # 8888888888 888 888Y88888P888 888 # 888 888 888 888 Y888P 888 888 # 888 888 888 888 Y8P 888 888 # 888 888 888 888 " 888 888 # 888 888 888 888 888 88888888 with open(output_html_name, 'w') as output_handle: for i,r in ranked_centers_df.iterrows(): #import pdb; pdb.set_trace() svg, svg_raw = r['svg'],r['svg_raw'] output_handle.write("<br></br>") output_handle.write(shrink(svg)) output_handle.write(shrink(svg_raw)) output_handle.write("<br></br>") output_handle.write(pd.DataFrame(r[labels]).transpose().to_html()) output_handle.write("<br></br>") # To ascertain which meta-clonotypes are likely to be most specific, # take advantage of an existing function <bkgd_cntrl_nn2>. # d888 .d8888b. 8888888888 .d8888b. # d8888 d88P Y88b 888 d88P Y88b # 888 888 888 888 888 # 888 888 888 8888888 888d888b. 
# 888 888 888 888 888P "Y88b # 888 888 888 888 888888 888 888 # 888 Y88b d88P 888 Y88b d88P # 8888888 "Y8888P" 8888888888 "Y8888P" ############################################################################ # Step 6.5: Find optimal radii (theta = 1E6) ### ############################################################################ level_tag = '1E6' from tcrdist.neighbors import bkgd_cntl_nn2 centers_df = bkgd_cntl_nn2( tr = tr, tr_background = tr_bkgd, weights = tr_bkgd.clone_df.weights, ctrl_bkgd = 10**-6, col = 'cdr3_b_aa', add_cols = ['v_b_gene', 'j_b_gene'], ncpus = 4, include_seq_info = True, thresholds = [x for x in range(0,50,2)], generate_regex = True, test_regex = True, forced_max_radius = 36) ############################################################################ # Step 6.6: (theta = 1E6) ALL meta-clonotypes .tsv file ## ############################################################################ # save center to project_path for future use centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" ) # Many of meta-clonotypes contain redundant information. # We can winnow down to less-redundant list. We do this # by ranking clonotypes from most to least specific. # <min_nsubject> is minimum publicity of the meta-clonotype, # <min_nr> is minimum non-redundancy # Add neighbors, K_neighbors, and nsubject columns from tcrdist.public import _neighbors_variable_radius, _neighbors_sparse_variable_radius centers_df['neighbors'] = _neighbors_variable_radius(pwmat=tr.pw_beta, radius_list = centers_df['radius']) centers_df['K_neighbors'] = centers_df['neighbors'].apply(lambda x : len(x)) # We determine how many <nsubjects> are in the set of neighbors centers_df['nsubject'] = centers_df['neighbors'].\ apply(lambda x: tr.clone_df['subject'].iloc[x].nunique()) centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" ) from tcrdist.centers import rank_centers ranked_centers_df = rank_centers( centers_df = centers_df, rank_column = 'chi2joint', min_nsubject = 2, min_nr = 1) ############################################################################ # Step 6.7: (theta = 1E6) NR meta-clonotypes .tsv file ### ############################################################################ # Output, ready to search bulk data. 
ranked_centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" ) ############################################################################ # Step 6.8: (theta = 1E6) Output Meta-Clonotypes HTML Summary ### ############################################################################ # Here we can make a svg logo for each meta-clonotype from progress.bar import IncrementalBar from tcrdist.public import make_motif_logo if ranked_centers_df.shape[0] > 0: cdr3_name = 'cdr3_b_aa' v_gene_name = 'v_b_gene' svgs = list() svgs_raw = list() bar = IncrementalBar('Processing', max = ranked_centers_df.shape[0]) for i,r in ranked_centers_df.iterrows(): bar.next() centroid = r[cdr3_name] v_gene = r[v_gene_name] svg, svg_raw = make_motif_logo( tcrsampler = ts, pwmat = tr.pw_beta, clone_df = tr.clone_df, centroid = centroid , v_gene = v_gene , radius = r['radius'], pwmat_str = 'pw_beta', cdr3_name = 'cdr3_b_aa', v_name = 'v_b_gene', gene_names = ['v_b_gene','j_b_gene']) svgs.append(svg) svgs_raw.append(svg_raw) bar.next();bar.finish() ranked_centers_df['svg'] = svgs ranked_centers_df['svg_raw'] = svgs_raw def shrink(s): return s.replace('height="100%"', 'height="20%"').replace('width="100%"', 'width="20%"') labels =['cdr3_b_aa', 'v_b_gene', 'j_b_gene', 'pgen', 'radius', 'regex','nsubject','K_neighbors', 'bkgd_hits_weighted','chi2dist','chi2re','chi2joint'] output_html_name = os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.html') # 888 888 88888888888 888b d888 888 # 888 888 888 8888b d8888 888 # 888 888 888 88888b.d88888 888 # 8888888888 888 888Y88888P888 888 # 888 888 888 888 Y888P 888 888 # 888 888 888 888 Y8P 888 888 # 888 888 888 888 " 888 888 # 888 888 888 888 888 88888888 with open(output_html_name, 'w') as output_handle: for i,r in ranked_centers_df.iterrows(): #import pdb; pdb.set_trace() svg, svg_raw = r['svg'],r['svg_raw'] output_handle.write("<br></br>") output_handle.write(shrink(svg)) output_handle.write(shrink(svg_raw)) output_handle.write("<br></br>") output_handle.write(pd.DataFrame(r[labels]).transpose().to_html()) output_handle.write("<br></br>")
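# A minimal driver sketch for the workflow above; the arguments simply restate the
# defaults from the function signature, and the CPU count here is illustrative.
if __name__ == "__main__":
    find_metaclonotypes(project_path="tutorial48", ncpus=2, seed=3434)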
def scrapezillowdata(zillow_urls, header_input): # Initialize progress bar bar = IncrementalBar(" Scraping Zillow", max=len(zillow_urls)) # Initialize list to store home data during loop over each home's Zillow url home_data_list = [] # Loop over each home Zillow URL and scrape pertinent details for url in zillow_urls: # First, obtain the HTML from the current home Zillow URL using gethtml.py home_html = gethtml(url, header_input) # The home address is simply taken directly from its own URL. home_address = (url.replace("https://www.zillow.com/homedetails/", "").replace("-", " ").split("/", 1)[0]) # First, we search for the home's sell price. In Zillow, this variable is under a # "span" class="ds-status-details" tag. The find method will find this variable and store it into a tag # (i.e. ds_status_details). Generally, Zillow will show "Sold" and the sell price in this tag. Therefore, we # check this tag for the key word "sold" that we know will generally be contained in the tag's text. If the key # word is found in the tag's text, then we store the text found in the tag into the appropriate variable while # removing the unwanted characters. If the key word is not found, then the appropriate variable will retain its # initialization value of "n/a". ds_status_details = home_html.find("span", class_="ds-status-details") sold_price = "n/a" if "sold" in ds_status_details.text.lower(): sold_price = (ds_status_details.text.replace("Sold", "").replace( ": $", "").replace(",", "")) # Next, we search for the number of beds, baths, and the home's square footage. In Zillow, each one of these # variables is under a "span" class="ds-bed-bath-living-area" tag. The find_all method will find each one of # these variables and store them into a result set (i.e. ds_bed_bath_living_area). Each item of the result set # will either contain number of beds and "bd", number of baths and "ba", or the home's size and "Square Feet". # We loop over the result set checking each item for key words that we know will be contained in # the item's text. If the key word is found in the item's text, then we store the text found in the item into # the appropriate variable while removing the unwanted characters. If the key word is not found, then the # appropriate variable will retain its initialization value of "n/a". ds_bed_bath_living_area = home_html.find_all( "span", class_="ds-bed-bath-living-area") beds = "n/a" baths = "n/a" size = "n/a" for item in ds_bed_bath_living_area: if "bd" in item.text.lower(): beds = item.text.replace(" bd", "") continue if "ba" in item.text.lower(): baths = item.text.replace(" ba", "") continue if "square feet" in item.text.lower() or "sqft" in item.text.lower( ): size = item.text.replace(",", "").replace("Square Feet", "sqft") continue # Next, we search for the home type, year built, heating, cooling, parking, and lot size. In Zillow, each one of # these variables is under a "li" class="ds-home-fact-list-item" tag. The find_all method will find each one of # these variables and store them into a result set (i.e. ds_home_fact_list_items). Each item of the result set # has a child "span" class="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH" tag (i.e. the "label" tag) # AND a child "span" class="Text-c11n-8-11-1__aiai24-0 hqfqED" tag (i.e. the "value" tag). # For example, for "home type" information (generally the first item in the result set), there will be a # "label" tag that will contain the text "Type" and there will be a "value" tag that will contain the text # "Single Family". 
We loop over the result set checking each item's "label" tag for key words that we know will # be contained in that tag. If the key word is found in the item's "label" tag, then we store the text found in # the item's adjacent "value" tag into the appropriate variable while removing the unwanted characters. # If the key word is not found, then the appropriate variable will retain its initialization value of "n/a". ds_home_fact_list_items = home_html.find_all( "li", class_="ds-home-fact-list-item") home_type = "n/a" year_built = "n/a" heating = "n/a" cooling = "n/a" parking = "n/a" lot_size = "n/a" for item in ds_home_fact_list_items: if ("type" in item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH" ).text.lower()): home_type = item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text continue if ("year built" in item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH" ).text.lower()): year_built = item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text continue if ("heating" in item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH" ).text.lower()): heating = item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text continue if ("cooling" in item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH" ).text.lower()): cooling = item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text continue if ("parking" in item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH" ).text.lower()): parking = item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text continue if ("lot" in item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH" ).text.lower()): lot_size = item.find( "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text.replace( ",", "") continue # Append home data information to list home_data_list.append([ home_address, sold_price, beds, baths, size, home_type, year_built, heating, cooling, parking, lot_size, ]) bar.next() # to advance progress bar bar.finish() # to finish the progress bar print() # to add space following progress bar # Convert home_data_list into pandas dataframe. home_data = list2frame(home_data_list) return home_data
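# A usage sketch for scrapezillowdata; the URL and header below are placeholders,
# and a real request header would normally carry a full browser User-Agent string.
zillow_urls = [
    "https://www.zillow.com/homedetails/123-Example-St-Some-City-ST-00000/0000000_zpid/"
]
header_input = {"User-Agent": "Mozilla/5.0"}
home_data = scrapezillowdata(zillow_urls, header_input)
print(home_data.head())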
# instantiating the CSV_Writer, which will create a CSV file in the output directory
csv = CSV_Writer(PID)

# creating the directory for storing the processed and warped videos
output_path = os.path.join("./output", str(PID))
processed_output_path = os.path.join(output_path, "processed")
warped_output_path = os.path.join(output_path, "warped")
os.mkdir(output_path)
os.mkdir(processed_output_path)
os.mkdir(warped_output_path)

# listing all the videos associated with the PID
videos = listVideos(PID)

# creating a progress bar
bar = IncrementalBar("Countdown", max=len(videos))

for video in videos:
    bar.next()
    video_name = video[video.rfind("/") + 1:].split(".")[0] + ".avi"
    ids = video[video.rfind("/") + 1:].split("_")
    SID = ids[1]
    PtID = ids[2].split(".")[0]

    vidcap = cv2.VideoCapture(video)
    success, frame = vidcap.read()
    frame = cv2.resize(
        frame, (int(frame.shape[1] / 1.25), int(frame.shape[0] / 1.25)))

    # creating an instance of the FrameHandling class to detect the green area and the hole in the frame
    frameHandler = FrameHandling(frame, conf["greenLower"], conf["greenUpper"],
                                 conf["holeLower"],
def determinate_progress_cli(msg, max):
    return IncrementalBar(msg, max=max)
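# A small usage sketch of the factory above; the message and loop bound are arbitrary.
bar = determinate_progress_cli("Copying files", max=50)
for _ in range(50):
    bar.next()
bar.finish()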
def download(url, path=DEFAULT_PATH):
    """Download HTML page and page assets (img, css files) from given 'url'."""
    # Generate output 'page_name' and 'file_path' and load page
    page_name = get_filename(url=url)
    file_path = get_full_path(path, page_name)
    # Make request, edit Soup object and save data into output file
    content = make_request(url)
    soup = BeautifulSoup(content, "html.parser")
    # Get list of links
    links = get_links(tag_meta=ASSET_TAGS, url=url, soup=soup)
    # Edit Soup object and replace links to local files
    if links:
        # Generate folder name and path
        folder_name = get_foldername(url=url)
        folder_path = get_full_path(path, folder_name)
        # Create output directory (if it doesn't exist)
        if not os.path.isdir(folder_path):
            create_dir(local_path=folder_path)
        to_download = []  # Initiate download queue
        # Iterate links and edit soup object
        for link_dict in links:
            # Destructure link's dict
            fact_link, abs_link, tag = itemgetter('fact_link', 'abs_link', 'tag')(link_dict)
            # Generate file_name, local path & local link for item
            file_name = get_filename(url=abs_link)
            local_path = get_full_path(path, folder_name, file_name)
            local_link = get_full_path(folder_name, file_name)
            # Edit soup object
            soup = edit_soup(url=fact_link,
                             tag=tag,
                             meta=ASSET_TAGS[tag],
                             local_link=local_link,
                             soup=soup)
            # Add asset's absolute url and local_path into queue
            to_download.append((abs_link, local_path))
        # Save modified soup
        save_file(data=soup.prettify(), local_path=file_path, mode='w')
        # Initiate progress bar and download assets
        progress_bar = IncrementalBar('Loading resources:', max=len(to_download))
        for abs_link, local_path in to_download:
            try:
                content = make_request(abs_link)
                save_file(data=content, local_path=local_path)
            except Exception:
                logger.error(f'Asset \'{abs_link}\' was not downloaded.')
            progress_bar.next()  # Iterate progress bar
        # Finish progress_bar & return output's file path
        progress_bar.finish()
    return file_path
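# A usage sketch for download(); the URL and output directory are placeholders, and
# DEFAULT_PATH is whatever the surrounding module defines.
saved_page = download('https://example.com', '/tmp/page-loader')
print(saved_page)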
def download(self):
    bar = IncrementalBar('Downloading ', max=10)
    self.driver.get(CME_LINK + '/tools-information/quikstrike/options-calendar.html')
    first_window = self.driver.window_handles[0]
    bar.next()
    sleep(5)
    self.driver.get(
        CME_TOOLS_LINK +
        '/User/QuikStrikeView.aspx?viewitemid=IntegratedCMEOptionExpirationCalendar'
    )
    bar.next()
    sleep(5)
    self.driver.find_element_by_xpath(
        '//a[@id="MainContent_ucViewControl_IntegratedCMEOptionExpirationCalendar_ucViewControl_hlCMEProducts"]'
    ).click()
    bar.next()
    sleep(5)
    for handle in self.driver.window_handles:
        if handle != first_window:
            self.driver.switch_to_window(handle)
    self.driver.find_element_by_xpath(
        '//a[@id="ctl00_cphMain_lvTabs_ctrl3_lbTab"]').click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//a[@id="cphMain_ucProductBrowser_ucProductFilter_ucTrigger_lnkTrigger"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//input[@id="cphMain_ucProductBrowser_ucProductFilter_ucGroupList_rblGroups_4"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//input[@id="cphMain_ucProductBrowser_ucProductFilter_ucContractTypeList_rblContractType_1"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//input[@id="cphMain_ucProductBrowser_ucProductFilter_btnApply"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//a[@id="cphMain_ucProductBrowser_ucProductActions_ucTrigger_lnkTrigger"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//a[@id="cphMain_ucProductBrowser_ucProductActions_lnkExport"]'
    ).click()
    bar.next()
    # self.driver.find_element_by_xpath(
    #     '//a[@id="cphMain_ucProductBrowser_ucProductActions_lnkShowExpirations"]').click()
    # bar.next()
    # sleep(4)
    # iframe = self.driver.find_element_by_xpath('//iframe[@id="mainFrame"]')
    # self.driver.switch_to_frame(iframe)
    # bar.next()
    # sleep(4)
    # self.driver.find_element_by_xpath('//a[@id="ctl03_ucExport_lnkTrigger"]').click()
    # bar.next()
    sleep(5)
    bar.finish()
import pandas as pd
from progress.bar import IncrementalBar
import time
import os

os.chdir(r"G:\Enrollment Management Center\FA Roster")
# os.chdir("/Volumes/Groups/Enrollment Management Center/FA Roster")

file = input('Drop the file you want here:\n')
timr = time.strftime("%m-%d-%y")  # Sets the file name to current date
bar = IncrementalBar(max=15)

try:
    for i in range(15):
        data = pd.read_csv(file, sep='|', header=0, skiprows=5)
        data.drop('EMAIL', axis=1, inplace=True)  # Deletes Email column
        data.drop(data.index[200:], inplace=True)  # Limits rows to 200
        data.sort_values(['REGTERM', 'PELL', 'APPTERM'],  # Sort order
                         ascending=[False, False, False],
                         inplace=True)
        data.to_excel(timr + '.xlsx', index=False)
        time.sleep(0.2)
        bar.next()
    bar.finish()
    print('\nComplete!!!!')
# If user didn't remove quotes or spaces
except FileNotFoundError:
    print('\nMake sure there are no spaces or quotes and try again\n')
y += 1


SUF = "'%(percent).5f%% - %(eta)ds'"


def check_big_oh():
    with IncrementalBar('Processing', max=(10**6) * (LIM**3), suffix=SUF) as bar:
        for n_0 in approx_positive_rationals():
            for c in approx_positive_rationals():
                found_n = False
                for n in approx_naturals():
                    bar.next()
                    if n >= n_0 and n <= c * n**2:
                        found_n = True
                        break
                if not found_n:
                    return f"No n value for n_0={n_0}, c={c}."
    return "The statement is true!"


if __name__ == "__main__":
    # print(check_big_oh())
    with IncrementalBar('Processing', max=LIM * 1000,
                        suffix='%(percent).2f%% - %(eta)ds') as bar:
        for i in approx_positive_rationals():
            bar.next()
import time
from progress.bar import Bar, ChargingBar, FillingSquaresBar, FillingCirclesBar

length = 20

# with Bar('Processing', max=length) as bar:
#     for i in range(length):
#         time.sleep(1)
#         bar.next()

# for i in Bar('Processing').iter(range(length)):
#     time.sleep(1)

# from progress.spinner import Spinner
# for i in Spinner(Spinner.__name__).iter(range(10)):
#     time.sleep(1)

from progress.bar import IncrementalBar

with IncrementalBar(IncrementalBar.__name__, max=length,
                    suffix='%(index)d/%(max)d [%(eta_td)s]') as bar:
    for i in bar.iter(range(length)):
        time.sleep(1)

# for bar_cls in (Bar, ChargingBar, FillingSquaresBar, FillingCirclesBar):
#     suffix = '%(index)d/%(max)d [%(elapsed)d / %(eta)d / %(eta_td)s]'
#     bar = bar_cls(bar_cls.__name__, suffix=suffix)
#     for i in bar.iter(range(length)):
#         time.sleep(1)
def trash_videos(time_limit, extensions, trash_folder_name, sudo):
    """Trash the videos that are shorter than time_limit to get rid of the shooting errors.

    Parameters
    ----------
    time_limit : int
        Duration limit. If a video has a duration smaller than time_limit, it is
        moved into trash_folder_name.
    extensions : dict
        Contains the lists of extensions for each type of file.
    trash_folder_name : string
        Name of the folder where to put the trashed videos. Equal to 'Trash' by
        default but can be changed in the video-logging/data.yaml file.
    sudo : bool
        Whether sudo mode is activated or not.
    """
    def move_to_trash(file, duration, trash_folder_name):
        """Move a video to trash if it is too short.

        Check if a directory named trash_folder_name exists in current directory.
        If not, create it. Then, move `file` in trash_folder_name if `duration`
        is smaller than `time_limit`.

        Parameters
        ----------
        file : string
            File to check.
        duration : int
            Duration of video file.
        trash_folder_name : string
            Name of the folder where to put the trashed videos. Equal to 'Trash'
            by default but can be changed in the video-logging/data.yaml file.
        """
        if duration < time_limit:
            if os.path.exists(trash_folder_name):  # if 'trash_folder_name' already exists
                if os.path.isfile(trash_folder_name):  # if 'trash_folder_name' is a regular file
                    raise BadFolderName(
                        f"You have a file named '{trash_folder_name}' in the current working directory, which is not a valid file name because this tool uses it as a directory name. You may consider changing the 'trash_folder_name' default in 'data.yaml'."
                    )
                else:  # if 'trash_folder_name' is a directory
                    pass
            else:  # if 'trash_folder_name' does not exist
                os.mkdir(f'./{trash_folder_name}')
            os.rename(file, os.path.join(trash_folder_name, file))
            return True
        return False

    check_parent(sudo)
    n = get_number_files(extensions, directory='Videos')
    if n == 0:
        raise EmptyFolder(
            "Nothing to do here, this folder does not contain any video.")
    bar = IncrementalBar(f"Trashing videos of duration <= {time_limit}s...", max=n)
    nb_trashed = 0
    for file in os.listdir():
        extension = os.path.splitext(file)[1]
        if extension in extensions['Videos']:
            with VideoFileClip(file) as clip:
                # we need to wait a little so that bad things do not happen
                time.sleep(.001)
                duration = clip.duration
            is_moved = move_to_trash(
                file, duration, trash_folder_name)  # warning: side effect happening here
            if is_moved:
                nb_trashed += 1
            bar.next()
    bar.finish()
    term = "s" if nb_trashed >= 2 else ""
    return f"{nb_trashed} video{term} trashed."
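# A hedged usage sketch for trash_videos(); the extensions mapping and flags below
# are placeholders for values normally loaded from the project's data.yaml.
extensions = {'Videos': ['.mp4', '.mov', '.avi']}  # placeholder mapping
print(trash_videos(3, extensions, 'Trash', sudo=False))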
max_epochs = 201
validation_loss_history = []
training_loader = get_training_loader(batch_number=batch_num)
validation_loader = get_validation_loader(batch_number=batch_num)

print(f"Training batch-{batch_num}")
for epoch in range(max_epochs):
    training_loss, validation_loss, accuracy, counter = 0, 0, 0, 0
    model.train()
    training_bar = IncrementalBar(
        message='Training ',
        max=len(training_loader),
        suffix="%(percent)d%% [%(elapsed_td)s / %(eta_td)s]")
    training_timer = time()

    # -------------------
    #   TRAINING STEP
    # -------------------
    for inputs, labels in training_loader:

        def closure():
            # Clear the gradients and perform a forward pass
            optimiser.zero_grad()
            output = model(inputs)
            # Check the loss
            loss = error_function(output, labels)
        columns=['viewid', 'Url', dimensions, metrics])
else:
    bigdf = pd.DataFrame(columns=['viewid', dimensions, metrics])

# Authenticate and construct service.
service = get_service('analytics', 'v3', scope, 'client_secrets.json',
                      thisgoogleaccount)
profiles = service.management().profiles().list(
    accountId='~all', webPropertyId='~all').execute()
# profiles is now a list
if debugvar:
    print("Processing: " + thisgoogleaccount)
if debugvar:
    print("Total profiles: " + str(profiles['totalResults']))
bar = IncrementalBar('Processing', max=profiles['totalResults'])
itemcounter = 0
for item in profiles['items']:
    dataPresent = False
    if test is not None and itemcounter == test:
        break
    bar.next()
    if 'starred' in item:
        smalldf = pd.DataFrame()
        if debugvar:
            print(item['id'] + ',' + start_date + ',' + end_date)
        if debugvar:
            print("Try querying: " + str(item['id']) + ":" + item['websiteUrl'])
print(f'Horas perdidas por día por utilitario: {tiempo_desperdiciado_dia}')
print(f'Diagnóstica antes de la hora y media: {tuplas_datos[5]}')
'''
resultados_finales = [len(eventos_terminados), eventos_sabado, tuplas_datos[0],
                      tuplas_datos[1], tuplas_datos[3], tuplas_datos[4]]
resultados_texto = f'Prueba,{len(eventos_terminados)},{eventos_sabado},{tuplas_datos[0]},{tuplas_datos[1]},' \
                   f'{tuplas_datos[3]},{tuplas_datos[4]},{tuplas_datos[5]},{minutos_utilitarios_usados},' \
                   f'{tiempo_usado_dia},{busquedas_kit}\n'
# print(resultados_finales)
with open('resultados.csv', 'a') as fd:
    fd.write(resultados_texto)

print('COMIENZA LA SIMULACION')
bar = IncrementalBar('Progress',
                     max=100,
                     suffix="%(percent)d%%. [%(index)d/%(max)d] %(eta)ds remaining. %(elapsed)ds elapsed. %(avg)ds average.")
for i in range(100):
    sys.stdout.write("\033[1;34m")
    sys.stdout.write('\r[' + '-' * (i // 2) + ' ' * (100 - (i // 2)) + ']' +
                     "Progress: " + str(int(100 * (i + 1) / 200)) + "%. Generating calls #" + str(i + 1))
    sys.stdout.flush()
    parametros = generar_llamados(params, horas)
    simulacion(parametros)
    bar.next()
    sys.stdout.write('\r[' + '-' * (i // 2) + ' ' * (100 - (i // 2)) + ']' +
                     "Progress: " + str(int(100 * (i + 1) / 200)) + "%. Running scheduling #" + str(i + 1))
    sys.stdout.flush()
    sys.stdout.write("\033[1;31m")
    # print("")
bar.finish()
import os
import re
import secrets
import time
from progress.bar import IncrementalBar
import requests
from multiprocessing.dummy import Pool as ThreadPool
from colorama import Fore as f
from colorama import Style as s
from search.logger import log_all, log_errors
from os import walk
from search.request import Request
from urllib.parse import urljoin

bar = IncrementalBar('Countdown', max=1456)


class WAFBypass:

    def __init__(self, host, proxy):
        self.host = host
        if proxy != '':
            self.proxy = {'http': proxy, 'https': proxy}
        else:
            self.proxy = {'http': None, 'https': None}
        self.session = requests.Session()
        self.session.trust_env = False
        self.name_pattern = re.compile(r'\d+\.json')
        self.timeout = 150
        self.calls = 0
# Sets the earlier date as the start date object.
start = datetime.strptime(min(date1, date2), "%Y-%m-%d")
# Sets the most recent date as the end date object.
end = datetime.strptime(max(date1, date2), "%Y-%m-%d")
# Calculates number of JSON files that will be downloaded.
delta = abs(start - end).days + 1
print("Days of data to download: " + str(delta) + "\n")

# Save Directory Input ex) /home/<your name>/valhalla/data/
save_directory = input("Input Save Directory: \n")

# Status Bar Initializer
bar = IncrementalBar('Download Status:', max=delta)

# Downloads JSON files based on the provided date range, working from the most
# recent to the oldest. The data file for the current date might be missing and
# will be skipped, if necessary.
for i in range(delta):
    day = end - timedelta(days=i)
    url = str("https://zkillboard.com/api/history/" + day.strftime("%Y%m%d") + ".json")
    try:
        response = requests.get(url)
        response.raise_for_status()
        try:
            data = response.json()
            with open(save_directory + day.strftime("%Y%m%d") + ".json", "w") as f:
                json.dump(data, f)
def main(args):
    if not os.path.exists('results'):
        os.makedirs('results')
    if not os.path.exists('counters'):
        os.makedirs('counters')
    exp_type = utils.create_file_prefix(args.positive_fraction, args.with_delta,
                                        args.fraction, args.sampler_size, args.pop)
    send_strategy = SendStrategy.SendDelta() if args.with_delta else SendStrategy.SendVector()

    for dataset in args.datasets:
        print("Working on", dataset, "dataset")
        if not os.path.exists('results/{}'.format(dataset)):
            os.makedirs('results/{}'.format(dataset))
        if not os.path.exists('counters/{}'.format(dataset)):
            os.makedirs('counters/{}'.format(dataset))

        if args.create_dataset_files:
            # Read the dataset and prepare it for training, validation and test
            names = ['user_id', 'item_id', 'rating', 'utc']
            df = pd.read_csv('original_datasets/' + dataset + '.tsv', sep='\t',
                             dtype={'rating': 'float64', 'utc': 'int64'},
                             header=0, names=names)
            df = df.groupby('user_id').filter(lambda x: len(x) >= 20)
            print(df.shape[0], 'interactions read')
            df, _ = utils.convert_unique_idx(df, 'user_id')
            df, _ = utils.convert_unique_idx(df, 'item_id')
            user_size = len(df['user_id'].unique())
            item_size = len(df['item_id'].unique())
            print('Found {} users and {} items'.format(user_size, item_size))
            total_user_lists = utils.create_user_lists(df, user_size, 4)
            train_user_lists, validation_user_lists, test_user_lists = utils.split_train_test(
                total_user_lists, test_size=0.2, validation_size=args.validation_size)
            # train_interactions_size = sum([len(user_list) for user_list in train_user_lists])
            # print('{} interactions considered for training'.format(train_interactions_size))
            if not os.path.exists('sets'):
                os.makedirs('sets')
            with open('sets/{}_trainingset.tsv'.format(dataset), 'w') as out:
                for u, train_list in enumerate(train_user_lists):
                    for i in train_list:
                        out.write(str(u) + '\t' + str(i) + '\t' + str(1) + '\n')
            with open('sets/{}_testset.tsv'.format(dataset), 'w') as out:
                for u, test_list in enumerate(test_user_lists):
                    for i in test_list:
                        out.write(str(u) + '\t' + str(i) + '\t' + str(1) + '\n')
            continue

        df = pd.read_csv('sets/{}_trainingset.tsv'.format(dataset), sep='\t',
                         names=['user_id', 'item_id', 'rating'])
        df, reverse_dict = utils.convert_unique_idx(df, 'item_id')
        user_size = len(df['user_id'].unique())
        item_size = len(df['item_id'].unique())
        print('Found {} users and {} items'.format(user_size, item_size))
        train_user_lists = utils.create_user_lists(df, user_size, 3)
        train_interactions_size = sum([len(user_list) for user_list in train_user_lists])
        print('{} interactions considered for training'.format(train_interactions_size))

        if args.pop:
            print("Analyzing popularity... \r")
            most_popular_items = (args.pop, utils.get_popularity(train_user_lists))
            print("Done.")
        else:
            most_popular_items = None
        if args.pop == 3:
            splitting_epochs = [int(7 * args.n_epochs / 8),
                                int(3 * args.n_epochs / 4),
                                int(args.n_epochs / 2)]

        # Set parameters based on arguments
        if args.fraction == 0:
            round_modifier = int(train_interactions_size)
        else:
            round_modifier = int(train_interactions_size / (args.fraction * user_size))
        sampler_dict = {'single': 1, 'uniform': int(train_interactions_size / user_size)}
        sampler_size = sampler_dict.get(args.sampler_size)

        # Build final triplet samplers
        triplet_samplers = [TripletSampler(train_user_lists[u], item_size, sampler_size)
                            for u in range(user_size)]

        for n_factors in args.n_factors:
            exp_setting_1 = "_F" + str(n_factors)
            for lr in args.lr:
                exp_setting_2 = exp_setting_1 + "_LR" + str(lr)
                # Create server and clients
                server_model = ServerModel(item_size, n_factors)
                server = Server(server_model, lr, args.fraction, args.positive_fraction,
                                args.mp, send_strategy, most_popular_items)
                clients = [Client(u, ClientModel(n_factors), triplet_samplers[u],
                                  train_user_lists[u], sampler_size)
                           for u in range(user_size)]
                # Start training
                for i in range(args.n_epochs * round_modifier):
                    if i % round_modifier == 0:
                        bar = IncrementalBar('Epoch ' + str(int(i / round_modifier + 1)),
                                             max=round_modifier)
                    bar.next()
                    server.train_model(clients)
                    if args.pop:
                        if args.pop == 3:
                            if len(splitting_epochs) > 0:
                                if (i + 1) % (splitting_epochs[-1] * round_modifier) == 0:
                                    splitting_epochs.pop()
                                    server.new_step()
                        else:
                            if (i + 1) % (args.step_every * round_modifier) == 0:
                                server.new_step()
                    # Evaluation
                    if ((i + 1) % (args.eval_every * round_modifier)) == 0:
                        exp_setting_3 = exp_setting_2 + "_I" + str((i + 1) / round_modifier)
                        results = server.predict(clients, max_k=100)
                        with open('results/{}/{}{}.tsv'.format(dataset, exp_type, exp_setting_3), 'w') as out:
                            for u in range(len(results)):
                                for e, p in results[u].items():
                                    out.write(str(u) + '\t' + str(reverse_dict[e]) + '\t' + str(p) + '\n')

        final_dict = {k: 0 for k in range(item_size)}
        for i in server.big_list:
            final_dict[i] += 1
        with open('counters/{}/{}.tsv'.format(dataset, exp_type), 'w') as out:
            for k, v in final_dict.items():
                out.write(str(reverse_dict[k]) + '\t' + str(v) + '\n')
def run(self, http_method):
    from progress.bar import IncrementalBar

    log20x = logging.getLogger("log20x")
    log40x = logging.getLogger("log40x")
    err_log = logging.getLogger("err_logger")

    case_total = self.sample.sample_total() * self.payload.sample_total()
    bar = IncrementalBar(u'RUNNING', max=case_total)

    # An earlier synchronous, per-request version of this loop was left commented out in the
    # source; it is superseded by the thread-pool version below and omitted here.
    with ThreadPoolExecutor(max_workers=100) as executor:
        chrome_ua = r"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
        fe = []
        for fn, ln, test_url in self._generate_url_req():
            test_header = {
                "User-Agent": chrome_ua,
                "Waf-Test-Case": "%s:%d" % (os.path.basename(fn), ln)
            }
            fe.append(executor.submit(self.RequestMethod[http_method], test_url,
                                      headers=test_header))

        for f in as_completed(fe):
            try:
                r = f.result()
                test_case_id = 'unknown'
                test_url = r.request.url
                if 'Waf-Test-Case' in r.headers:
                    test_case_id = r.headers['Waf-Test-Case']
                bar.next()
                if 200 <= r.status_code < 400:  # 2xx/3xx, as in the original integer-division check
                    log20x.info("[%d] %s (%s)" % (r.status_code, test_url, test_case_id))
                elif r.status_code >= 500:
                    err_log.error("[%d] %s (%s)" % (r.status_code, test_url, test_case_id))
                else:
                    log40x.info("[%d] %s (%s)" % (r.status_code, test_url, test_case_id))
                self.test_total += 1
            except requests.exceptions.ConnectionError as e:
                err_log.error("%s" % e)
    # Finish the bar once all futures have completed.
    bar.finish()
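`self.RequestMethod[http_method]` is assumed to map an HTTP verb onto the corresponding `requests` call. A hypothetical version of that mapping (the project's actual attribute may be defined differently) could be:

import requests

# Hypothetical verb-to-callable mapping assumed by run().
RequestMethod = {
    "GET": requests.get,
    "POST": requests.post,
    "HEAD": requests.head,
}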
import random
import time

from progress.bar import (Bar, ChargingBar, FillingSquaresBar, FillingCirclesBar,
                          IncrementalBar, ShadyBar)
from progress.spinner import Spinner, PieSpinner, MoonSpinner, LineSpinner
from progress.counter import Counter, Countdown, Stack, Pie


def sleep():
    # Small randomized delay so the demo bars advance visibly.
    # The function head was truncated in the source; the exact delay is illustrative.
    t = 0.01
    t += t * random.uniform(-0.1, 0.1)
    time.sleep(t)


for bar_cls in (Bar, ChargingBar, FillingSquaresBar, FillingCirclesBar):
    suffix = '%(index)d/%(max)d [%(elapsed)d / %(eta)d]'
    bar = bar_cls(bar_cls.__name__, suffix=suffix)
    for i in bar.iter(range(200)):
        sleep()

for bar_cls in (IncrementalBar, ShadyBar):
    suffix = '%(percent)d%% [%(elapsed_td)s / %(eta_td)s]'
    bar = bar_cls(bar_cls.__name__, suffix=suffix)
    for i in bar.iter(range(200)):
        sleep()

for spin in (Spinner, PieSpinner, MoonSpinner, LineSpinner):
    for i in spin(spin.__name__ + ' ').iter(range(100)):
        sleep()
    print()

for singleton in (Counter, Countdown, Stack, Pie):
    for i in singleton(singleton.__name__ + ' ').iter(range(100)):
        sleep()
    print()

bar = IncrementalBar('Random', suffix='%(index)d')
for i in range(100):
    bar.goto(random.randint(0, 100))
    sleep()
bar.finish()
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    import h5py

print('\n=== COUPLE ===')

# Get list of all files for all coordinates
CC, nfiles = couplepaths(coords, mapcfg)
targetpath = paths['tiles']
print('Target path:\n{}'.format(targetpath))

# Initialise file counter, progress bar and processed coords list
hdfcnt = 0   # HDF file counter
imgcnt = 0   # Written image file counter
bar = IncrementalBar('Processing files... ETA: %(eta)ds', max=nfiles, width=25)
fileerror = False

# Iterate over coordinates
for C in CC:
    # Get coordinate
    coord = C['coord']
    filepaths = C['filepaths']
    # Iterate over filepaths for this coordinate
    for filepath in filepaths:
        try:
            with h5py.File(filepath, 'r') as f:  # Open HDF5 file
                # Find tile coords, slice tiles and write files
                ind = couple_indexer(f, coord)
                # ... tile slicing and writing continue in the original script (truncated here) ...
        except OSError:
            # Assumed completion: flag the error and move on to the next file.
            fileerror = True
            continue
        hdfcnt += 1
        bar.next()
for bar_cls in (Bar, ChargingBar, FillingSquaresBar, FillingCirclesBar):
    suffix = '%(index)d/%(max)d [%(elapsed)d / %(eta)d / %(eta_td)s] (%(iter_value)s)'
    bar = bar_cls(bar_cls.__name__, suffix=suffix)
    for i in bar.iter(range(200, 400)):
        sleep()

for bar_cls in (IncrementalBar, PixelBar, ShadyBar):
    suffix = '%(percent)d%% [%(elapsed_td)s / %(eta)d / %(eta_td)s]'
    with bar_cls(bar_cls.__name__, suffix=suffix, max=200) as bar:
        for i in range(200):
            bar.next()
            sleep()

bar = IncrementalBar(bold('Corolored'), color='green')
for i in bar.iter(range(200)):
    sleep()

for spin in (Spinner, PieSpinner, MoonSpinner, LineSpinner, PixelSpinner):
    for i in spin(spin.__name__ + ' %(index)d ').iter(range(100)):
        sleep()

for singleton in (Counter, Countdown, Stack, Pie):
    for i in singleton(singleton.__name__ + ' ').iter(range(100)):
        sleep()

bar = IncrementalBar('Random', suffix='%(index)d')
for i in range(100):
    bar.goto(random.randint(0, 100))
    sleep()
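The `suffix` strings in these demos are ordinary %-format templates evaluated against the bar's own attributes (`index`, `max`, `percent`, `avg`, `eta_td`, and so on), so a custom read-out needs no subclassing. A small illustrative example:

from progress.bar import IncrementalBar

# avg is seconds per item; eta_td is the remaining time as a timedelta.
bar = IncrementalBar('Copying', max=120,
                     suffix='%(index)d/%(max)d files  ~%(avg).2fs/file  ETA %(eta_td)s')
for _ in range(120):
    bar.next()
bar.finish()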
def check_rds_instance(rds_name, states, connection, auto_name):
    # Create RDS client
    rds = boto3.client('rds')
    print('\n' + tag + 'Creating Database\n'
          'Please wait as it typically takes 10-15 minutes before an instance is available.')

    # Create progress bar and continuously update it
    bar = IncrementalBar(rds_name, max=len(states), suffix='')
    while True:
        global creating
        global backing_up
        global available
        global monitoring
        global logging
        global count

        # Check RDS instance
        response = rds.describe_db_instances(DBInstanceIdentifier=rds_name)
        instances = response.get('DBInstances')
        status = instances[0].get('DBInstanceStatus').title()

        # Handle 'Creating' status
        if status == 'Creating' and not creating:
            creating = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status, end='\r', flush=True)
        # Handle 'Backing-Up' status
        elif status == 'Backing-Up' and not backing_up:
            backing_up = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status, end='\r', flush=True)
        # Handle 'Available' status
        elif status == 'Available' and not available:
            available = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status, end='\r', flush=True)
            break
        # Handle 'Configuring-Enhanced-Monitoring' status
        elif status == 'Configuring-Enhanced-Monitoring' and not monitoring:
            monitoring = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status, end='\r', flush=True)
        # Handle 'Configuring-Log-Exports' status
        elif status == 'Configuring-Log-Exports' and not logging:
            logging = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status, end='\r', flush=True)

        # Sleep for 30 seconds between checks
        time.sleep(30)

    # Finish progress bar
    bar.finish()

    # Check for schema and grab endpoint
    check_schema = False
    while not check_schema:
        # Automatically create the PostgreSQL schema based on auto_name
        if auto_name:
            check_schema = True
            response = rds.describe_db_instances(DBInstanceIdentifier=rds_name)
            instances = response.get('DBInstances')
            endpoint = instances[0].get('Endpoint').get('Address')
            cps.create_postgres_sql(rds_name, auto_name, endpoint, connection)
        # Ask for a schema file if auto_name is not provided
        else:
            print('\n' + tag + 'Database Ready\n'
                  'Please specify the schema filename (excluding .json):', end=' ')
            schema_name = input()
            if schema_name != '' and not schema_name.endswith('.json'):
                check_schema = True
                response = rds.describe_db_instances(DBInstanceIdentifier=rds_name)
                instances = response.get('DBInstances')
                endpoint = instances[0].get('Endpoint').get('Address')
                cps.create_postgres_sql(rds_name, schema_name, endpoint, connection)
            # Handle invalid input
            else:
                print(Style.BRIGHT +
                      'Invalid entry. Please enter a valid schema filename excluding the ".json" extension.\n')
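If the per-state progress reporting were not required, the polling loop above could be replaced by boto3's built-in waiter. A hedged alternative sketch (function name is illustrative, not part of the original module):

import boto3

def wait_until_available(rds_name: str, delay: int = 30, max_attempts: int = 60) -> str:
    """Block until the RDS instance is available, then return its endpoint address."""
    rds = boto3.client('rds')
    waiter = rds.get_waiter('db_instance_available')
    waiter.wait(DBInstanceIdentifier=rds_name,
                WaiterConfig={'Delay': delay, 'MaxAttempts': max_attempts})
    instance = rds.describe_db_instances(DBInstanceIdentifier=rds_name)['DBInstances'][0]
    return instance['Endpoint']['Address']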
import time

from progress.bar import IncrementalBar

mylist = [1, 2, 3, 4, 5, 6, 7, 8]

bar = IncrementalBar('Countdown', max=len(mylist))
for item in mylist:
    bar.next()
    # time.sleep(1)
bar.finish()
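The same loop can be written with the `iter()` helper used by the other demos in this collection; it sets `max` from the iterable's length, advances the bar per item, and finishes it automatically. A small sketch:

from progress.bar import IncrementalBar

mylist = [1, 2, 3, 4, 5, 6, 7, 8]

# bar.iter() wraps the iterable, calling next() per item and finish() at the end.
for item in IncrementalBar('Countdown').iter(mylist):
    pass  # per-item work goes here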
def install(package_list):
    ''' Install A Specified Package(s) '''
    if platform == 'linux' or platform == 'darwin':
        password = getpass('Enter your password: ')
    else:
        password = ''  # otherwise the variable would be undefined
    packages = package_list.split(',')
    turbocharge = Installer()
    click.echo('\n')
    os_bar = IncrementalBar('Getting Operating System...', max=1)
    os_bar.next()
    for package_name in packages:
        package_name = package_name.strip(' ')
        if platform == 'linux':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...', max=1)
            if package_name in devpackages_linux:
                show_progress(finding_bar)
                turbocharge.install_task(
                    devpackages_linux[package_name],
                    f'{constant.apt_script} {package_name}',
                    password,
                    f'{package_name} --version',
                    [f'{devpackages_linux[package_name]} Version'])
            if package_name in applications_linux:
                show_progress(finding_bar)
                turbocharge.install_task(
                    applications_linux[package_name],
                    f'{constant.snap_script} {package_name}',
                    password, '', [])
            if package_name == 'chrome':
                show_progress(finding_bar)
                try:
                    click.echo('\n')
                    password = getpass("Enter your password: ")
                    # [The rest of the Chrome install block, and the opening of the Windows
                    #  branch below, were redacted in the source ('******') and cannot be
                    #  recovered here.]
                except Exception:
                    # Placeholder so the truncated excerpt stays syntactically valid.
                    pass
        if platform == 'win32':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...', max=1)
            if package_name in devpackages_windows:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=devpackages_windows[package_name],
                    script=f"choco install {package_name} -y",
                    password="",
                    test_script=f"{package_name} --version",
                    tests_passed=[f'{devpackages_windows[package_name]} Version'])
            elif package_name in applications_windows:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=applications_windows[package_name],
                    script=f"choco install {package_name} -y",
                    password="",
                    test_script="",
                    tests_passed=[])
            elif package_name not in devpackages_windows and package_name not in applications_windows:
                click.echo('\n')
                click.echo(click.style(':( Package Not Found! :(', fg='red'))
        if platform == 'darwin':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...', max=1)
            # Membership checks corrected to the macOS dictionaries; the source tested the
            # Windows dictionaries here, which looks like a copy-paste slip.
            if package_name in devpackages_macos:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=devpackages_macos[package_name],
                    script=f"brew install {package_name}",
                    password="",
                    test_script=f"{package_name} --version",  # test_script is just a string here
                    tests_passed=[f'{devpackages_macos[package_name]} Version'])
            elif package_name in applications_macos:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=applications_macos[package_name],
                    script=f"brew cask install {package_name}",
                    password="",
                    test_script="",
                    tests_passed=[])
            elif package_name not in devpackages_macos and package_name not in applications_macos:
                click.echo('\n')
                click.echo(click.style(':( Package Not Found! :(', fg='red'))
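`show_progress(finding_bar)` is called throughout but never defined in this excerpt. Since the "finding" bars are created with `max=1`, it is probably a tiny helper that advances and closes the bar; a hedged sketch (the real helper may differ):

from progress.bar import IncrementalBar

def show_progress(bar: IncrementalBar) -> None:
    # Hypothetical helper: complete the single-step bar and finish it.
    bar.next()
    bar.finish()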