def tab_mix_table(self, mix_data, _verbose=False, brainz=False, format=""):
    mix_data_key_bpm = self.replace_key_bpm(mix_data)
    mix_data_nl = self.trim_table_fields(mix_data_key_bpm)
    # for row in mix_data_nl:  # DEBUG
    #     log.debug(str(row))
    # log.debug("")
    if _verbose:
        self.p(
            tab(mix_data_nl,
                tablefmt='pipe' if not format else format,
                headers=self.cols_mixtracks.headers_dict(short=True)))
    elif brainz:
        mix_data_brainz = self.replace_brainz(mix_data_key_bpm)
        mix_data_brainz_nl = self.trim_table_fields(mix_data_brainz,
                                                    exclude=['methods'])
        self.p(
            tab(mix_data_brainz_nl,
                tablefmt='grid' if not format else format,
                headers=self.cols_mixtracks_brainz.headers_dict()))
    else:
        self.p(
            tab(mix_data_nl,
                tablefmt='pipe' if not format else format,
                headers=self.cols_mixtracks_basic.headers_dict()))
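# A minimal, self-contained sketch of the tablefmt-fallback pattern used in
# tab_mix_table above: an empty `format` argument falls back to a default
# table format. The rows, headers dict, and show_tracks name below are made
# up for illustration.
from tabulate import tabulate as tab

def show_tracks(rows, format=""):
    headers = {'pos': '#', 'artist': 'Artist', 'bpm': 'BPM'}
    print(tab(rows, tablefmt='pipe' if not format else format,
              headers=headers))

show_tracks([{'pos': 1, 'artist': 'UR', 'bpm': 132}])          # default: pipe
show_tracks([{'pos': 1, 'artist': 'UR', 'bpm': 132}], 'grid')  # explicit format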
def tab_mix_info_header(self, mix_info):
    self.print_help(
        tab([mix_info],
            tablefmt="plain",
            headers=["Mix", "Name", "Created", "Updated", "Played", "Venue"]))
def tab_online_search_results(self, _result_list):
    self.print_help(
        tab(_result_list,
            tablefmt="simple",
            headers=["ID", "Artist", "Release", "Label", "C", "Year",
                     "Format"]))
def main():
    __mkpro()
    athletes = pd.DataFrame([])
    rides = pd.DataFrame([])
    zips = [f for f in os.listdir(zip_dir) if f.endswith('.zip')]
    for z in zips:
        print('Processing file: {}'.format(z))
        z_name = os.path.splitext(z)[0]
        z_path = os.path.join(zip_dir, z)
        z_out = os.path.join(pro_dir, z_name)
        x = StravaExport(z_path, z_out)
        x.extract_zip()
        df1 = x.athlete_pd()
        df2 = x.rides_pd()
        # DataFrame.append was removed in pandas 2.0; this requires pandas < 2.
        athletes = athletes.append(df1, sort=True)
        rides = rides.append(df2, sort=True)
    # Keep only rides with a recorded FTP, then count rides per athlete.
    rides = rides[rides.ftp > 0]
    rc = rides.groupby('id').size().rename('num_rides')
    athletes = athletes.merge(rc.to_frame(), left_on='id', right_on='id')
    print(tab(athletes, headers='keys', tablefmt='psql'))
    print('Saving output to: {}'.format(pro_dir))
    athletes.to_csv(os.path.join(pro_dir, 'athletes.csv'), index=False)
    rides.to_csv(os.path.join(pro_dir, 'rides.csv'), index=False)
def tab_mix_table(self, _mix_data, _verbose=False):
    if _verbose:
        self.print_help(
            tab(_mix_data,
                tablefmt="pipe",
                headers=[
                    "#", "Release", "Track\nArtist", "Track\nName",
                    "Track\nPos", "Key", "BPM", "Key\nNotes", "Trans.\nRating",
                    "Trans.\nR. Notes", "Track\nNotes"
                ]))
    else:
        self.print_help(
            tab(_mix_data,
                tablefmt="pipe",
                headers=["#", "Release", "Tr\nPos", "Trns\nRat", "Key",
                         "BPM"]))
def tab_mixes_list(self, mixes_data):
    mixes_short_timestamps = self.shorten_mixes_timestamps(mixes_data)
    tabulated = tab(
        self.trim_table_fields(mixes_short_timestamps),
        tablefmt="simple",
        headers=self.cols_mixes.headers_dict()  # data is dict, headers too
    )
    self.p(tabulated)
def __call__(self, nodes):
    sum_games = 0
    qnodes = len(nodes)
    assert qnodes > 0
    scores = np.zeros(qnodes)
    qgames = np.zeros(qnodes)
    zero = []
    for i, node in enumerate(nodes):
        if node.qgames > 0:
            n = self.weights[0] * node.qgames
            scores[i] = node.score / node.qgames
            qgames[i] = n
            sum_games += n
        elif node.estimation.weight > 0:
            n = self.weights[1] * node.estimation.weight
            scores[i] = node.estimation.value
            qgames[i] = n
            sum_games += n
        else:
            zero.append(node)
    # Unvisited nodes (no games, no estimation) are explored first, at random.
    if zero:
        return random.choice(zero)
    # UCB1: ucb_i = score_i + C * sqrt(log(1 + sum_games) / qgames_i);
    # ties on the maximum are broken at random.
    log_sum_games = np.log(1.0 + sum_games)
    ucb = scores + self.C * np.sqrt(log_sum_games / qgames)
    index = np.random.choice(np.flatnonzero(ucb == ucb.max()))
    if self.log:
        from itertools import count
        print('UcbMoveSelection:')
        print('=================')
        print()
        rows = []
        # Loop variable is named node_ucb so it does not shadow the ucb array.
        for i, node, node_ucb in zip(count(), nodes, ucb):
            rows.append([i, node_ucb, '*' if i == index else ' ']
                        + node.dump_row())
        if TABULATE:
            columns = [
                '#', 'UCB', '!', 'view', 'locked', 'score', 'eval', 'qgames',
                'qmoves', 'es.value', 'es.weight'
            ] + ['r' + str(i) for i in range(1, 10)]
            print(tab(rows, tablefmt='plain', headers=columns))
        else:
            print('\n'.join('\t'.join(str(value) for value in row)
                            for row in rows))
        print('SELECTED:', index, '-', nodes[index].view)
        print()
    return nodes[index]
def display_analysis(functions, arr, text_file):
    """Write timing tables for each sort function to text_file."""
    rounder = 1000
    mu = '\N{GREEK SMALL LETTER MU}'
    print(tab([['Data Type', str(type(arr[1][0][0]))],
               ['List Size', str(len(arr[1][0])) + ' elements']],
              headers=['Stat', 'Value'],
              tablefmt="orgtbl"),
          file=text_file)
    table = []
    sort_proof = []
    for i in range(len(functions)):
        analysis = empirical_analysis(functions[i][0], arr)
        time_collections = [
            analysis['ordered']['time'],
            analysis['random']['time'],
            analysis['reversed']['time'],
        ]
        time_collections.sort()
        table += [[
            functions[i][1],
            str(time_collections[0] * rounder)[:6] + ' ' + mu + 's'
            + get_instance(analysis, time_collections[0]),
            str(time_collections[1] * rounder)[:6] + ' ' + mu + 's'
            + get_instance(analysis, time_collections[1]),
            str(time_collections[2] * rounder)[:6] + ' ' + mu + 's'
            + get_instance(analysis, time_collections[2]),
            str(analysis['ordered']['comparisons'])
        ]]
        if i == 0:  # `is 0` compared identity, not equality
            sort_proof += [
                ['Input', analysis['random']['unsorted_arr'][:5] + ['...']],
                ['Output', analysis['random']['sorted_arr'][:5] + ['...']]
            ]
    if len(sort_proof) > 0:
        print(tab(sort_proof, tablefmt="orgtbl"), file=text_file)
        print('', file=text_file)
    print(tab(table,
              headers=['Algorithm', 'Best', 'Average', 'Worst', 'Compares'],
              tablefmt='orgtbl'),
          file=text_file)
    return table
def dump_nodes(nodes):
    if TABULATE:
        rows = (node.dump_row() for node in nodes)
        columns = [
            'view', 'locked', 'score', 'qgames', 'eval', 'qmoves', 'es.value',
            'es.weight'
        ] + ['r' + str(i) for i in range(1, 10)]
        return tab(rows, tablefmt='plain', headers=columns)
    else:
        return '\n'.join('\t'.join(str(value) for value in node.dump_row())
                         for node in nodes)
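# Hedged sketch of the optional-dependency pattern implied by the TABULATE
# flag used above: try to import tabulate and fall back to plain
# tab-separated output when the package is missing. How the flag is actually
# set in the original module is an assumption.
try:
    from tabulate import tabulate as tab
    TABULATE = True
except ImportError:
    TABULATE = False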
def tab_online_search_results(self, _result_list):
    self.p(
        tab(_result_list,
            tablefmt="simple",
            headers={
                'id': 'ID',
                'artist': 'Artist',
                'title': 'Release',
                'label': 'Label',
                'country': 'C',
                'year': 'Year',
                'format': 'Format'
            }))
def main_logic():
    host, user, password, instance = set_db_connection_data()
    cursor_connect = connect_to_db(host, user, password, instance)
    cursor_execute = execute_query(cursor_connect)
    inventory_files_list = fetch_query_results(cursor_execute)
    write_s3_search_file(inventory_files_list)
    profile, proxy = set_s3_connection_data()
    session = connect_to_s3(profile, proxy)
    raw_path_list, sliced_paths = create_path_list()
    s3_found_files = s3_file_search(sliced_paths, session)
    datasets = create_dataset_list()
    report = create_df_report(datasets)
    df_s3_count = write_file_counts(s3_found_files, datasets, 'S3 Count')
    report = pd.concat([report, df_s3_count], axis=1)
    df_inventory_count = write_file_counts(raw_path_list, datasets,
                                           'Inventory Count')
    report = pd.concat([report, df_inventory_count], axis=1)
    df_s3_to_inventory_ratio = calculate_ratios(report, 'S3 Count',
                                                'Inventory Count')
    report = pd.concat([report, df_s3_to_inventory_ratio], axis=1)
    df_inventory_to_s3_ratio = calculate_ratios(report, 'Inventory Count',
                                                'S3 Count')
    report = pd.concat([report, df_inventory_to_s3_ratio], axis=1)
    df_s3_missing_files = calculate_missing_files(report, 'S3 Count',
                                                  'Inventory Count')
    report = pd.concat([report, df_s3_missing_files], axis=1)
    df_inventory_missing_files = calculate_missing_files(
        report, 'Inventory Count', 'S3 Count')
    report = pd.concat([report, df_inventory_missing_files], axis=1)
    report.to_csv('search_report.csv')
    print('\n')
    print('\033[4mSEARCH RESULTS BELOW. CHECK search_report.csv '
          'IN YOUR WORKSPACE:\033[0m')
    print('\n')
    print(tab(report, headers=report.columns))
    print('\n')
    answer = input('SAVE THE INVENTORY TABLE FULL SEARCH LOG TO DISK? Y/N: ')
    write_search_results(s3_found_files, inventory_files_list,
                         'INVENTORY FILE', 'INVENTORY', 'S3',
                         'inventory_log.csv', answer)
    print('\n')
    answer = input('SAVE THE S3 FULL SEARCH LOG TO DISK? Y/N: ')
    write_search_results(inventory_files_list, s3_found_files, 'S3 FILE',
                         'S3', 'INVENTORY', 's3_log.csv', answer)
def seats(train, src, dest, date):
    data = {
        'train_no': f"{train}.",
        'stn_from': src,
        'stn_to': dest,
        'journey_date': date
    }
    response = requests.post(BR_API_SEATS, json=data)
    data = []
    headers = [
        'Class', 'Fare (Adult)', 'Fare (Child)', 'Counter Seat', 'Mobile Seat'
    ]
    for seat in response.json()['DATA']:
        data.append([
            seat['CLASS'], seat['FARE'], seat['FARE_C'], seat['COUNTER_SEAT'],
            seat['MOBILE_SEAT']
        ])
    print(tab(data, headers=headers))
def tab_all_releases(self, releases_data):
    table = [dict(row) for row in releases_data]
    for i, row in enumerate(table):
        links_str = self.join_links_to_str(row)
        row['artist_title_links'] = '{} - {}\n{}\n '.format(
            row['d_artist'], row['discogs_title'], links_str)
        del table[i]['m_rel_id_override']
        del table[i]['m_rel_id']
        del table[i]['discogs_id']
        del table[i]['d_artist']
        del table[i]['discogs_title']
    table = self.trim_table_fields(table, 40)
    print(
        tab(table,
            tablefmt="grid",
            headers={
                'd_catno': 'CatNo',
                'artist_title_links': 'Release: Artist - Title - Links'
            }))
def tab_stats(self, releases_total, releases_matched, tracks_total,
              tracks_matched, releases_collection_flag,
              releases_collection_online, mixtracks_total, mixtracks_unique,
              tracks_key_brainz, tracks_key_manual, tracks_bpm_brainz,
              tracks_bpm_manual):
    stats = [
        ['Releases in DiscoBASE', releases_total],
        ['Releases in Collection (DB flag)', releases_collection_flag],
        ['Releases in Collection (Discogs)', releases_collection_online],
        ['Releases matched with *Brainz', releases_matched],
        ['Tracks in DiscoBASE', tracks_total],
        ['Tracks matched with *Brainz', tracks_matched],
        ['Tracks with *Brainz key', tracks_key_brainz],
        ['Tracks with *Brainz BPM', tracks_bpm_brainz],
        ['Tracks with user-provided key', tracks_key_manual],
        ['Tracks with user-provided BPM', tracks_bpm_manual],
        ['Tracks in mixes', mixtracks_total],
        ['Unique tracks in mixes', mixtracks_unique],
    ]
    self.p(tab(stats, tablefmt='plain'), lead_nl=True)
def search(src, dest, date, adult, child, class_):
    params = (
        ('journey_date', date),
        ('from_station', src),
        ('to_station', dest),
        ('class', class_),
        ('adult', adult),
        ('child', child),
    )
    response = requests.get(BR_API_TRAINS, params=params)
    data = []
    headers = [
        'Train #', 'Train Name', 'Departure Time', 'Duration', 'Train Left'
    ]
    for train in response.json():
        data.append([
            train['trn_no'], train['trn_name'], train['dpt_time'],
            train['duration'], train['isTrainLeft']
        ])
    print(tab(data, headers=headers))
def build_graph_tables(tables, text_file):
    bubble = ['Bubble']
    select = ['Selection']
    insert = ['Insertion']
    shell = ['Shell']
    merge = ['Merge']
    heap = ['Heap']
    for table in tables:
        bubble += [float(table[1][0][2][:6])]
        select += [float(table[1][1][2][:6])]
        insert += [float(table[1][2][2][:6])]
        shell += [float(table[1][3][2][:6])]
        merge += [float(table[1][4][2][:6])]
        heap += [float(table[1][5][2][:6])]
    new_table = [bubble, select, insert, shell, merge, heap]
    print(tab(new_table,
              tablefmt='orgtbl',
              numalign="right",
              headers=['Sort', 'Int', 'Float', 'String', 'Card']),
          file=text_file)
    return new_table
def tab_mixes_list(self, mixes_data):
    # make list of dicts out of the sqlite tuples list
    mixes = [dict(row) for row in mixes_data]
    # shorten/format timestamps in this view
    for i, mix in enumerate(mixes):
        mixes[i]['created'] = self.shorten_timestamp(mix['created'],
                                                     text=True)
        mixes[i]['played'] = self.format_date_month(mix['played'], text=True)
        mixes[i]['updated'] = self.shorten_timestamp(mix['updated'],
                                                     text=True)
    tabulated = tab(
        self.trim_table_fields(mixes),
        tablefmt="simple",
        # headers has to be a dict too!
        headers={
            'mix_id': '#',
            'name': 'Name',
            'played': 'Played',
            'venue': 'Venue',
            'created': 'Created',
            'updated': 'Updated'
        })
    self.p(tabulated)
def main(debug=None):
    args = docopt(__doc__,
                  version='VCF-Toolbox v0.1',
                  argv=debug,
                  options_first=False)

    # Setup Genomes Directory
    if args["location"] and args["<path>"]:
        if args["<path>"] == "-":
            genome_directory = get_genome_directory_file()
            os.remove(genome_directory)
            return get_genome_directory_file()
        else:
            with open(get_genome_directory_file(), "w") as f:
                genome_directory = os.path.realpath(args["<path>"])
                with indent(2):
                    puts(colored.blue("\nSet genome location to: "
                                      + genome_directory + "/\n"))
                f.write(genome_directory)
                # create directory if it does not exist
                if not os.path.exists(genome_directory):
                    os.makedirs(genome_directory)
            return genome_directory

    if args["--directory"]:
        genome_directory = os.path.realpath(args["--ref"])
    else:
        genome_directory = get_genome_directory()

    with indent(2):
        puts(genome_directory)

    if args["location"] and not args["<path>"]:
        return genome_directory

    genome_db = get_genome_directory() + "/genomes.db"

    ################
    # List Genomes #
    ################
    if args["list"]:
        output_genome_list()

    ##################
    # Search Genomes #
    ##################
    elif args["--search"]:
        # Download and cache a list of genomes from NCBI for searching
        download_genomes(genome_db)
        # Cache result
        header = [
            "assembly_accession",  # 0
            "bioproject",          # 1
            "organism_name",       # 7
            "asm_name",            # 15
            "ftp_path",            # 19
        ]
        with indent(2):
            puts(colored.blue('\nSearching...\n'))
        with open(genome_db, "r") as f:
            results = []
            for line in f:
                if not line.startswith("#"):
                    line = line.strip().split("\t")
                    line = [x for k, x in enumerate(line)
                            if k in [0, 1, 7, 15, 19]]
                    if (args["--search"].lower() in line[2].lower()
                            and line[4] != "na"):
                        results.append(line)
        with indent(4):
            puts(tab(results, headers=header))
        with indent(2):
            puts(colored.blue('\nTo download a genome and setup for use:'))
        with indent(4):
            puts(colored.green("\nvk genome ncbi --ref=<asm_name>\n"))
        return results

    elif args["--ref"]:
        # reference name.
        reference_name = args["--ref"]
        # Ensure genome db is available
        download_genomes(genome_db)
        # reference directory
        if not args["--directory"]:
            reference_directory = (genome_directory + "/"
                                   + reference_name + "/")
        else:
            reference_directory = genome_directory + "/"
        if not os.path.exists(reference_directory):
            os.makedirs(reference_directory)
        # base reference filename.
        ref_filename = reference_directory + reference_name + ".tmp.fa.gz"

        if args["wormbase"]:
            asm_name = args["--ref"]
            asm_url = (f"ftp://ftp.wormbase.org/pub/wormbase/releases/"
                       f"{asm_name}/species/c_elegans/PRJNA13758/"
                       f"c_elegans.PRJNA13758.{asm_name}.genomic.fa.gz")
            comm = f"curl {asm_url} > {ref_filename}"
            err = run_command(comm)
            if err != 0:
                raise Exception(
                    colored.red(f"Wormbase genome {args['--ref']} not found."))
            # Unzip wormbase genome
            run_command(f"gzip -df {ref_filename}", shell=True)
        else:
            # NCBI
            with open(genome_db, "r") as f:
                results = []
                for line in f:
                    if not line.startswith("#"):
                        line = line.strip().split("\t")
                        line = [x for k, x in enumerate(line)
                                if k in [0, 1, 7, 15, 19]]
                        if args["--ref"] == line[3]:
                            results.append(line)
            # Bail out before indexing into an empty result list.
            if len(results) == 0:
                with indent(2):
                    puts(colored.red('\nError: Genome ' + args["--ref"]
                                     + ' not found\n'))
                exit()
            reference_download = results[0]
            url = (reference_download[4].replace("ftp://", "http://") + "/"
                   + os.path.split(reference_download[4])[1]
                   + "_genomic.fna.gz")
            with indent(2):
                puts(colored.green('\nDownloading: ' + reference_name + "; "
                                   + url + '\n'))
            # stack overflow: 15644964
            r = requests.get(url, stream=True)
            with open(ref_filename, 'wb') as f:
                total_length = int(r.headers.get('content-length'))
                for chunk in progress.bar(
                        r.iter_content(chunk_size=1024),
                        expected_size=(total_length / 1024) + 1):
                    if chunk:
                        f.write(chunk)
                        f.flush()

        # Fix chromosome names
        if not args["--accession-chrom-names"] and not args['wormbase']:
            with indent(2):
                puts(colored.green('\nFixing chromosome names\n'))
            with open(ref_filename.replace(".fa.gz", ".fa"), 'w') as outfa:
                with gzip.open(ref_filename, 'rb') as f:
                    for line in f:
                        line = line.decode("utf-8")
                        outline = line
                        if line.startswith(">"):
                            acc = line.split(" ")[0].strip(">")
                            chrom_name = fetch_chrom_name(acc)
                            if chrom_name is not None:
                                outline = ">" + chrom_name + "\n"
                            elif line.lower().find("mitochon") > 0:
                                outline = ">MtDNA\n"
                            puts(colored.blue(line.strip("\n>")) + " --> "
                                 + colored.blue(outline.strip("\n>")))
                        outfa.write(outline)

        if which("bgzip"):
            with indent(2):
                puts(colored.green('\nSwitching from gzip to bgzip\n'))
            # Convert to bgzip
            if args["--accession-chrom-names"]:
                run_command(f"gzip -df {ref_filename}", shell=True)
            comm_bgzip = "bgzip -fc {ref_filename} > {ref_out}"
            comm_bgzip = comm_bgzip.format(
                ref_filename=ref_filename.replace(".fa.gz", ".fa"),
                ref_out=ref_filename.replace(".tmp", ""))
            run_command(comm_bgzip, shell=True)
            ref_filename = ref_filename.replace(".tmp", "")
        else:
            with indent(2):
                puts_err(colored.red("Please install bgzip."))
            exit()

        if which("bwa"):
            with indent(2):
                puts(colored.green("\nCreating bwa index\n"))
            comm = f"bwa index {ref_filename}"
            run_command(comm, shell=True)
        else:
            with indent(2):
                puts(colored.blue("\nSkipping bwa index; bwa not installed\n"))

        if which("samtools"):
            with indent(2):
                puts(colored.green("\nCreating samtools index\n"))
            comm = f"samtools faidx {ref_filename}"
            run_command(comm, shell=True)
        else:
            with indent(2):
                puts(colored.blue(
                    "\nSkipping samtools index; Samtools not installed\n"))

        if which("makeblastdb") and which("gzip"):
            with indent(2):
                puts(colored.green("\nCreating blast database\n"))
            comm = ("gzip -dc {ref} | makeblastdb -in - -dbtype=nucl "
                    "-title={ref} -out={ref}").format(ref=ref_filename)
            run_command(comm, shell=True)
        else:
            with indent(2):
                puts(colored.blue(
                    "\nSkipping creation of blast database; "
                    "blast is not installed\n"))

        # Remove temp files
        if args["--accession-chrom-names"]:
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa.gz"))

        # Remove temporary files
        try:
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa.gz"))
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa"))
        except OSError:
            pass  # Add error checking here...

        with indent(2):
            puts(colored.green("\nComplete!\n"))
#print "Lat:", xp #print "Lon:", yp print "Master Global Coord:", final_coord print "Slave Global Coord:", final_coord_d #print "Master KPs Coord", pixel_coord #print "Slave KPs Coord:", dst_pts #print "Image 1 Size:", img1.shape #print "Image 2 Size:", img2.shape #print "RMSE per GCP Northing:", rmse_per_gcp_x #print "RMSE per GCP Easting:", rmse_per_gcp_y #print "RMSE per GCP:", rmse_per_gcp #cv2.imshow("Matched", view) #plt.imshow(view) #print tab(final_coord, headers,numalign="right") print tab(d) #print tab(final_coord_d, headers,numalign="right") #print tab(rpg) print C, A, B, F, D2, E #print C, A,B,F, D2,E #plt.plot (gcp, rmse_per_gcp_x, "bs") #plt.plot (gcp, rmse_per_gcp_y, "ro") #cv2.imshow("Matched", view) #cv2.waitKey(1000) #plt.imshow(view) plt.show(10000) #print s #print d print final_coord print final_coord_d
# Reshape to (N, 1)
xp.shape = (N, 1)
yp.shape = (N, 1)

# Global coordinates of src_pts of the master image
final_coord = np.hstack((xp, yp))

headers = ["Lat", "Lon"]
headers2 = ["Master", "Slave"]
headers3 = ["X", "Y"]
headers4 = ["RMSE per GCP"]

# print global coordinates of the source points (src_pts)
print(tab(final_coord, headers, numalign="right"))

# Convert the slave image from jpg to tif
img = Image.open('images/Butuan.Slave.02.jpg')  # input
img.save('images/temp/Butuan.Slave.02.tiff')    # output

# The slave image is not yet georeferenced; its raw coordinates can be
# checked using the GDAL affine coefficients.
img2_tif = 'images/temp/Butuan.Slave.02.tiff'
slave_tif = gdal.Open(img2_tif)
c2, a2, b2, f2, d2, e2 = slave_tif.GetGeoTransform()
# Shows 0.0 1.0 0.0 0.0 0.0 1.0 -- still in pixel coordinates (offsets)
print(c2, a2, b2, f2, d2, e2)
def create_shape_datacard(df_obs, df_rate, df_nuis, params, filename, name):
    # IDX
    dc = tab([
        ["imax * number of channels"],
        ["jmax * number of backgrounds"],
        ["kmax * number of nuisance parameters"],
    ], [], tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # SHAPES
    df_obs = df_obs.reset_index()
    dc += tab([[
        "shapes", "*", "*",
        "Zinv_METnoX-ShapeTemplates_{}.root".format(name),
        "$CHANNEL/$PROCESS", "$CHANNEL/$SYSTEMATIC/$PROCESS"
    ]], [], tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # OBS
    dc += tab([
        ["bin"] + list(df_obs["region"]),
        ["observation"] + [-1] * df_obs.shape[0],
    ], [], tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # RATE
    df_rate = df_rate.reset_index()
    dc += tab([
        ["bin"] + list(df_rate["region"]),
        ["process"] + list(df_rate["process"]),
        # list() is required around map() on python 3
        ["process"] + list(map(int, df_rate["proc"])),
        ["rate"] + [-1] * df_rate.shape[0],
    ], [], tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # NUISANCES
    nuisances = []
    for c in df_nuis.columns:
        syst = (c[:-2] if c.endswith("Up")
                else c[:-4] if c.endswith("Down")
                else c)
        if syst not in nuisances and "nominal" not in syst:
            nuisances.append(syst)

    nuisance_block = []
    for nuis in nuisances:
        if nuis in ["lumi"]:
            nuisance_subblock = [nuis, "lnN"]
        else:
            nuisance_subblock = [nuis, "shape"]
        for up, down in zip(df_nuis[nuis + "Up"], df_nuis[nuis + "Down"]):
            if nuis in ["lumi"]:
                value = str(np.sqrt(up / down))
            else:
                value = 1
            if np.isnan([up, down]).any():
                # non-number
                value = "-"
            else:
                # number
                if np.abs(up * down - 1) < 0.005:
                    # symmetric
                    mean = np.sqrt(up / down)
                    if np.abs(mean - 1) < 1e-5:
                        # zero
                        value = "-"
            nuisance_subblock.append(value)
        nuisance_block.append(nuisance_subblock)
    dc += tab(nuisance_block, [], tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # PARAMS
    if params is not None:
        dc += tab(params, [], tablefmt="plain")

    with open(filename, 'w') as f:
        f.write(dc)
    logger.info("Created {}".format(filename))
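# For orientation, a rough sketch of the datacard text the function above
# assembles (CMS Combine layout; the bin name and values are illustrative):
#
#   imax * number of channels
#   jmax * number of backgrounds
#   kmax * number of nuisance parameters
#   --------------------------------------------------------------------------------
#   shapes * * Zinv_METnoX-ShapeTemplates_<name>.root $CHANNEL/$PROCESS $CHANNEL/$SYSTEMATIC/$PROCESS
#   --------------------------------------------------------------------------------
#   bin          monojet
#   observation  -1
#   --------------------------------------------------------------------------------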
def print_ip_table(avail, na_avail):
    t = {'Reachable': avail, 'Unreachable': na_avail}
    print(tab(t, headers='keys'))
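# Usage sketch for print_ip_table, assuming `avail` and `na_avail` are lists
# of IP strings; with headers='keys', tabulate uses the dict keys as column
# headers and the dict values as columns.
print_ip_table(['10.0.0.1', '10.0.0.2'], ['10.0.0.9', '10.0.0.10'])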
def pred_counts(model_or_info, X, y, n_top=3, results='s', n_obs=None,
                n_find=None, mask_zero=True, stateful=False, coder=None):
    '''
    Returns a summary of whether the top predictions were found in the actual
    subsequent timesteps

    - Rows represent which prediction it was (0=prediction with highest
      probability)
    - Columns show the percentage where this prediction occurred:
        - 0 = not at all
        - 1 = as the next item
        - 2 = as the 2nd item...etc
    - E.g. 36.60% of the top predictions were found in the next position

          n_Top         0          1          2           3
        -------  --------  ---------  ---------  ----------
              0  0.612539   0.366063  0.0134987  0.00789921
              1  0.916608  0.0556944  0.0150985   0.0125987
              2  0.981702  0.0133987  0.0029997  0.00189981

    :param model_or_info:
    :param X:
    :param y:
    :param n_top:
    :param results: Set to 's' for Summary, 'c' for Counts, 'p' for Prefixes,
        'd' for Detail, or combinations e.g. 'scd'
    :param n_obs:
    :param n_find:
    :param mask_zero:
    :param stateful:
    :param coder:
    :return:
    '''
    import collections
    from tabulate import tabulate as tab

    if type(model_or_info) is dict:
        model = model_or_info['model']
    else:
        model = model_or_info
    print("Evaluating predictions for model {}".format(model.name))

    details = True if 'd' in results else False
    summary = True if 's' in results else False
    prefixes = True if 'p' in results else False
    counts = True if 'c' in results else False

    if n_obs is None:
        n_obs = X.shape[0]
    n_time = X.shape[1]
    n_cats = y.shape[2]
    if n_find is None:
        n_find = n_time
    print("- based on {} obs with {} categories and up to {} timesteps".format(
        n_obs, n_cats, n_time))

    # Counts summarise for each (prefix size, prediction rank, find position)
    countsD = collections.defaultdict(lambda: 0)
    detail = []

    def update_for_obs(Xi, yi, prob_i, n_time_i):
        # Get actual values for all but the last timestep (which predicts a
        # padded value)
        y_cats = np.argmax(yi[0:n_time_i - 1], axis=1)
        if coder:
            y_cats = coder.inverse_transform(y_cats - 1)
        # For each position 'j' in the time steps where a prediction is made
        for j in range(n_time_i):
            # Get predicted categories for this position in descending order:
            # probSr = most likely category first in series
            if coder:
                probSr = pa.Series(
                    data=prob_i[j, 1:],
                    index=coder.classes_).sort_values(ascending=False)
            else:
                probSr = pa.Series(data=prob_i[j]).sort_values(ascending=False)
            # Get all the remaining actual categories
            y_rest = y_cats[j:]
            # Loop through the top predictions
            for t in range(n_top):
                # Predicted code (or desc if coder provided)
                next_top_pred = probSr.index[t]
                # Probability of this code
                next_top_prob = probSr.values[t]
                # See if the predicted code occurs in the actual values
                find_preds = list(np.where(y_rest == next_top_pred)[0])
                # Check if it has been found, and how far ahead
                if len(find_preds) > 0:
                    # Use the first occurrence of the predicted value
                    # (index 0); f=1 means found as the expected value for
                    # this time-step (i.e. it was the next item)
                    f = find_preds[0] + 1
                    # Truncate if the index where the prediction is found is
                    # beyond the range
                    if f > n_find:
                        f = n_find + 1
                else:
                    f = 0  # Not found
                # Always record the count, so f=0 counts predictions that
                # were not found
                countsD[(j, t, f)] += 1
                if details:
                    # Get first part of X up to 'j' where the prediction is
                    # being made
                    if coder:
                        # Subtract 1 to allow for padding
                        x_pfx = coder.inverse_transform(Xi[0:j + 1] - 1)
                    else:
                        x_pfx = Xi[0:j + 1]
                    detail.append({
                        'Pfx': x_pfx,
                        'Pred': next_top_pred,
                        'Prob': next_top_prob,
                        'n_Pfx': j,
                        'n_Top': t,
                        'n_Find': f
                    })

    for i in range(n_obs):
        # Find number of non-zero items for this obs, or use fixed n_time if
        # not masking zero
        n_time_i = np.count_nonzero(X[i]) if mask_zero else n_time
        # Predict probability of each category for every timestep of this obs
        prob_i = predict_obs(model, X, i, n_time, n_cats,
                             stateful=stateful, mask_zero=mask_zero)
        # Increment count matrix for these probabilities based on the number
        # of non-zero inputs
        update_for_obs(X[i, ...], y[i, ...], prob_i, n_time_i)

    countsDf = pa.Series(countsD).reset_index()
    countsDf.columns = ['n_Pfx', 'n_Top', 'n_Find', 'Count']
    summaryDf_tf = pa.pivot_table(countsDf, index='n_Top', columns='n_Find',
                                  values='Count', aggfunc=np.sum)
    summaryDf_t = pa.pivot_table(countsDf, index='n_Top', values='Count',
                                 aggfunc=np.sum)
    # summaryDf_t gives the count of predictions for each value of n_Top.
    # - All will be the same: the number of input values that were not padded
    #   and so had a prediction
    n_pred = summaryDf_t.iloc[0, 0]
    summaryDf = summaryDf_tf / n_pred
    if type(model_or_info) is dict:
        model_or_info.update({'summary': summaryDf})

    results = []
    if summary:
        results.append(summaryDf)
        print(tab(summaryDf, headers='keys'))
        print()
    if counts:
        results.append(countsDf)
    if prefixes:
        summaryDf2 = pa.pivot_table(countsDf, index=['n_Pfx', 'n_Top'],
                                    columns='n_Find', values='Count',
                                    aggfunc=np.sum)
        pfxDf = pa.pivot_table(countsDf, index=['n_Pfx'], values='Count',
                               aggfunc=np.sum)
        pfxDf = summaryDf2.join(pfxDf)
        for f in range(n_find):
            pfxDf["Pct{}".format(f)] = pfxDf[f] / pfxDf['Count']
        results.append(pfxDf.reset_index())
    if details:
        detailDf = pa.DataFrame.from_dict(detail)[[
            'Pfx', 'Pred', 'Prob', 'n_Pfx', 'n_Top', 'n_Find'
        ]]
        results.append(detailDf)
    if len(results) == 1:
        return results[0]
    else:
        return tuple(results)
def __str__(self):
    return tab(self.data, tablefmt="grid")
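# Minimal sketch of the __str__ pattern above on a hypothetical wrapper
# class: printing the object renders its rows as a grid table.
from tabulate import tabulate as tab

class Board:
    def __init__(self, data):
        self.data = data  # list of row lists

    def __str__(self):
        return tab(self.data, tablefmt="grid")

print(Board([[1, 2], [3, 4]]))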
# ADD VALUE ON THE TOP OF VERTICAL BARS
def autolabel(rects):
    # iterate the argument, not the global bar_plot
    for idx, rect in enumerate(rects):
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2,
                 height,
                 chartValue[idx],
                 ha='center',
                 va='bottom',
                 rotation=0)

autolabel(bar_plot)

plt.title('Registrations by month', pad=20, fontsize=15)
fig5.savefig(workDirectory + 'myplot5.png', dpi=100)
plt.show()
plt.clf()

im = Image.open(workDirectory + 'myplot5.png')
bordered = ImageOps.expand(im, border=1, fill=(0, 0, 0))
bordered.save(workDirectory + 'myplot5.png')

os.remove(workDirectory + 'myplot5.png')

print(tab(df_Created_count.head(30),
          headers='keys',
          tablefmt='psql',
          showindex=False))
def pprint(array):
    print(tab(array[::-1], headers='keys', tablefmt='fancy_grid'))
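# Usage sketch for pprint: the [::-1] slice reverses row order before
# tabulating, so the last row of `array` is printed first. The sample rows
# below are illustrative.
pprint([{'x': 1, 'y': 2}, {'x': 3, 'y': 4}])  # row (3, 4) appears on top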
def tab_all_releases(self, releases_data):
    self.print_help(
        tab(releases_data,
            tablefmt="plain",
            headers=["ID", "Release name", "Last import"]))
def main(debug=None):
    args = docopt(__doc__,
                  version='VCF-Toolbox v0.1',
                  argv=debug,
                  options_first=False)

    # Setup Genomes Directory
    if args["location"] and args["<path>"]:
        if args["<path>"] == "-":
            genome_directory = get_genome_directory_file()
            os.remove(genome_directory)
            return get_genome_directory_file()
        else:
            with open(get_genome_directory_file(), "w") as f:
                genome_directory = os.path.realpath(args["<path>"])
                with indent(2):
                    puts(colored.blue("\nSet genome location to: "
                                      + genome_directory + "/\n"))
                f.write(genome_directory)
                # create directory if it does not exist
                if not os.path.exists(genome_directory):
                    os.makedirs(genome_directory)
            return genome_directory

    if args["--directory"]:
        genome_directory = os.path.realpath(args["--ref"])
    else:
        genome_directory = get_genome_directory()

    with indent(2):
        puts(genome_directory)

    if args["location"] and not args["<path>"]:
        return genome_directory

    genome_db = get_genome_directory() + "/genomes.db"

    ################
    # List Genomes #
    ################
    if args["list"]:
        output_genome_list()

    ##################
    # Search Genomes #
    ##################
    elif args["--search"]:
        # Download and cache a list of genomes from NCBI for searching
        download_genomes(genome_db)
        # Cache result
        header = [
            "assembly_accession",  # 0
            "bioproject",          # 1
            "organism_name",       # 7
            "asm_name",            # 15
            "ftp_path",            # 19
        ]
        with indent(2):
            puts(colored.blue('\nSearching...\n'))
        with open(genome_db, "r") as f:
            results = []
            for line in f:
                if not line.startswith("#"):
                    line = line.strip().split("\t")
                    line = [x for k, x in enumerate(line)
                            if k in [0, 1, 7, 15, 19]]
                    if (args["--search"].lower() in line[2].lower()
                            and line[4] != "na"):
                        results.append(line)
        with indent(4):
            puts(tab(results, headers=header))
        with indent(2):
            puts(colored.blue('\nTo download a genome and setup for use:'))
        with indent(4):
            puts(colored.green("\nvk genome ncbi --ref=<asm_name>\n"))
        return results

    elif args["--ref"]:
        # reference name.
        reference_name = args["--ref"]
        # Ensure genome db is available
        download_genomes(genome_db)
        # reference directory
        if not args["--directory"]:
            reference_directory = (genome_directory + "/"
                                   + reference_name + "/")
        else:
            reference_directory = genome_directory + "/"
        if not os.path.exists(reference_directory):
            os.makedirs(reference_directory)
        # base reference filename.
        ref_filename = reference_directory + reference_name + ".tmp.fa.gz"

        if args["wormbase"]:
            asm_url = ("ftp://ftp.wormbase.org/pub/wormbase/releases/"
                       "{asm_name}/species/c_elegans/PRJNA13758/"
                       "c_elegans.PRJNA13758.{asm_name}.genomic.fa.gz")
            reference_download = asm_url.format(asm_name=args["--ref"])
            comm = "curl {reference_download} > {ref_filename}".format(
                **locals())
            print(comm)
            call(comm, shell=True)
            # Unzip wormbase genome
            call(["gunzip", "-f", ref_filename])
        else:
            # NCBI
            with open(genome_db, "r") as f:
                results = []
                for line in f:
                    if not line.startswith("#"):
                        line = line.strip().split("\t")
                        line = [x for k, x in enumerate(line)
                                if k in [0, 1, 7, 15, 19]]
                        if args["--ref"] == line[3]:
                            results.append(line)
            # Bail out before indexing into an empty result list.
            if len(results) == 0:
                with indent(2):
                    puts(colored.red('\nError: Genome ' + args["--ref"]
                                     + ' not found\n'))
                exit()
            reference_download = results[0]
            url = (reference_download[4].replace("ftp://", "http://") + "/"
                   + os.path.split(reference_download[4])[1]
                   + "_genomic.fna.gz")
            with indent(2):
                puts(colored.green('\nDownloading: ' + reference_name + "; "
                                   + url + '\n'))
            # stack overflow: 15644964
            r = requests.get(url, stream=True)
            with open(ref_filename, 'wb') as f:
                total_length = int(r.headers.get('content-length'))
                for chunk in progress.bar(
                        r.iter_content(chunk_size=1024),
                        expected_size=(total_length / 1024) + 1):
                    if chunk:
                        f.write(chunk)
                        f.flush()

        # Fix chromosome names
        if not args["--accession-chrom-names"] and not args['wormbase']:
            with indent(2):
                puts(colored.green('\nFixing chromosome names\n'))
            with open(ref_filename.replace(".fa.gz", ".fa"), 'w') as outfa:
                with gzip.open(ref_filename, 'rb') as f:
                    for line in f:
                        # gzip yields bytes; decode before string handling
                        line = line.decode("utf-8")
                        outline = line
                        if line.startswith(">"):
                            acc = line.split(" ")[0].strip(">")
                            chrom_name = fetch_chrom_name(acc)
                            if chrom_name is not None:
                                outline = ">" + chrom_name + "\n"
                            elif line.lower().find("mitochon") > 0:
                                outline = ">MtDNA\n"
                            puts(colored.blue(line.strip("\n>")) + " --> "
                                 + colored.blue(outline.strip("\n>")))
                        outfa.write(outline)

        if which("bgzip"):
            with indent(2):
                puts(colored.green('\nSwitching from gzip to bgzip\n'))
            # Convert to bgzip
            if args["--accession-chrom-names"]:
                call(["gunzip", "-f", ref_filename])
            comm_bgzip = "bgzip -fc {ref_filename} > {ref_out}"
            comm_bgzip = comm_bgzip.format(
                ref_filename=ref_filename.replace(".fa.gz", ".fa"),
                ref_out=ref_filename.replace(".tmp", ""))
            print(comm_bgzip)
            call(comm_bgzip, shell=True)
            ref_filename = ref_filename.replace(".tmp", "")
        else:
            with indent(2):
                puts_err(colored.red("Please install bgzip."))
            exit()

        if which("bwa"):
            with indent(2):
                puts(colored.green("\nCreating bwa index\n"))
            call(["bwa", "index", ref_filename])
        else:
            with indent(2):
                puts(colored.blue("\nSkipping bwa index; bwa not installed\n"))

        if which("samtools"):
            with indent(2):
                puts(colored.green("\nCreating samtools index\n"))
            call(["samtools", "faidx", ref_filename])
        else:
            with indent(2):
                puts(colored.blue(
                    "\nSkipping samtools index; Samtools not installed\n"))

        if which("makeblastdb"):
            with indent(2):
                puts(colored.green("\nCreating blast database\n"))
            comm = ("gunzip -c {ref} | makeblastdb -in - -dbtype=nucl "
                    "-title={ref} -out={ref}").format(ref=ref_filename)
            call(comm, shell=True)
        else:
            with indent(2):
                puts(colored.blue(
                    "\nSkipping creation of blast database; "
                    "blast is not installed\n"))

        # Remove temp files
        if args["--accession-chrom-names"]:
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa.gz"))

        # Remove temporary files
        try:
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa.gz"))
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa"))
        except OSError:
            pass  # Add error checking here...

        with indent(2):
            puts(colored.green("\nComplete!\n"))
def from_stations():
    response = requests.get(BR_API_FROM_STATIONS)
    stations = [[i['stn_code'], i['stn_name']] for i in response.json()]
    print(tab(stations, headers=['Code', 'Station Name']))
def tab_mixes_list(self, mixes_data):
    tabulated = tab(
        mixes_data,
        tablefmt="simple",
        headers=["Mix #", "Name", "Played", "Venue", "Created", "Updated"])
    self.print_help(tabulated)
#print "Lat:", xp #print "Lon:", yp print "Master Global Coord:", final_coord print "Slave Global Coord:", final_coord_d #print "Master KPs Coord", pixel_coord #print "Slave KPs Coord:", dst_pts #print "Image 1 Size:", img1.shape #print "Image 2 Size:", img2.shape #print "RMSE per GCP Northing:", rmse_per_gcp_x #print "RMSE per GCP Easting:", rmse_per_gcp_y #print "RMSE per GCP:", rmse_per_gcp #cv2.imshow("Matched", view) #plt.imshow(view) #print tab(final_coord, headers,numalign="right") print tab(d) #print tab(final_coord_d, headers,numalign="right") #print tab(rpg) print C, A,B,F, D2,E #print C, A,B,F, D2,E #plt.plot (gcp, rmse_per_gcp_x, "bs") #plt.plot (gcp, rmse_per_gcp_y, "ro") #cv2.imshow("Matched", view) #cv2.waitKey(1000) #plt.imshow(view) plt.show (10000) #print s #print d print final_coord print final_coord_d
def to_stations(dest):
    response = requests.get(f"{BR_API_TO_STATIONS}/{dest}")
    stations = [[i['stn_code'], i['dest']] for i in response.json()]
    print(tab(stations, headers=['Code', 'Destination Name']))
im = Image.open(workDirectory + 'myplot10.png')
bordered = ImageOps.expand(im, border=1, fill=(0, 0, 0))
bordered.save(workDirectory + 'myplot10.png')

# INSERT IN EXCEL
img = openpyxl.drawing.image.Image(workDirectory + 'myplot10.png')
img.anchor = 'E4'
workbook['Degrees'].add_image(img)
workbook.save(outputExcelFile)

# REMOVE PICTURES
os.remove(workDirectory + 'myplot1.png')
os.remove(workDirectory + 'myplot2.png')
os.remove(workDirectory + 'myplot3.png')
# os.remove(workDirectory + 'myplot4.png')
os.remove(workDirectory + 'myplot5.png')
os.remove(workDirectory + 'myplot6.png')
os.remove(workDirectory + 'myplot7.png')
os.remove(workDirectory + 'myplot8.png')
os.remove(workDirectory + 'myplot10.png')
os.remove(workDirectory + 'mymap1.png')
os.remove(workDirectory + 'mymap3.png')

# TERMINAL OUTPUTS AND TESTS
print(tab(df_Degrees_count, headers='keys', tablefmt='psql',
          showindex=False))
print(today)
print("OK, export done!")