def loadState(file, name='data', raiseException=True):
    import compress_pickle
    pklfile = f'save_data/{file}/{name}.pkl'
    try:
        if os.path.exists(pklfile):
            # Legacy uncompressed pickle: load it, re-save in the compressed
            # format via saveState, then remove the old file.
            # with open(pklfile, 'rb') as f:
            res = compress_pickle.load(pklfile)
            # f.close()
            saveState(res, file, name)
            os.remove(pklfile)
            return res
        # if (name == 'data'):
        #     from metric.CMbasedMetric import CMbasedMetric
        #     from metric.event_confusion_matrix import event_confusion_matrix
        #     [run_info, datasetdscr, evalres] = compress_pickle.load(pklfile + '.lz4')
        #     for i in evalres:
        #         data = evalres[i]['test']
        #         Sdata = data.Sdata
        #         import combiner.SimpleCombiner
        #         com = combiner.SimpleCombiner.EmptyCombiner2()
        #         evalres[i]['test'].Sdata.pred_events = com.combine(Sdata.s_event_list, Sdata.set_window, data.predicted)
        #         evalres[i]['test'].event_cm = event_confusion_matrix(Sdata.a_events, Sdata.pred_events, datasetdscr.activities)
        #         evalres[i]['test'].quality = CMbasedMetric(data.event_cm, 'macro', None)
        #     return [run_info, datasetdscr, evalres]
        return compress_pickle.load(pklfile + '.lz4')
    except:
        if raiseException:
            raise
        return None

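# Hedged sketch, not from the original source: `loadState` above calls a
# `saveState` helper that is not shown here. Assuming it simply writes the
# object next to the legacy path with compress_pickle (lz4 compression is
# inferred from the '.lz4' extension), a minimal version could look like this.
def saveState(data, file, name='data'):
    import compress_pickle
    folder = f'save_data/{file}'
    os.makedirs(folder, exist_ok=True)
    compress_pickle.dump(data, f'{folder}/{name}.pkl.lz4')
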
def remove_training_data():
    from compress_pickle import load
    training_data_path = "/home/pepamengual/UEP/trained_model/UEP_trained_model_4"
    skempi_data_path = "/home/pepamengual/UEP/trained_model/substracted_4"
    substracted_model = {}
    training_data = load(training_data_path, compression="lzma", set_default_extension=False)
    skempi_data = load(skempi_data_path, compression="lzma", set_default_extension=False)
    for environment, amino_acid_dict in training_data.items():
        for amino_acid, counts in amino_acid_dict.items():
            if environment in skempi_data and amino_acid in skempi_data[environment]:
                substract = counts - skempi_data[environment][amino_acid]
                substracted_model.setdefault(environment, {}).setdefault(amino_acid, substract)
            else:
                substracted_model.setdefault(environment, {}).setdefault(amino_acid, counts)
    from compress_pickle import dump
    dump(substracted_model, "substracted_def_4", compression="lzma")

def parallelPlotter(i, xpNum, digs, fName, PT_IMG):
    xpNumCS = str(i + 1).zfill(4)
    print('* Exporting {}/{}'.format(xpNumCS, xpNum), end='\r')
    # Name formatting -> tuple --------------------------------------------
    repsRatios = np.load(fName)
    fList = fName.split('/')[-1].split('-')[0].split('_')[1:]
    fList.append(fName.split('/')[-1].split('-')[1].split('_')[1])
    fKeys = tuple(list(map(int, fList)))
    # Select cmap ---------------------------------------------------------
    (scalers, HD_DEP, IND_RAN, palette) = aux.selectDepVars('TTI', AOI)
    cmap = palette.reversed()
    # Load TTI and TTO ----------------------------------------------------
    ttiR = pkl.load(PT_MTR + '{}_TTI_{}_mlr.bz'.format(AOI, QNT))
    tti = ttiR[fKeys][int(thi * 100)]
    ttoR = pkl.load(PT_MTR + '{}_TTO_{}_mlr.bz'.format(AOI, QNT))
    tto = ttoR[fKeys][int(tho * 100)]
    # Plotting ------------------------------------------------------------
    (fig, ax) = plt.subplots(nrows=1, ncols=1)
    ax.imshow(repsRatios, cmap=cmap)
    # Add TTI --------------------------------------------------------------
    [plt.axvline(v, color='black', alpha=.65, lw=0.175, ls='-.') for v in tti]
    # Add TTO --------------------------------------------------------------
    [plt.axvline(j, color='black', alpha=.75, lw=0.2, ls='dotted') for j in tto]
    # Save the figure ------------------------------------------------------
    outName = fName.split('/')[-1].split('.')[0][:-4]
    plt.xlim(X_RAN)
    ax.axes.xaxis.set_ticklabels([])
    ax.axes.yaxis.set_ticklabels([])
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
    ax.xaxis.set_tick_params(size=0)
    ax.yaxis.set_tick_params(size=0)
    plt.savefig(
        PT_IMG + outName + '.png',
        bbox_inches='tight', pad_inches=0.01, dpi=500)
    plt.close("all")

def test_dump_load(dump_load):
    (
        message,
        path,
        compression,
        set_default_extension,
        optimize,
        expected_file,
        expected_fail,
    ) = dump_load
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        if expected_fail is None:
            dump(
                message,
                path,
                compression,
                set_default_extension=set_default_extension,
                optimize=optimize,
            )
            loaded_message = load(
                path, compression, set_default_extension=set_default_extension
            )
            assert loaded_message == message
        else:
            with pytest.raises(expected_fail):
                dump(
                    message,
                    path,
                    compression,
                    set_default_extension=set_default_extension,
                    optimize=optimize,
                )
            with pytest.raises(expected_fail):
                load(path, compression, set_default_extension=set_default_extension)

def _get_hard_and_rel_sub_dicts(self, sample_names):
    if os.path.isfile(os.path.join(self.cache_dir, 'hard_sub_sample_dict.p.bz')):
        if os.path.isfile(os.path.join(self.cache_dir, 'rel_sub_sample_dict.p.bz')):
            return compress_pickle.load(
                os.path.join(self.cache_dir, 'hard_sub_sample_dict.p.bz')
            ), compress_pickle.load(
                os.path.join(self.cache_dir, 'rel_sub_sample_dict.p.bz'))
    hard_sub_sample_dict = {}
    rel_sub_sample_dict = {}
    count = 0
    tot_samples = len(sample_names)
    for sample_name in sample_names:
        count += 1
        sys.stdout.write(f'\r{sample_name}: {count}/{tot_samples}')
        abund_list = self.absolute_consolidated_abundance_dict[sample_name]
        if sum(abund_list) < 10000:
            continue
        # Make a redundant list of the seqs
        non_z = []
        for i, abund in enumerate(abund_list):
            if abund > 0:
                non_z.append(i)
        redundant_list = []
        # prob_list = []
        tot = sum(abund_list)
        for i in non_z:
            seq = self.ordered_seq_names[i]
            abund = abund_list[i]
            # prob = abund/tot
            redundant_list.extend([seq for _ in range(abund)])
            # prob_list.extend([prob for _ in range(abund)])
        hard_sub_sample_list = np.random.choice(redundant_list, 10000, replace=False)
        hard_abunds_dict = dict(Counter(hard_sub_sample_list))
        hard_sub_sample_dict[sample_name] = hard_abunds_dict
        # For soft
        norm_abund_dict = {
            self.ordered_seq_names[i]: int((abund_list[i] / tot) * 100)
            for i in non_z if int((abund_list[i] / tot) * 10000) > 0
        }
        rel_sub_sample_dict[sample_name] = norm_abund_dict
    compress_pickle.dump(
        hard_sub_sample_dict,
        os.path.join(self.cache_dir, 'hard_sub_sample_dict.p.bz'))
    compress_pickle.dump(
        rel_sub_sample_dict,
        os.path.join(self.cache_dir, 'rel_sub_sample_dict.p.bz'))
    return hard_sub_sample_dict, rel_sub_sample_dict

def landSelector(USR, LND):
    pth = aux.selectGeoPath(USR)
    if LND == '02':
        pts = pkl.load(path.join(pth, 'CLS_02.bz'))
    elif LND == '10':
        pts = pkl.load(path.join(pth, 'CLS_10.bz'))
    elif LND == '01':
        pts = pkl.load(path.join(pth, 'CLS_01.bz'))
    else:
        raise ValueError('Unknown land identifier: {}'.format(LND))
    return pts['groups']

def _make_abundance_df(self):
    # Dict that we will populate and then use to make the abundance_df
    df_dict = {}
    print('making abundance df')
    for readset in self.readsets:
        sys.stdout.write(f'\r{readset}')
        sample_qc_dir = os.path.join(self.parent.qc_dir, readset)
        # make a seq_name to abundance dict from the fasta and .names pair
        seq_abund_dict = self._make_abund_dict_from_names_path(readset=readset)
        # For the all_taxa, we will go sequence by sequence through the fasta file
        fasta_path = os.path.join(
            sample_qc_dir,
            'stability.trim.contigs.good.unique.abund.pcr.unique.fasta')
        fasta_file_as_list = EighteenSBase.decompress_read_compress(fasta_path)
        fasta_names = [
            line.split('\t')[0][1:] for line in fasta_file_as_list
            if line[0] == '>'
        ]
        # then load the two annotation dictionaries
        sample_annotation_dict = compress_pickle.load(
            os.path.join(sample_qc_dir, 'sample_annotation_dict.p.bz'))
        coral_annotation_dict = compress_pickle.load(
            os.path.join(sample_qc_dir, 'coral_annotation_dict.p.bz'))
        sample_count_dict = {cat: 0 for cat in self.parent.plotting_categories}
        if self.parent.plot_type == 'all_taxa':
            self._log_abundances_all_taxa(
                sample_annotation_dict, sample_count_dict, seq_abund_dict,
                coral_annotation_dict, fasta_names)
        elif self.parent.plot_type == 'all_coral_genus':
            self._log_abundances_all_coral_genus(
                sample_annotation_dict, sample_count_dict, seq_abund_dict,
                coral_annotation_dict)
        else:
            raise NotImplementedError
        # Now add the collected abundances to the sample df_dict,
        # making them relative by dividing by the total of the sample_count_dict
        df_dict[readset] = [
            sample_count_dict[cat_key] / sum(sample_count_dict.values())
            for cat_key in self.parent.plotting_categories
        ]
    # Now create the df from the df_dict
    return pd.DataFrame.from_dict(
        data=df_dict, orient='index', columns=self.parent.plotting_categories)

def __init__(self, parent, readset):
    # Here we will not assign all of the information we want from the provenance
    # table to a variable. We will only assign those things that we need to make
    # our host-related columns from. I think this is just the provenance_annotation.
    self.parent = parent
    self.readset = readset
    self.sample_id = self.parent.fastq_info_df.at[readset, 'sample-id']
    self.use = True
    # We will add a column which is do_not_use_reason.
    # This will be a string value of either:
    # "tax_annotation_mismatch" --> if the genetic tax annotation does not match the sample provenance annotation
    # "putative_intra_genus_contamination"
    # "inter_genus_contamination"
    # "low_host_sequence_abundance"
    # "not_of_target_genus"
    # "different_primary_sequence"
    self.do_not_use_reason = []
    self.sample_qc_dir = os.path.join(self.parent.qc_dir, readset)
    self.coral_annotation_dict = compress_pickle.load(
        os.path.join(self.sample_qc_dir, 'coral_annotation_dict.p.bz'))
    self.consolidated_host_seqs_abund_dict = compress_pickle.load(
        os.path.join(self.sample_qc_dir, 'consolidated_host_seqs_abund_dict.p.bz'))
    self.rel_all_seq_abundance_dict = compress_pickle.load(
        os.path.join(self.sample_qc_dir, 'rel_all_seq_abundance_dict.p.bz'))
    self.coral_tax_rel_count_dd = self._make_coral_tax_rel_count_dd()
    self.sorted_coral_tax_dict_keys = sorted(
        self.coral_tax_rel_count_dd,
        key=self.coral_tax_rel_count_dd.get,
        reverse=True)
    self.genus_18S_taxonomic_annotation = self.sorted_coral_tax_dict_keys[0]
    self.provenance_annotation = self.parent.sample_provenance_df.at[
        self.sample_id,
        'Sample Material label, organismal system level, taxonomic, nominal']
    # The remainder of the variables that we need to populate
    self.is_provenance_tax_annotation_correct = None
    self.inter_genus_contamination_rel_abund = None
    self.is_inter_genus_contamination = None
    self.primary_sequence = None
    self.host_rel_abund = None
    self.putative_intra_genus_contamination_ratio = None
    self.is_putative_intra_genus_contamination = None
    self.is_representative_for_sample = None
    self.post_qc_seq_depth = None
    # Variables that are only associated with processing a Heliopora sample
    self.sample_annotation_dict = None
    self.fasta_dict = None
    self.all_tax_count_dd = None

def _init_color_dict(self):
    if self.plot_type == 'all_taxa':
        col_dict = {
            'Porites': '#FFFF00',
            'Pocillopora': '#87CEFA',
            'Millepora': '#FF6347',
            'other_coral': '#C0C0C0',
            'Symbiodiniaceae': '#00FF00',
            'other_taxa': '#696969',
            'not_annotated': '#282828'
        }
        return [
            'Pocillopora', 'Porites', 'Millepora', 'other_coral',
            'Symbiodiniaceae', 'other_taxa', 'not_annotated'
        ], col_dict
    elif self.plot_type == 'all_coral_genus':
        col_dict = {
            'Porites': '#FFFF00',
            'Pocillopora': '#87CEFA',
            'Millepora': '#FF6347',
            'other_coral': '#C0C0C0'
        }
        return ['Pocillopora', 'Porites', 'Millepora', 'other_coral'], col_dict
    elif self.plot_type in ['all_coral_sequence', 'minor_coral_sequence']:
        col_dict = compress_pickle.load(
            os.path.join(self.cache_dir, 'all_coral_sequence_color_dict.p.bz'))
        return None, col_dict
    else:
        raise NotImplementedError()

def unpickle_file(filename):
    """Returns the result of unpickling the file content."""
    return compress_pickle.load(filename, compression="lzma",
                                set_default_extension=False)
    # Unreachable after the return above (kept from the original source):
    # with open(filename, 'rb') as f:
    #     return pickle.load(f)

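# Hedged sketch, not part of the original module: a matching writer for
# `unpickle_file`, using the same lzma settings so the two round-trip.
# The name `pickle_file` is hypothetical.
def pickle_file(obj, filename):
    """Pickles `obj` to `filename` with lzma compression."""
    compress_pickle.dump(obj, filename, compression="lzma",
                         set_default_extension=False)
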
def __init__(self, kernel, machine, cores=1):
    """Initialize cache simulation based predictor from kernel and machine object."""
    CachePredictor.__init__(self, kernel, machine, cores)
    if isinstance(kernel, KernelCode):
        # Make use of caching for symbolic LC representation:
        file_name = 'CSIM_analysis.pickle.lzma'
        file_path = kernel.get_intermediate_location(
            file_name,
            machine_and_compiler_dependent=False,
            other_dependencies=[str(cores)] +
                               [str(t) for t in self.kernel.constants.items()])
        lock_mode, lock_fp = kernel.lock_intermediate(file_path)
        if lock_mode == fcntl.LOCK_SH:  # use cache
            cache = compress_pickle.load(file_path)
            lock_fp.close()  # release lock
            self.first_dim_factor = cache['first_dim_factor']
            self.stats = cache['stats']
        else:  # lock_mode == fcntl.LOCK_EX, needs update
            self.simulate()
            compress_pickle.dump(
                {'first_dim_factor': self.first_dim_factor,
                 'stats': self.stats},
                file_path)
            lock_fp.close()  # release lock
    else:
        # No caching support without filename for kernel code
        self.simulate()

def __init__(self, kernel, machine, cores=1, symbolic=False):
    """Initialize layer condition based predictor from kernel and machine object."""
    CachePredictor.__init__(self, kernel, machine, cores=cores)
    if isinstance(kernel, KernelCode):
        # Make use of caching for symbolic LC representation:
        file_name = 'LC_analysis.pickle.lzma'
        file_path = kernel.get_intermediate_location(
            file_name,
            machine_and_compiler_dependent=False,
            other_dependencies=[str(cores)])
        lock_mode, lock_fp = kernel.lock_intermediate(file_path)
        if lock_mode == fcntl.LOCK_SH:  # use cache
            self.results = compress_pickle.load(file_path)
            lock_fp.close()  # release lock
        else:  # lock_mode == fcntl.LOCK_EX, needs update
            self.build_symbolic_LCs()
            compress_pickle.dump(self.results, file_path)
            lock_fp.close()  # release lock
    else:
        # No caching support without filename for kernel code
        self.build_symbolic_LCs()
    if not symbolic:
        self.desymbolize()

def exportPstTracesParallel(
        exIx, expsNum,
        STABLE_T, THS, QNT, STYLE, PT_IMG,
        border=True, borderColor='#322E2D', borderWidth=1,
        labelPos=(.7, .9), xpsNum=0, digs=3, autoAspect=False,
        popScaler=1,
        wopPrint=True, cptPrint=True, poePrint=True, mnfPrint=True,
        ticksHide=True, transparent=True,
        sampRate=1, labelspacing=.1):
    (ix, repFile, tti, tto, wop, mnf, _, poe, cpt) = exIx
    repDta = pkl.load(repFile)
    # Print to terminal -------------------------------------------------------
    padi = str(ix + 1).zfill(digs)
    fmtStr = '{}+ File: {}/{}{}'
    print(fmtStr.format(monet.CBBL, padi, expsNum, monet.CEND), end='\r')
    # Traces ------------------------------------------------------------------
    pop = repDta['landscapes'][0][STABLE_T][-1]
    # STYLE['yRange'] = (0, pop*popScaler)
    monet.exportTracesPlot(
        repDta, repFile.split('/')[-1][:-6] + str(QNT), STYLE, PT_IMG,
        vLines=[tti, tto, 0], hLines=[mnf * pop], labelPos=labelPos,
        border=border, borderColor=borderColor, borderWidth=borderWidth,
        autoAspect=autoAspect, popScaler=popScaler,
        wop=wop, wopPrint=wopPrint,
        cpt=cpt, cptPrint=cptPrint,
        poe=poe, poePrint=poePrint,
        mnf=mnf, mnfPrint=mnfPrint,
        ticksHide=ticksHide, transparent=transparent,
        sampRate=sampRate, labelspacing=labelspacing)
    return None

def test_dump_load_on_filestreams(simple_dump_and_remove):
    path, compression, message, optimize = simple_dump_and_remove
    read_mode = "rb"  # get_compression_read_mode(compression)
    write_mode = "wb"  # get_compression_write_mode(compression)
    with open(path, write_mode) as f:
        dump(message, f, compression=compression, optimize=optimize)
    with open(path, read_mode) as f:
        raw_content = f.read()
        f.seek(0)
        loaded_message = load(f, compression=compression)
    assert loaded_message == message
    os.remove(path)
    dump(
        message,
        path,
        compression=compression,
        set_default_extension=False,
        optimize=optimize,
    )
    with open(path, read_mode) as f:
        benchmark = f.read()
    # zipfile compression stores the data in a zip archive. The archive then
    # contains a file with the data. Said file's mtime will always be
    # different between the two dump calls, so we skip the following assertion
    if compression != "zipfile":
        assert raw_content == benchmark

def _get_snp_classifications(self, genus):
    # First check to see if the cached version exists
    if genus == 'Pocillopora':
        snp_cache_dir = os.path.join(
            self.input_dir_18s, 'snp_classifications', 'poc_snp_class_df.p.bz')
    elif genus == 'Porites':
        snp_cache_dir = os.path.join(
            self.input_dir_18s, 'snp_classifications', 'por_snp_class_df.p.bz')
    if os.path.exists(snp_cache_dir):
        return compress_pickle.load(snp_cache_dir)
    else:
        # Need to create it from scratch
        if genus == 'Pocillopora':
            raw_snp_class_path = os.path.join(
                self.input_dir_18s, 'snp_classifications',
                'POC_SNP_classifications.csv')
        elif genus == 'Porites':
            raw_snp_class_path = os.path.join(
                self.input_dir_18s, 'snp_classifications',
                'POR_SNP_classifications.csv')
        snp_class_df = pd.read_csv(raw_snp_class_path, index_col=0)
        snp_class_df.index = self._convert_index_to_sample_ids(snp_class_df.index)
        snp_class_df.dropna(inplace=True)
        snp_class_df.columns = ['label']
        compress_pickle.dump(snp_class_df, snp_cache_dir)
        return snp_class_df

def _pop_for_heliopora_sample(self):
    self.use = False
    self.do_not_use_reason.append("not_of_target_genus")
    self.sample_annotation_dict = compress_pickle.load(
        os.path.join(self.sample_qc_dir, 'sample_annotation_dict.p.bz'))
    self.fasta_dict = self._make_fasta_dict()
    self.all_tax_count_dd = self._make_all_tax_count_dd()
    self.genus_18S_taxonomic_annotation = sorted(
        self.all_tax_count_dd, key=self.all_tax_count_dd.get, reverse=True)[0]
    self._set_is_provenance_tax_annotation_correct()
    # set inter coral contamination fields to nan
    self.inter_genus_contamination_rel_abund = np.nan
    self.is_inter_genus_contamination = np.nan
    # primary seq should be the most abundant seq in the sample
    self.primary_sequence = self.fasta_dict[sorted(
        self.rel_all_seq_abundance_dict,
        key=self.rel_all_seq_abundance_dict.get,
        reverse=True)[0]]
    # But we set is_different to nan
    self.is_different_primary_sequence = np.nan
    # host rel abund will be the abund of heliopora
    self._set_host_rel_abund_heliopora()
    # Set the intragenus to nan
    self.putative_intra_genus_contamination_ratio = np.nan
    self.is_putative_intra_genus_contamination = np.nan
    self._set_post_qc_seq_depth()
    self._set_is_representative_for_sample()
    self._populate_coral_meta_info_table_dict()

def exportPreTracesParallel(
        exIx, STYLE, PT_IMG,
        border=True, borderColor='#322E2D', borderWidth=1, autoAspect=False,
        xpNum=0, digs=3, vLines=[0, 0], hLines=[0], popScaler=1,
        sampRate=1):
    monet.printProgress(exIx[0], xpNum, digs)
    repFilePath = exIx[1][1]
    repDta = pkl.load(repFilePath)
    name = path.splitext(repFilePath.split('/')[-1])[0][:-4]
    monet.exportTracesPlot(
        repDta, name, STYLE, PT_IMG, wopPrint=False, autoAspect=autoAspect,
        border=border, borderColor=borderColor, borderWidth=borderWidth,
        sampRate=sampRate)
    return None

def load_rearrange_data_from_path(
    cls,
    stage: str,
    base_dir: Optional[str] = None,
) -> Dict[str, List[Dict[str, Any]]]:
    stage = stage.lower()
    if stage == "valid":
        stage = "val"
    data_path = os.path.abspath(os.path.join(base_dir, f"{stage}.pkl.gz"))
    if not os.path.exists(data_path):
        raise RuntimeError(f"No data at path {data_path}")
    data = compress_pickle.load(path=data_path)
    for scene in data:
        for ind, task_spec_dict in enumerate(data[scene]):
            task_spec_dict["scene"] = scene
            if "index" not in task_spec_dict:
                task_spec_dict["index"] = ind
            if "stage" not in task_spec_dict:
                task_spec_dict["stage"] = stage
    return data

def main():
    skempi_path = "skempi/skempi_v2.csv"
    model_trained = "trained_model/UEP_trained_model_4"
    training_data = load(model_trained, compression="lzma", set_default_extension=False)
    skempi_processed_data_single, skempi_processed_data_multiple, skempi_processed_data_single_no_renamed = \
        read_skempi.process_skempi_data(skempi_path)
    data = {}
    for entry, value in skempi_processed_data_single.items():
        pdb = entry.split("_")[0]
        mutation_info = entry.split("_")[-1]
        chain = mutation_info[1]
        mutation_cleaned = "{}{}".format(mutation_info[0], mutation_info[2:])
        data.setdefault(pdb, []).append([chain, mutation_cleaned])
    for pdb, info_list in data.items():
        with open("skempi/mcsm/mutation_lists/{}.txt".format(pdb), "w") as f:
            for mutation in info_list:
                chain = mutation[0]
                mutation_cleaned = mutation[1]
                to_write = "{} {}".format(chain, mutation_cleaned)
                f.write(to_write + "\n")

def exportPreTracesPlotWrapper(
        expIx, fLists, STYLE, PT_IMG,
        border=True, borderColor='#322E2D', borderWidth=1, autoAspect=False,
        xpNum=0, digs=3, vLines=[0, 0], hLines=[0], popScaler=1,
        transparent=False):
    ter.printProgress(expIx + 1, xpNum, digs)
    (_, repDta) = [pkl.load(file) for file in (fLists[expIx])]
    name = path.splitext(fLists[expIx][0].split('/')[-1])[0][:-4]
    # Export plots ---------------------------------------------------------
    exportTracesPlot(
        repDta, name, STYLE, PT_IMG, wopPrint=False, autoAspect=autoAspect,
        border=border, borderColor=borderColor, borderWidth=borderWidth,
        transparent=transparent, vLines=vLines, hLines=hLines)
    return None

def __init__(self):
    super().__init__()
    self._add_additional_info_to_info_df()
    self.absolute_consolidated_abundance_dict = compress_pickle.load(
        os.path.join(self.cache_dir, 'consolidated_df_dict_output_tables.p.bz'))
    self.fig, self.ax = plt.subplots(6, 1)

def get_scene_to_obj_name_to_seen_positions():
    scene_to_task_spec_dicts = compress_pickle.load(
        os.path.join(STARTER_DATA_DIR, "train.pkl.gz"))
    assert len(scene_to_task_spec_dicts) == 80 and all(
        len(v) == 50 for v in scene_to_task_spec_dicts.values())

    scene_to_obj_name_to_positions = {}
    for scene in tqdm.tqdm(scene_to_task_spec_dicts):
        obj_name_to_positions = defaultdict(lambda: [])
        for task_spec_dict in scene_to_task_spec_dicts[scene]:
            for od in task_spec_dict["openable_data"]:
                obj_name_to_positions[od["name"]].extend(
                    (od["start_openness"], od["target_openness"]))

            for sp, tp in zip(task_spec_dict["starting_poses"],
                              task_spec_dict["target_poses"]):
                assert sp["name"] == tp["name"]
                position_dist = IThorEnvironment.position_dist(
                    sp["position"], tp["position"])
                rotation_dist = IThorEnvironment.angle_between_rotations(
                    sp["rotation"], tp["rotation"])
                if position_dist >= 1e-2 or rotation_dist >= 5:
                    obj_name_to_positions[sp["name"]].append(
                        [sp["position"][k] for k in ["x", "y", "z"]])
                    obj_name_to_positions[sp["name"]].append(
                        [tp["position"][k] for k in ["x", "y", "z"]])

        scene_to_obj_name_to_positions[scene] = {
            k: np.array(v) for k, v in obj_name_to_positions.items()
        }

    return scene_to_obj_name_to_positions

def parse_information_files(list_of_enzyme_commission_numbers, list_of_databases, cpus):
    output_path = "data/parsed_raw/parsed_dictionary_of_raw_data.gzip"
    if not os.path.exists(output_path):
        print("Creating information compressed file. Saved at {}".format(output_path))
        work_list = []
        for enzyme in list_of_enzyme_commission_numbers:
            for database in list_of_databases:
                file_path = "data/raw/{}_{}_raw.txt".format(enzyme, database)
                work_list.append((file_path, ))
        result_list = run_multiprocessing.run_mp(work_list, cpus, process_file)
        processed_results = split_results(result_list)
        dump(processed_results, output_path, compression="gzip",
             set_default_extension=False)
    else:
        print("Information file has been found at {}. Loading it.".format(output_path))
        processed_results = load(output_path, compression="gzip",
                                 set_default_extension=False)
        print("File loaded")
    return processed_results

def load_pickle(self, filename):
    '''
    Reloads compressed binary file saved by save_pickle.
    '''
    try:
        infile = open(filename, 'rb')
    except IOError:
        print("File not found: \"{}\"".format(filename))
        return
    indata = load(infile, compression="gzip")
    infile.close()
    self.net_thickness = indata[0]
    self.net_width = indata[1]
    self.net_length = indata[2]
    self.slots_in = indata[3]
    self.slots_out = indata[4]
    self.angle_in = indata[5]
    self.angle_out = indata[6]
    self.spi_in = indata[7]
    self.tol = indata[8]
    self.knots = indata[9]
    self.probe_pts = indata[10]
    self.probe_samples = indata[11]
    self.spi_in = (self.slots_in /
                   (self.net_width * sin((pi / 2.) + self.angle_in)))
    self.spi_out = (self.slots_out /
                    (self.net_width * sin((pi / 2.) + self.angle_out)))

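# Hedged sketch, assumption only: a `save_pickle` counterpart matching the
# field order that `load_pickle` above unpacks (indices 0-11). The attribute
# names are taken from `load_pickle`; the real method may differ.
def save_pickle(self, filename):
    '''Saves the parameters reloaded by load_pickle as a gzip-compressed pickle.'''
    outdata = [self.net_thickness, self.net_width, self.net_length,
               self.slots_in, self.slots_out, self.angle_in, self.angle_out,
               self.spi_in, self.tol, self.knots, self.probe_pts,
               self.probe_samples]
    with open(filename, 'wb') as outfile:
        dump(outdata, outfile, compression="gzip")
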
def showPosts():
    comments = []
    reddit_link = request.form.get('reddit_link')
    submission = reddit.submission(url=reddit_link)
    title = submission.title
    selftext = submission.selftext
    submission.comments.replace_more(limit=0)
    swearwords_df = pd.read_csv('files/edited-swear-words.csv')
    swearwords = swearwords_df.swear.tolist()
    features = [
        'profanity', 'length', 'adjWordScore', 'NER_count', 'NER_match',
        'WordScore', 'WholeScore', 'contains_url', 'no_url_WordScore',
        'no_url_WholeScore', 'WordScoreNoStop', 'WholeScoreNoStop',
        'no_url_or_stops_WholeScore', 'no_url_or_stops_WordScore'
    ]
    our_model = load("updated_model.pkl", compression="lzma",
                     set_default_extension=False)
    punctuation_lst = [
        ',', '.', '!', '?', '<', '>', '/', ':', ';', '\'', '\"', '[', '{',
        ']', '}', '|', '\\', '`', '~', '!', '@', '#', '$', '%', '^', '&',
        '*', '(', ')', '-', '_', '=', '+'
    ]
    article_url = submission.url
    cleaned_article_text = ab.clean_article(article_url)
    no_url_article_text = ab.remove_urls(cleaned_article_text)
    no_stop_article_text = ab.remove_stopwords(cleaned_article_text)
    no_stop_or_url_article_text = ab.remove_urls(no_stop_article_text)
    # rank by upvotes
    for comment in submission.comments:
        is_good_comment = ab.judgeComment(
            comment, reddit_link, swearwords, features, our_model,
            cleaned_article_text, no_url_article_text, no_stop_article_text,
            no_stop_or_url_article_text, punctuation_lst)
        confidence = is_good_comment[2]
        if is_good_comment[0]:
            comment.body = vc.visualize(comment.body)
            comment.body = vc.good_comment(comment.body, confidence)
            comments.append(comment.body.split())
        else:
            comment.body = vc.visualize(comment.body)
            comment.body = vc.bad_comment(comment.body, confidence)
            comments.append(comment.body.split())
    return render_template(
        'post.html',
        comments=comments,
        title=title,
        selftext=selftext,
        reddit_url=reddit_link,
        cleaned_article_text=cleaned_article_text,
        no_url_article_text=no_url_article_text,
        no_stop_article_text=no_stop_article_text,
        no_stop_or_url_article_text=no_stop_or_url_article_text,
        exp=None)

def init(self):
    self.capture = cv2.VideoCapture('data/' + self.file_name + '.avi')
    self.screenshots = []
    self.timestamps = pickle.load(
        open('data/' + self.file_name + '.timestamp', 'rb'))
    self.frames = compress_pickle.load('data/' + self.file_name + '.gz')
    self.frame_id = 0
    self.auto_play = False

def cache_read(file_name, cache_prefix=True):
    # file_name = cn_(file_name) if cache_prefix else file_name
    if os.path.exists(file_name):
        with open(file_name, 'rb') as f:
            return compress_pickle.load(f, compression="lzma")
            # return pickle.load(f)
    else:
        return None

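# Hedged sketch, assumption only: a write-side counterpart to `cache_read`,
# mirroring its lzma compression. The name `cache_write` is hypothetical.
def cache_write(obj, file_name, cache_prefix=True):
    # file_name = cn_(file_name) if cache_prefix else file_name
    with open(file_name, 'wb') as f:
        compress_pickle.dump(obj, f, compression="lzma")
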
def landSelector(land, PT_ROT):
    if land == 'PAN':
        lnd = (list(range(0, 62)), )
        return lnd
    else:
        pth = ''.join(PT_ROT.replace('/' + land + '/', ''))
        lnd = pkl.load(path.join(pth, 'GEO', 'clusters.bz'))
        return lnd

def read(self, fsnum: int = 0, fsoffset: int = 0,
         random_addition: bool = True, random_read: bool = True):
    """
    Load files from the given directory 'self.datadir'.

    - If 'fsnum' is 0, all files will be loaded.
    - If 'fsoffset' is 0, all files will be loaded.

    Parameters
    ----------
    fsnum : int, optional
        Number of files to load. The default is 0.
    fsoffset : int, optional
        Offset from the first file to start counting of files.
        The default is 0.
    random_addition : bool, optional
        Read random files from self.datadir if 'fsnum' is larger than the
        number of files remaining in the directory listing.
        The default is True.
    random_read : bool, optional
        Read random files, not from the ordered list of files.
        The default is True.

    Returns
    -------
    None.

    """
    if fsnum != 0:
        self.fsnum = fsnum
    if fsoffset != 0:
        self.fsoffset = fsoffset
    datapaths = list_files(self.datadir, self.vendor, self.fsoffset, self.fsnum)
    if random_read or (len(datapaths) < self.fsnum and random_addition):
        _datapaths = list_files(self.datadir, self.vendor, 0, 0)
        indxs = np.arange(len(_datapaths))
        rnd_indxs = rsample(indxs[indxs != self.fsoffset].tolist(), self.fsnum - 1)
        datapaths = [_datapaths[i] for i in rnd_indxs + [self.fsoffset]]
    # Check if file was processed
    files_processed = [k for k, v in self.rawdata.items() if v is not None]
    files_to_remove = [k for k in self.rawdata if k not in datapaths]
    for path in files_to_remove:
        del self.rawdata[path]
    for path in datapaths:
        if path not in files_processed:
            self.rawdata[path] = None
    if self.memo_file:
        self.rawdata.update(load(self.memo_file, compression='lzma',
                                 set_default_extension=False))
    self.filesnum = len(self.rawdata) if not fsnum else fsnum

def _populate_master_seq_info_dict(self):
    if os.path.isfile(os.path.join(self.cache_dir, 'master_seq_info_dict.p.bz')):
        self.master_seq_info_dict = compress_pickle.load(
            os.path.join(self.cache_dir, 'master_seq_info_dict.p.bz'))
    else:
        print('Collecting sequence information (original seqs): first pass')
        for readset in self.coral_readsets:
            sys.stdout.write(f'\r{readset}')
            # read in the fasta file
            fasta_file = self._read_in_fasta_file(readset)
            # read in the name file and make an abundance dictionary
            name_rel_abund_dict = self._make_rel_abund_dict_from_names_path(readset)
            # read in the sample taxonomy dictionary
            sample_annotation_dict = compress_pickle.load(
                os.path.join(self.qc_dir, readset, 'sample_annotation_dict.p.bz'))
            # for each sequence in the fasta file:
            # if not already in the dict, init with the rel abund and tax info,
            # else simply add to the cumulative abund
            for i in range(0, len(fasta_file), 2):
                seq_name = fasta_file[i].split('\t')[0][1:]
                seq = fasta_file[i + 1]
                try:
                    self.master_seq_info_dict[seq]['cummulative_abund'] += \
                        name_rel_abund_dict[seq_name]
                except KeyError:
                    try:
                        tax_tup = sample_annotation_dict[seq_name]
                    except KeyError:
                        tax_tup = ('not_annotated', 'not_annotated', 'not_annotated')
                    self.master_seq_info_dict[seq] = {
                        'cummulative_abund': name_rel_abund_dict[seq_name],
                        'tax_annotation': tax_tup
                    }
        compress_pickle.dump(
            self.master_seq_info_dict,
            os.path.join(self.cache_dir, 'master_seq_info_dict.p.bz'))