def run():
    for model, num_to_create in to_create.items():
        model_name = model._meta.model_name
        bar = Bar('Creating {}'.format(model_name), max=num_to_create)
        model_count = model.objects.count()
        create_f = globals()['populate_{}'.format(model_name)]
        for i in range(num_to_create):
            ident = '{}{}'.format(model_name, i)
            if i < model_count:
                # Reuse an existing instance rather than creating a new one.
                unit = model.objects.all()[i]
            else:
                unit = create_f(model, i)
            globals()[ident] = unit
            bar.next()
        bar.finish()

    # This bit is special: associate all rpms with the first repo,
    # for maximum relational query fun.
    num_units = platform.ContentUnit.objects.count()
    repo = globals()['repository0']
    bar = Bar('Adding all units to {} repo'.format(repo.slug))
    bar.max = num_units
    for unit in platform.ContentUnit.objects.all():
        repo.add_units(unit)
        bar.next()
    bar.finish()
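# run() above assumes a module-level mapping from model classes to instance
# counts, plus one populate_<model_name>() factory per model. The sketch below
# is a hypothetical illustration of that contract (the model names, counts,
# and the slug field are assumptions, not from the source).
to_create = {
    platform.Repository: 1,      # hypothetical counts
    platform.ContentUnit: 100,
}

def populate_repository(model, i):
    # Hypothetical factory matching the 'populate_{model_name}' lookup in run().
    return model.objects.create(slug='repository{}'.format(i))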
def mismas_features_distinto_humor(corpus):
    print("Looking for tweets with the same feature values but different humor labels...")
    humoristicos = [tweet for tweet in corpus if tweet.es_humor]
    no_humoristicos = [tweet for tweet in corpus if not tweet.es_humor]
    res = []
    bar = IncrementalBar("Searching tweets\t\t",
                         max=len(humoristicos) * len(no_humoristicos),
                         suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for tweet_humor in humoristicos:
        for tweet_no_humor in no_humoristicos:
            if tweet_humor.features == tweet_no_humor.features:
                res.append((tweet_humor, tweet_no_humor))
                if tweet_humor.texto_original == tweet_no_humor.texto_original:
                    print("-----SAME ORIGINAL TEXT------")
                if tweet_humor.texto == tweet_no_humor.texto:
                    print("----------SAME TEXT----------")
                if tweet_humor.id == tweet_no_humor.id:
                    print("-----------SAME ID-----------")
                if tweet_humor.cuenta == tweet_no_humor.cuenta:
                    print("---------SAME ACCOUNT--------")
                print('')
                print(tweet_humor.id)
                print(tweet_humor.texto)
                print("------------")
                print(tweet_no_humor.id)
                print(tweet_no_humor.texto)
                print("------------")
                print('')
            bar.next()
    bar.finish()
    return res
def calcular_feature_thread(self, tweets, nombre_feature, identificador):
    if len(tweets) > 0:
        bar = IncrementalBar("Computing feature " + nombre_feature + ' - ' + unicode(identificador),
                             max=len(tweets),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        feature = self.features[nombre_feature]
        self.abortar_si_feature_no_es_thread_safe(feature)
        for tweet in tweets:
            tweet.features[feature.nombre] = feature.calcular_feature(tweet)
            bar.next()
        bar.finish()
def save_frames(source, vertices, images_dir):
    print('Saving frames...')
    if not os.path.isdir(images_dir):
        os.makedirs(images_dir)
    bar = IncrementalBar(max=len(vertices))
    angle_change = 360 // len(vertices)
    for i, v in enumerate(vertices):
        update(source, v, angle_change=angle_change)
        mlab.savefig(filename=os.path.join(images_dir, frame_fn(i)))
        bar.next()
    bar.finish()
    mlab.close()
def calcular_features_thread(self, tweets, identificador):
    if len(tweets) > 0:
        bar = IncrementalBar("Computing features - " + unicode(identificador),
                             max=len(tweets) * len(self.features),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for tweet in tweets:
            for feature in list(self.features.values()):
                self.abortar_si_feature_no_es_thread_safe(feature)
                tweet.features[feature.nombre] = feature.calcular_feature(tweet)
                bar.next()
        bar.finish()
def guardar_parecidos_con_distinto_humor(pares_parecidos_distinto_humor):
    with closing(open_db()) as conexion:
        with closing(conexion.cursor()) as cursor:
            consulta = "INSERT INTO tweets_parecidos_distinto_humor VALUES (%s, %s)" \
                       + " ON DUPLICATE KEY UPDATE id_tweet_no_humor = %s"
            bar = IncrementalBar("Saving similar tweets\t",
                                 max=len(pares_parecidos_distinto_humor),
                                 suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)
            for tweet_humor, tweet_no_humor in pares_parecidos_distinto_humor:
                cursor.execute(consulta, (tweet_humor.id, tweet_no_humor.id, tweet_no_humor.id))
                bar.next()
            conexion.commit()
            bar.finish()
def cross_validation_y_reportar(clasificador, features, clases, numero_particiones):
    skf = cross_validation.StratifiedKFold(clases, n_folds=numero_particiones)
    features = np.array(features)
    clases = np.array(clases)
    matrices = []
    medidas = defaultdict(list)

    bar = IncrementalBar("Running cross-validation\t",
                         max=numero_particiones,
                         suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for entrenamiento, evaluacion in skf:
        clasificador.fit(features[entrenamiento], clases[entrenamiento])
        clases_predecidas = clasificador.predict(features[evaluacion])
        matriz_de_confusion = metrics.confusion_matrix(clases[evaluacion], clases_predecidas).flatten()
        matrices.append(matriz_de_confusion)
        for medida, valor_medida in calcular_medidas(*matriz_de_confusion).items():
            medidas[medida].append(valor_medida)
        bar.next()
    bar.finish()

    promedios = {}
    print('')
    print("Cross-validation results:")
    print('')
    for medida, valor_medida in medidas.items():
        print("\t{medida: >18s}:\t{valor_medida}".format(medida=medida, valor_medida=valor_medida))
        promedio = np.mean(valor_medida)
        promedios[medida] = promedio
        delta = np.std(valor_medida) * 1.96 / math.sqrt(numero_particiones)
        print("95% confidence interval:\t{promedio:0.4f} ± {delta:0.4f} --- [{inf:0.4f}, {sup:0.4f}]".format(
            promedio=promedio, delta=delta, inf=promedio - delta, sup=promedio + delta))
        print('')

    imprimir_matriz_metricas(
        promedios['Precision No humor'],
        promedios['Recall No humor'],
        promedios['F1-score No humor'],
        promedios['Precision Humor'],
        promedios['Recall Humor'],
        promedios['F1-score Humor'],
    )
    print('')
    print('')
    print('')
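# A minimal, self-contained sketch of the 95% confidence-interval formula used
# above (mean ± 1.96 * std / sqrt(n)). The per-fold scores below are made-up
# illustration values, not output of the function.
import math
import numpy as np

fold_scores = [0.81, 0.79, 0.84, 0.80, 0.82]  # hypothetical per-fold F1 scores
mean = np.mean(fold_scores)
delta = np.std(fold_scores) * 1.96 / math.sqrt(len(fold_scores))
print("95% CI: {:.4f} ± {:.4f}".format(mean, delta))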
def render(self, ctx, invert=False, filename=None, pbar=False):
    """
    Generate image of layer.

    Parameters
    ----------
    ctx : :class:`GerberContext`
        GerberContext subclass used for rendering the image

    filename : string <optional>
        If provided, save the rendered image to `filename`

    pbar : bool <optional>
        If true, render a progress bar
    """
    ctx.set_bounds(self.bounds)
    ctx._paint_background()

    if invert:
        # Render the primitives into the clear mask first.
        ctx.invert = True
        ctx._clear_mask()
        for p in self.primitives:
            ctx.render(p)
        ctx.invert = False
        ctx._render_mask()

    _pbar = None
    if pbar:
        try:
            from progress.bar import IncrementalBar
            _pbar = IncrementalBar(self.filename, max=len(self.primitives))
        except ImportError:
            pbar = False

    for p in self.primitives:
        ctx.render(p)
        if pbar:
            _pbar.next()
    if pbar:
        _pbar.finish()

    if filename is not None:
        ctx.dump(filename)
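# A hedged usage sketch for the render() method above, assuming a
# pcb-tools-style API; gerber.read() and GerberCairoContext come from that
# ecosystem and may differ in your fork, and the input file name is a
# placeholder. The pbar flag is the extension defined above.
import gerber
from gerber.render import GerberCairoContext

layer = gerber.read('board-copper-top.gbr')  # hypothetical input file
ctx = GerberCairoContext()
layer.render(ctx, pbar=True, filename='board-copper-top.png')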
def guardar_parecidos_con_distinto_humor(pares_parecidos_distinto_humor):
    with closing(mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST,
                                         database=DB_NAME)) as conexion:
        with closing(conexion.cursor()) as cursor:
            consulta = (
                "INSERT INTO tweets_parecidos_distinto_humor VALUES (%s, %s)"
                + " ON DUPLICATE KEY UPDATE id_tweet_no_humor = %s"
            )
            bar = IncrementalBar("Saving similar tweets\t",
                                 max=len(pares_parecidos_distinto_humor),
                                 suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)
            for tweet_humor, tweet_no_humor in pares_parecidos_distinto_humor:
                cursor.execute(consulta, (tweet_humor.id, tweet_no_humor.id, tweet_no_humor.id))
                bar.next()
            conexion.commit()
            bar.finish()
def _create_unfilled_voxel_data(
        model_id, edge_length_threshold=0.1, voxel_config=None,
        overwrite=False, example_ids=None):
    from template_ffd.data.ids import get_example_ids
    from shapenet.core import cat_desc_to_id
    from template_ffd.model import load_params
    import numpy as np
    from progress.bar import IncrementalBar
    if voxel_config is None:
        voxel_config = _default_config
    cat_id = cat_desc_to_id(load_params(model_id)['cat_desc'])
    if example_ids is None:
        example_ids = get_example_ids(cat_id, 'eval')
    mesh_dataset = get_inferred_mesh_dataset(model_id, edge_length_threshold)
    voxel_dataset = get_voxel_dataset(
        model_id, edge_length_threshold, voxel_config, filled=False,
        auto_save=False)
    if not overwrite:
        example_ids = [i for i in example_ids if i not in voxel_dataset]
    if len(example_ids) == 0:
        return
    print('Creating %d voxels for model %s' % (len(example_ids), model_id))
    kwargs = dict(
        voxel_dim=voxel_config.voxel_dim,
        exact=voxel_config.exact,
        dc=voxel_config.dc,
        aw=voxel_config.aw)
    with mesh_dataset:
        bar = IncrementalBar(max=len(example_ids))
        for example_id in example_ids:
            bar.next()
            mesh = mesh_dataset[example_id]
            vertices, faces = (
                np.array(mesh[k]) for k in ('vertices', 'faces'))
            binvox_path = voxel_dataset.path(example_id)
            # x, z, y = vertices.T
            # vertices = np.stack([x, y, z], axis=1)
            bio.mesh_to_binvox(vertices, faces, binvox_path, **kwargs)
        bar.finish()
def render_deferred(self):
    if not len(self._deferred):
        return

    print("Optimizing deferred elements")
    paths = self._optimize_deferred().paths

    print("Rendering Paths")
    try:
        from progress.bar import IncrementalBar
        _pbar = IncrementalBar(max=len(paths))
    except ImportError:
        _pbar = None

    for path in paths:
        self._render_path(path)
        if _pbar:
            _pbar.next()
    if _pbar:
        _pbar.finish()
def guardar_features(tweets, **opciones):
    nombre_feature = opciones.pop("nombre_feature", None)
    conexion = mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST,
                                       database=DB_NAME)
    cursor = conexion.cursor()
    consulta = "INSERT INTO features VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE valor_feature = %s"
    if nombre_feature:
        mensaje = "Saving feature " + nombre_feature
    else:
        mensaje = "Saving features"
    bar = IncrementalBar(mensaje, max=len(tweets), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for tweet in tweets:
        if nombre_feature:
            cursor.execute(
                consulta,
                (
                    tweet.id,
                    nombre_feature,
                    unicode(tweet.features[nombre_feature]),
                    unicode(tweet.features[nombre_feature]),
                ),
            )
        else:
            # Use a separate loop variable so nombre_feature is not clobbered,
            # which would make every later tweet take the single-feature branch.
            for nombre, valor_feature in tweet.features.items():
                cursor.execute(consulta, (tweet.id, nombre, unicode(valor_feature), unicode(valor_feature)))
        bar.next()
    conexion.commit()
    bar.finish()
    cursor.close()
    conexion.close()
def cargar_parecidos_con_distinto_humor():
    with closing(open_db()) as conexion:
        # buffered=True so the row count is known before iterating.
        with closing(conexion.cursor() if DB_ENGINE == 'sqlite3' else conexion.cursor(buffered=True)) as cursor:
            consulta = """
                       SELECT id_tweet_humor, id_tweet_no_humor
                       FROM tweets_parecidos_distinto_humor
                       """
            cursor.execute(consulta)
            pares_ids_parecidos_con_distinto_humor = []
            bar = IncrementalBar("Loading similar tweets\t", max=cursor.rowcount,
                                 suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)
            for par_ids in cursor:
                pares_ids_parecidos_con_distinto_humor.append(par_ids)
                bar.next()
            bar.finish()
    return pares_ids_parecidos_con_distinto_humor
def cargar_parecidos_con_distinto_humor():
    with closing(mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST,
                                         database=DB_NAME)) as conexion:
        # buffered=True so the row count is known before iterating.
        with closing(conexion.cursor(buffered=True)) as cursor:
            consulta = """
                       SELECT id_tweet_humor, id_tweet_no_humor
                       FROM tweets_parecidos_distinto_humor
                       """
            cursor.execute(consulta)
            pares_ids_parecidos_con_distinto_humor = []
            bar = IncrementalBar("Loading similar tweets\t", max=cursor.rowcount,
                                 suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)
            for par_ids in cursor:
                pares_ids_parecidos_con_distinto_humor.append(par_ids)
                bar.next()
            bar.finish()
    return pares_ids_parecidos_con_distinto_humor
def guardar_features(tweets, **opciones):
    nombre_feature = opciones.pop('nombre_feature', None)
    conexion = open_db()
    cursor = conexion.cursor()
    consulta = "INSERT INTO features VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE valor_feature = %s"
    if nombre_feature:
        mensaje = 'Saving feature ' + nombre_feature
    else:
        mensaje = 'Saving features'
    bar = IncrementalBar(mensaje, max=len(tweets), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for tweet in tweets:
        if nombre_feature:
            cursor.execute(
                consulta,
                (
                    tweet.id,
                    nombre_feature,
                    unicode(tweet.features[nombre_feature]),
                    unicode(tweet.features[nombre_feature])
                )
            )
        else:
            # Use a separate loop variable so nombre_feature is not clobbered,
            # which would make every later tweet take the single-feature branch.
            for nombre, valor_feature in tweet.features.items():
                cursor.execute(consulta, (tweet.id, nombre, unicode(valor_feature), unicode(valor_feature)))
        bar.next()
    conexion.commit()
    bar.finish()
    cursor.close()
    conexion.close()
def install(package_list):
    '''
    Install A Specified Package(s)
    '''
    if platform == 'linux' or platform == 'darwin':
        password = getpass('Enter your password: ')
    else:
        # Redacted in the source; an empty default keeps the variable defined
        # ("otherwise the variable would be undefined..").
        password = ''
    packages = package_list.split(',')
    turbocharge = Installer()
    click.echo('\n')
    os_bar = IncrementalBar('Getting Operating System...', max=1)
    os_bar.next()
    for package_name in packages:
        package_name = package_name.strip(' ')
        if platform == 'linux':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...', max=1)
            if package_name in devpackages_linux:
                show_progress(finding_bar)
                turbocharge.install_task(
                    devpackages_linux[package_name],
                    f'{constant.apt_script} {package_name}',
                    password,
                    f'{package_name} --version',
                    [f'{devpackages_linux[package_name]} Version'])
            if package_name in applications_linux:
                show_progress(finding_bar)
                turbocharge.install_task(
                    applications_linux[package_name],
                    f'{constant.snap_script} {package_name}',
                    password, '', [])
            if package_name == 'chrome':
                show_progress(finding_bar)
                click.echo('\n')
                password = getpass("Enter your password: ")
                # (the remainder of the chrome install block was redacted in
                # the source; the original wrapped it in a try/except)
        if platform == 'win32':
            # (branch header reconstructed around a redaction in the source)
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...', max=1)
            if package_name in devpackages_windows:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=devpackages_windows[package_name],
                    script=f"choco install {package_name} -y",
                    password="",
                    test_script=f"{package_name} --version",
                    tests_passed=[f'{devpackages_windows[package_name]} Version'])
            elif package_name in applications_windows:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=applications_windows[package_name],
                    script=f"choco install {package_name} -y",
                    password="",
                    test_script="",
                    tests_passed=[])
            elif package_name not in devpackages_windows and package_name not in applications_windows:
                click.echo('\n')
                click.echo(click.style(':( Package Not Found! :(', fg='red'))
        if platform == 'darwin':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...', max=1)
            if package_name in devpackages_macos:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=devpackages_macos[package_name],
                    script=f"brew install {package_name}",
                    password="",
                    test_script=f"{package_name} --version",
                    tests_passed=[f'{devpackages_macos[package_name]} Version'])
                # test_script is just a string here..
            elif package_name in applications_macos:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=applications_macos[package_name],
                    script=f"brew cask install {package_name}",
                    password="",
                    test_script="",
                    tests_passed=[])
            elif package_name not in devpackages_macos and package_name not in applications_macos:
                click.echo('\n')
                click.echo(click.style(':( Package Not Found! :(', fg='red'))
class SysExParser(object):
    def __init__(self, send_func, debug=False):
        super(SysExParser, self).__init__()
        self.send_func = send_func
        self.debug = debug
        self.dump_file = None
        self.dump_on = False
        self.dump_ram = False
        self.printer = MessagePrinter(debug=self.debug)
        self.handlers = {
            # FILE FUNCTIONS   FILE_F
            "F_DHDR": self.handleFileDumpHeader,
            "F_DPKT": self.handleFileDumpDataBlock,
            "DIR_HDR": self.handleFileDumpHeader,
            "F_WAIT": noop,
            "F_CANCEL": cancel,
            "F_ERR": cancel,
            # DEVICE COMMAND   DEVICE_CMD
            "STAT_ANSWER": self.handleStatusAnswer,
            "DATA_HEADER": self.handleDirectoryAnswer,
            "DATA_DUMP": self.handleDataDump,
            "DIR_ANSWER": self.handleDirectoryAnswer,
            "D_WAIT": noop,
            "D_ACK": noop,
            "D_CANCEL": cancel,
            "D_ERR": cancel,
        }
        self.dump_start = ["F_DREQ", "DATA_REQUEST"]
        self.dump_stop = ["F_CANCEL", "D_CANCEL"]

    def __del__(self):
        self.closeDumpFile()

    def createDumpFile(self, filename=None):
        if not filename:
            filename = "dump_%s.bin" % mktimestamp()
        self.dump_file = open(filename, "wb")

    def closeDumpFile(self):
        if not self.dump_file:
            return
        self.dump_file.close()
        self.dump_file = None

    def startDump(self, filename, size):
        if not self.dump_on:
            return
        self.dump_written = 0
        self.dump_size = size
        self.closeDumpFile()
        self.createDumpFile(filename)
        print "Dumping '%s'" % filename
        showsize = ' 0x%(index)06x' if self.dump_ram else ''
        self.bar = IncrementalBar(
            max=size,
            suffix='%(percent)d%% [%(elapsed_td)s / %(eta_td)s]' + showsize)

    def stopDump(self):
        if not self.dump_on:
            return
        self.bar.finish()
        self.closeDumpFile()
        self.dump_on = False

    def dump(self, data, filename=None):
        if not self.dump_on:
            return
        if not self.dump_file:
            self.createDumpFile()
        if self.dump_written == self.dump_size:
            print "Discarding", len(data), "bytes, dump has ended"
        elif len(data) + self.dump_written > self.dump_size:
            discard = len(data) + self.dump_written - self.dump_size
            self.dump_file.write(bytearray(data[:-discard]))
            self.bar.next(self.dump_size - self.dump_written)
            self.dump_written = self.dump_size
            self.bar.finish()
            leftover = data[-discard:]
            for i in leftover:
                if i != 0:
                    print "Discarding non-NUL data:", hexdump(leftover)
                    break
        else:
            self.dump_file.write(bytearray(data))
            self.dump_written += len(data)
            self.bar.next(len(data))

    # FILE FUNCTIONS   FILE_F
    def handleFileDumpHeader(self, msg, timestamp):
        self.sendSysEx(MSCEIMessage(fromName="F_WAIT"), timestamp=timestamp + 1)
        offset = 17
        data = []
        for i in xrange(2):
            data += conv7_8(msg[offset:offset + 8])
            offset += 8
        location = ''
        while msg[offset] != 0:
            location += chr(msg[offset])
            offset += 1
        offset += 1
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            filename = str(bytearray(msg[5:16])).strip()
            length = struct.unpack('>I', list2str(data[4:8]))[0]
            self.startDump(filename, length)
            self.dump(data[8:])
            self.sendSysEx(MSCEIMessage(fromName="F_ACK"), timestamp=timestamp + 2)
        else:
            self.sendSysEx(MSCEIMessage(fromName="F_NACK"), timestamp=timestamp + 2)
        return True

    def handleFileDumpDataBlock(self, msg, timestamp):
        self.sendSysEx(MSCEIMessage(fromName="F_WAIT"), timestamp=timestamp + 1)
        noctets = msg[5]
        offset = 6
        data = []
        for i in xrange(noctets):
            data += conv7_8(msg[offset:offset + 8])
            offset += 8
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            self.dump(data)
            self.sendSysEx(MSCEIMessage(fromName="F_ACK"), timestamp=timestamp + 2)
        else:
            self.sendSysEx(MSCEIMessage(fromName="F_NACK"), timestamp=timestamp + 2)
        return True

    # DEVICE COMMAND   DEVICE_CMD
    def handleStatusAnswer(self, msg, timestamp):
        self.sendSysEx(MSCEIMessage(fromName="D_WAIT"), timestamp=timestamp + 1)
        offset = 5 + 3 * 8
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            self.sendSysEx(MSCEIMessage(fromName="D_ACK"), timestamp=timestamp + 2)
            if self.dump_ram:
                self.dump_on = True
                self.startDump("ramdump_%s.bin" % mktimestamp(), 2097060)
                time.sleep(0.1)
                self.sendSysEx(MSCEIMessage(fromName="F_ACK"), timestamp=timestamp + 3)
            return True
        else:
            self.sendSysEx(MSCEIMessage(fromName="D_NACK"), timestamp=timestamp + 2)
            return False

    def handleDataDump(self, msg, timestamp):
        self.sendSysEx(MSCEIMessage(fromName="D_WAIT"))
        noctets = msg[5]
        offset = 6
        data = []
        for i in xrange(noctets):
            data += conv7_8(msg[offset:offset + 8])
            offset += 8
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            self.dump(data)
            self.sendSysEx(MSCEIMessage(fromName="D_ACK"), timestamp=timestamp + 2)
        else:
            self.sendSysEx(MSCEIMessage(fromName="D_NACK"), timestamp=timestamp + 2)
        return True

    def handleDirectoryAnswer(self, msg, timestamp):
        #time.sleep(0.1)
        self.sendSysEx(MSCEIMessage(fromName="D_WAIT"), timestamp=timestamp + 1)
        offset = 8 + 11 + 1
        data = []
        for i in xrange(2):
            data += conv7_8(msg[offset:offset + 8])
            offset += 8
        offset += 11
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            filename = str(bytearray(msg[8:19])).strip()
            length = struct.unpack('>I', list2str(data[4:8]))[0]
            self.startDump(filename, length)
            #time.sleep(0.1)
            self.sendSysEx(MSCEIMessage(fromName="D_ACK"), timestamp=timestamp + 2)
        else:
            self.sendSysEx(MSCEIMessage(fromName="D_NACK"), timestamp=timestamp + 2)
        return True

    def parse(self, msg, timestamp, acceptUnhandled=True):
        if msg[0] != 0xF0:
            print 'Non-sysex message'
            print [hex(b) for b in msg]
            print
            return acceptUnhandled
def train(self):
    T = 0
    print("start training: {}_{}_{}_{}_{}_{}".format(
        "Q_onpolicy", self.alpha, self.alpha_end, self.epsilon,
        self.epsilon_end, self.maxepisode))
    Q_value = self.get_Q_value()
    self.data.append([T, Q_value])
    s = self.game.reset()
    p_A, p_B = self.gen_policy(s)
    a = self.choose_action(p_A, p_B)
    bar = Bar('Training', max=self.maxepisode,
              suffix='%(index)d/%(max)d - %(elapsed)ds/%(eta)ds')
    while T < self.maxepisode:
        # take action:
        s_prime, r_A, r_B, done, _ = self.game.step_encoded_action(a)
        #self.game.render()
        p_A, p_B = self.gen_policy(s_prime)
        a_prime = self.choose_action(p_A, p_B)
        a_A, a_B = self.game.decode_action(a_prime)
        p_A = np.zeros(self.nA, dtype=float)
        p_B = np.zeros(self.nA, dtype=float)
        p_A[a_A] = 1
        p_B[a_B] = 1
        self.learn(s, a, s_prime, r_A, r_B, done, p_A, p_B)
        self.alpha *= self.alpha_decay
        self.epsilon *= self.epsilon_decay
        Q_value_prime = self.get_Q_value()
        if s == self.game.encode_state(col_A=2, col_B=1, row_A=0, row_B=0, ball=1) \
                and a == self.game.encode_action(a_A=2, a_B=0):
            self.data.append([T + 1, Q_value_prime])
            #print("step: {}, Q: {}".format(T, Q_value_prime))
        err_Q = np.abs(Q_value_prime - Q_value)
        Q_value = Q_value_prime
        #print("step: {}, Err_Q: {}".format(T, err_Q))
        s = s_prime
        a = a_prime
        T += 1
        if done:
            #print("yes")
            #self.game.render()
            s = self.game.reset()
            a = self.choose_action(p_A, p_B)
        else:
            s = s_prime
        bar.next()
    bar.finish()
    #np.save("Qtable_Q_offpolicy.npy", self.Q)
    final_policy = self.gen_policy(
        self.game.encode_state(col_A=2, col_B=1, row_A=0, row_B=0, ball=1))
    self.final_policy = np.array(final_policy)
    print(final_policy[0])
    print(final_policy[1])
    print(final_policy[0].sum())
    print(final_policy[1].sum())
    #print(final_policy.reshape(5,5).sum(axis=1))
    #print(final_policy.reshape(5,5).sum(axis=0))
    #print(final_policy.sum())
def find_metaclonotypes(
        project_path="tutorial48",
        source_path=os.path.join(path_to_base, 'tcrdist', 'data', 'covid19'),
        antigen_enriched_file='mira_epitope_48_610_YLQPRTFL_YLQPRTFLL_YYVGYLQPRTF.tcrdist3.csv',
        ncpus=4,
        seed=3434):
    """
    This function encapsulates a complete workflow for finding meta-clonotypes
    in antigen-enriched data.
    """
    np.random.seed(seed)
    if not os.path.isdir(project_path):
        os.mkdir(project_path)
    ############################################################################
    # Step 1: Select and load an antigen-enriched (sub)repertoire.         ####
    ############################################################################
    print(f"INITIATING A TCRrep() with {antigen_enriched_file}")
    assert os.path.isfile(os.path.join(source_path, antigen_enriched_file))
    # Read file into a Pandas DataFrame <df>
    df = pd.read_csv(os.path.join(source_path, antigen_enriched_file))
    # Drop cells without any gene usage information
    df = df[(df['v_b_gene'].notna()) & (df['j_b_gene'].notna())]
    # Initialize a TCRrep class, using ONLY columns that are complete and that
    # uniquely define a clone. The class provides a 'count' column if none is
    # present. Counts of identical subject:VCDR3 'clones' will be aggregated
    # into a TCRrep.clone_df.
    from tcrdist.repertoire import TCRrep
    tr = TCRrep(cell_df=df[['subject', 'cell_type', 'v_b_gene', 'j_b_gene', 'cdr3_b_aa']],
                organism="human",
                chains=['beta'],
                compute_distances=True)
    tr.cpus = ncpus
    ############################################################################
    # Step 1.1: Estimate Probability of Generation                         ####
    ############################################################################
    # It will be useful later to know the pgen of each clone.
    from tcrdist.automate import auto_pgen
    print(f"COMPUTING PGEN WITH OLGA (Sethna et al 2018)")
    print("FOR ANTIGEN-ENRICHED CLONES TO BE USED FOR SUBSEQUENT ANALYSES")
    auto_pgen(tr)
    # Tip: Users of tcrdist3 should be aware that by default a <TCRrep.clone_df>
    # DataFrame is created out of non-redundant cells in the cell_df, and
    # pairwise distance matrices are automatically computed.
    # Notice that attributes <tr.clone_df>, <tr.pw_beta>, and <tr.pw_cdr3_b_aa>
    # are immediately accessible.
    # Attributes <tr.pw_pmhc_b_aa>, <tr.pw_cdr2_b_aa>, and <tr.pw_cdr1_b_aa>
    # are also available if <TCRrep.store_all_cdr> is set to True.
    # For large datasets, i.e., >15,000 clones, this approach may consume too
    # much memory, so <TCRrep.compute_distances> is automatically set to False.
    ############################################################################
    # Step 2: Synthesize an Inverse Probability Weighted VJ Matched Background #
    ############################################################################
    # Generating an appropriate set of unenriched reference TCRs is important; for
    # each set of antigen-associated TCRs, discovered by MIRA, we created a two part
    # background. One part consists of 100,000 synthetic TCRs whose V-gene and J-gene
    # frequencies match those in the antigen-enriched repertoire, using the software
    # OLGA (Sethna et al. 2019; Marcou et al. 2018). The other part consists of
    # 100,000 umbilical cord blood TCRs sampled uniformly from 8 subjects (Britanova
    # et al., 2017). This mix balances dense sampling of sequences near the
    # biochemical neighborhoods of interest with broad sampling of TCRs from an
    # antigen-naive repertoire. Importantly, we adjust for the biased sampling by
    # using the V- and J-gene frequencies observed in the cord-blood data (see
    # Methods for details about inverse probability weighting adjustment). Using this
    # approach we are able to estimate the abundance of TCRs similar to a centroid
    # TCR in an unenriched background repertoire of ~1,000,000 TCRs, using a
    # comparatively modest background dataset of 200,000 TCRs. While this estimate
    # may underestimate the true specificity, since some of the neighborhood TCRs in
    # the unenriched background repertoire may in fact recognize the antigen of
    # interest, it is useful for prioritizing neighborhoods and selecting a radius
    # for each neighborhood that balances sensitivity and specificity.
    # Initialize a TCRsampler -- human, beta, umbilical cord blood from 8 people.
    print(f"USING tcrsampler TO CONSTRUCT A CUSTOM V-J MATCHED BACKGROUND")
    from tcrsampler.sampler import TCRsampler
    ts = TCRsampler(default_background='britanova_human_beta_t_cb.tsv.sampler.tsv')
    # Stratify the sample so that each subject contributes similarly to the
    # estimate of gene usage frequency.
    from tcrdist.background import get_stratified_gene_usage_frequency
    ts = get_stratified_gene_usage_frequency(ts=ts, replace=True)
    # Synthesize an inverse probability weighted V,J gene background that matches
    # usage in your enriched repertoire.
    df_vj_background = tr.synthesize_vj_matched_background(ts=ts, chain='beta')
    # Get a randomly drawn stratified sample of beta-chain cord blood TCRs from
    # Britanova et al. 2016,
    # "Dynamics of Individual T Cell Repertoires: From Cord Blood to Centenarians".
    from tcrdist.background import sample_britanova, get_gene_frequencies
    df_britanova_100K = sample_britanova(size=100000)
    # Append frequency columns using the sampler above.
    df_britanova_100K = get_gene_frequencies(ts=ts, df=df_britanova_100K)
    df_britanova_100K['weights'] = 1
    df_britanova_100K['source'] = "stratified_random"
    # Combine the two parts of the background into a single DataFrame.
    df_bkgd = pd.concat([df_vj_background.copy(), df_britanova_100K.copy()],
                        axis=0).reset_index(drop=True)
    # Assert that the backgrounds have the expected number of rows.
    assert df_bkgd.shape[0] == 200000
    # Save the background for future use.
    background_outfile = os.path.join(project_path,
                                      f"{antigen_enriched_file}.olga100K_brit100K_bkgd.csv")
    print(f'WRITING {background_outfile}')
    df_bkgd.to_csv(background_outfile, index=False)
    # Load the background into a TCRrep without computing pairwise distances
    # (i.e., compute_distances = False).
    tr_bkgd = TCRrep(
        cell_df=df_bkgd,
        organism="human",
        chains=['beta'],
        compute_distances=False)
    # Compute rectangular distances, that is, distances between each clone in
    # the antigen-enriched repertoire and each TCR in the background.
    # With a single CPU and < 10GB RAM, 5E2 x 2E5 = 100 million pairwise
    # distances across CDR1, CDR2, CDR2.5, and CDR3:
    # 1min 34s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
    # %timeit -r 1 tr.compute_rect_distances(df = tr.clone_df, df2 = tr_bkgd.clone_df, store = False)
    ############################################################################
    # Step 4: Calculate Distances                                          #####
    ############################################################################
    print(f"COMPUTING RECTANGULAR DISTANCE")
    import scipy.sparse
    tr.compute_sparse_rect_distances(
        df=tr.clone_df,
        df2=tr_bkgd.clone_df,
        radius=50,
        chunk_size=100)
    scipy.sparse.save_npz(os.path.join(project_path, f"{antigen_enriched_file}.rw_beta.npz"),
                          tr.rw_beta)
    # Tip: For larger datasets you can use a sparse implementation:
    # 30.8 s ± 0 ns per loop ; tr.cpus = 6
    # %timeit -r tr.compute_sparse_rect_distances(df = tr.clone_df, df2 = tr_bkgd.clone_df, radius=50, chunk_size=85)
    ############################################################################
    # Step 5: Examine Density ECDFS                                        #####
    ############################################################################
    # Investigate the density of neighbors to each TCR, based on an expanding
    # distance radius.
    from tcrdist.ecdf import distance_ecdf, _plot_manuscript_ecdfs
    import matplotlib.pyplot as plt
    # Compute the empirical cumulative density function (ecdf),
    # comparing Antigen Enriched TCRs against themselves.
    thresholds, antigen_enriched_ecdf = distance_ecdf(
        tr.pw_beta, thresholds=range(0, 50, 2))
    # Compute the empirical cumulative density function (ecdf),
    # comparing Antigen Enriched TCRs against the 200K inverse probability
    # weighted background.
    thresholds, background_ecdf = distance_ecdf(
        tr.rw_beta, thresholds=range(0, 50, 2),
        weights=tr_bkgd.clone_df['weights'],
        absolute_weight=True)
    # Plot ecdfs similar to the tcrdist3 manuscript:
    # antigen_enriched_ecdf[antigen_enriched_ecdf == antigen_enriched_ecdf.min()] = 1E-10
    f1 = _plot_manuscript_ecdfs(
        thresholds,
        antigen_enriched_ecdf,
        ylab='Proportion of Antigen Enriched TCRs',
        cdr3_len=tr.clone_df.cdr3_b_aa.str.len(),
        min_freq=1E-10)
    f1.savefig(os.path.join(project_path, f'{antigen_enriched_file}.ecdf_AER_plot.png'))
    f2 = _plot_manuscript_ecdfs(
        thresholds,
        background_ecdf,
        ylab='Proportion of Reference TCRs',
        cdr3_len=tr.clone_df.cdr3_b_aa.str.len(),
        min_freq=1E-10)
    f2.savefig(os.path.join(project_path, f'{antigen_enriched_file}.ecdf_BUR_plot.png'))
    ############################################################################
    # Step 6: Find optimal radii (theta = 1E5)                             #####
    ############################################################################
    # To ascertain which meta-clonotypes are likely to be most specific,
    # take advantage of an existing function <bkgd_cntl_nn2>.
    level_tag = '1E5'
    from tcrdist.neighbors import bkgd_cntl_nn2
    centers_df = bkgd_cntl_nn2(
        tr=tr,
        tr_background=tr_bkgd,
        weights=tr_bkgd.clone_df.weights,
        ctrl_bkgd=10**-5,
        col='cdr3_b_aa',
        add_cols=['v_b_gene', 'j_b_gene'],
        ncpus=4,
        include_seq_info=True,
        thresholds=[x for x in range(0, 50, 2)],
        generate_regex=True,
        test_regex=True,
        forced_max_radius=36)
    ############################################################################
    # Step 6.2: (theta = 1E5) ALL meta-clonotypes .tsv file                   ##
    ############################################################################
    # Save centers to project_path for future use.
    centers_df.to_csv(
        os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'),
        sep="\t")
    # Many of the meta-clonotypes contain redundant information.
    # We can winnow them down to a less-redundant list by ranking
    # clonotypes from most to least specific.
    # <min_nsubject> is the minimum publicity of the meta-clonotype, and
    # <min_nr> is the minimum non-redundancy.
    # Add neighbors, K_neighbors, and nsubject columns.
    from tcrdist.public import _neighbors_variable_radius, _neighbors_sparse_variable_radius
    centers_df['neighbors'] = _neighbors_variable_radius(pwmat=tr.pw_beta,
                                                         radius_list=centers_df['radius'])
    centers_df['K_neighbors'] = centers_df['neighbors'].apply(lambda x: len(x))
    # Determine how many <nsubjects> are in the set of neighbors.
    centers_df['nsubject'] = centers_df['neighbors'].\
        apply(lambda x: tr.clone_df['subject'].iloc[x].nunique())
    centers_df.to_csv(
        os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'),
        sep="\t")
    from tcrdist.centers import rank_centers
    ranked_centers_df = rank_centers(
        centers_df=centers_df,
        rank_column='chi2joint',
        min_nsubject=2,
        min_nr=1)
    ############################################################################
    # Step 6.3: (theta = 1E5) NR meta-clonotypes .tsv file                   ###
    ############################################################################
    # Output, ready to search bulk data.
    ranked_centers_df.to_csv(
        os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.tsv'),
        sep="\t")
    ############################################################################
    # Step 6.4: (theta = 1E5) Output Meta-Clonotypes HTML Summary            ###
    ############################################################################
    # Make an svg logo for each NR meta-clonotype.
    if ranked_centers_df.shape[0] > 0:
        from progress.bar import IncrementalBar
        from tcrdist.public import make_motif_logo
        cdr3_name = 'cdr3_b_aa'
        v_gene_name = 'v_b_gene'
        svgs = list()
        svgs_raw = list()
        bar = IncrementalBar('Processing', max=ranked_centers_df.shape[0])
        for i, r in ranked_centers_df.iterrows():
            bar.next()
            centroid = r[cdr3_name]
            v_gene = r[v_gene_name]
            svg, svg_raw = make_motif_logo(
                tcrsampler=ts,
                pwmat=tr.pw_beta,
                clone_df=tr.clone_df,
                centroid=centroid,
                v_gene=v_gene,
                radius=r['radius'],
                pwmat_str='pw_beta',
                cdr3_name='cdr3_b_aa',
                v_name='v_b_gene',
                gene_names=['v_b_gene', 'j_b_gene'])
            svgs.append(svg)
            svgs_raw.append(svg_raw)
        bar.next()
        bar.finish()
        ranked_centers_df['svg'] = svgs
        ranked_centers_df['svg_raw'] = svgs_raw

        def shrink(s):
            return s.replace('height="100%"', 'height="20%"').replace('width="100%"', 'width="20%"')

        labels = ['cdr3_b_aa', 'v_b_gene', 'j_b_gene', 'pgen', 'radius', 'regex',
                  'nsubject', 'K_neighbors', 'bkgd_hits_weighted', 'chi2dist',
                  'chi2re', 'chi2joint']
        output_html_name = os.path.join(project_path,
                                        f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.html')
        with open(output_html_name, 'w') as output_handle:
            for i, r in ranked_centers_df.iterrows():
                svg, svg_raw = r['svg'], r['svg_raw']
                output_handle.write("<br></br>")
                output_handle.write(shrink(svg))
                output_handle.write(shrink(svg_raw))
                output_handle.write("<br></br>")
                output_handle.write(pd.DataFrame(r[labels]).transpose().to_html())
                output_handle.write("<br></br>")
    ############################################################################
    # Step 6.5: Find optimal radii (theta = 1E6)                             ###
    ############################################################################
    # As above, ascertain which meta-clonotypes are likely to be most specific
    # using <bkgd_cntl_nn2>, now with a stricter background rate.
    level_tag = '1E6'
    from tcrdist.neighbors import bkgd_cntl_nn2
    centers_df = bkgd_cntl_nn2(
        tr=tr,
        tr_background=tr_bkgd,
        weights=tr_bkgd.clone_df.weights,
        ctrl_bkgd=10**-6,
        col='cdr3_b_aa',
        add_cols=['v_b_gene', 'j_b_gene'],
        ncpus=4,
        include_seq_info=True,
        thresholds=[x for x in range(0, 50, 2)],
        generate_regex=True,
        test_regex=True,
        forced_max_radius=36)
    ############################################################################
    # Step 6.6: (theta = 1E6) ALL meta-clonotypes .tsv file                   ##
    ############################################################################
    # Save centers to project_path for future use.
    centers_df.to_csv(
        os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'),
        sep="\t")
    # Many of the meta-clonotypes contain redundant information.
    # We can winnow them down to a less-redundant list by ranking
    # clonotypes from most to least specific.
    # <min_nsubject> is the minimum publicity of the meta-clonotype, and
    # <min_nr> is the minimum non-redundancy.
    # Add neighbors, K_neighbors, and nsubject columns.
    from tcrdist.public import _neighbors_variable_radius, _neighbors_sparse_variable_radius
    centers_df['neighbors'] = _neighbors_variable_radius(pwmat=tr.pw_beta,
                                                         radius_list=centers_df['radius'])
    centers_df['K_neighbors'] = centers_df['neighbors'].apply(lambda x: len(x))
    # Determine how many <nsubjects> are in the set of neighbors.
    centers_df['nsubject'] = centers_df['neighbors'].\
        apply(lambda x: tr.clone_df['subject'].iloc[x].nunique())
    centers_df.to_csv(
        os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'),
        sep="\t")
    from tcrdist.centers import rank_centers
    ranked_centers_df = rank_centers(
        centers_df=centers_df,
        rank_column='chi2joint',
        min_nsubject=2,
        min_nr=1)
    ############################################################################
    # Step 6.7: (theta = 1E6) NR meta-clonotypes .tsv file                   ###
    ############################################################################
    # Output, ready to search bulk data.
    ranked_centers_df.to_csv(
        os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.tsv'),
        sep="\t")
    ############################################################################
    # Step 6.8: (theta = 1E6) Output Meta-Clonotypes HTML Summary            ###
    ############################################################################
    # Make an svg logo for each meta-clonotype.
    from progress.bar import IncrementalBar
    from tcrdist.public import make_motif_logo
    if ranked_centers_df.shape[0] > 0:
        cdr3_name = 'cdr3_b_aa'
        v_gene_name = 'v_b_gene'
        svgs = list()
        svgs_raw = list()
        bar = IncrementalBar('Processing', max=ranked_centers_df.shape[0])
        for i, r in ranked_centers_df.iterrows():
            bar.next()
            centroid = r[cdr3_name]
            v_gene = r[v_gene_name]
            svg, svg_raw = make_motif_logo(
                tcrsampler=ts,
                pwmat=tr.pw_beta,
                clone_df=tr.clone_df,
                centroid=centroid,
                v_gene=v_gene,
                radius=r['radius'],
                pwmat_str='pw_beta',
                cdr3_name='cdr3_b_aa',
                v_name='v_b_gene',
                gene_names=['v_b_gene', 'j_b_gene'])
            svgs.append(svg)
            svgs_raw.append(svg_raw)
        bar.next()
        bar.finish()
        ranked_centers_df['svg'] = svgs
        ranked_centers_df['svg_raw'] = svgs_raw

        def shrink(s):
            return s.replace('height="100%"', 'height="20%"').replace('width="100%"', 'width="20%"')

        labels = ['cdr3_b_aa', 'v_b_gene', 'j_b_gene', 'pgen', 'radius', 'regex',
                  'nsubject', 'K_neighbors', 'bkgd_hits_weighted', 'chi2dist',
                  'chi2re', 'chi2joint']
        output_html_name = os.path.join(project_path,
                                        f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.html')
        with open(output_html_name, 'w') as output_handle:
            for i, r in ranked_centers_df.iterrows():
                svg, svg_raw = r['svg'], r['svg_raw']
                output_handle.write("<br></br>")
                output_handle.write(shrink(svg))
                output_handle.write(shrink(svg_raw))
                output_handle.write("<br></br>")
                output_handle.write(pd.DataFrame(r[labels]).transpose().to_html())
                output_handle.write("<br></br>")
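# A usage sketch for find_metaclonotypes() as defined above; the arguments
# mirror its defaults (source_path assumes path_to_base is defined as in the
# source), and only the CPU count is overridden here.
find_metaclonotypes(
    project_path='tutorial48',
    antigen_enriched_file='mira_epitope_48_610_YLQPRTFL_YLQPRTFLL_YYVGYLQPRTF.tcrdist3.csv',
    ncpus=2,
    seed=3434)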
def analogy_exp(embs, hdataset, hparams):
    """
    Runs the two big analogy datasets on the set of embeddings passed to it.
    Calculates 3cosadd and 3cosmul.

    :param embs: Embeddings class, a hilbert embeddings object.
    :param hdataset: HilbertDataset object
    :param hparams: unused - kept for interface functionality
    :return: ResultsHolder object
    """
    results = ResultsHolder(ANALOGY)

    # normalize for faster sim calcs.
    embs.matrix = F.normalize(embs.matrix, p=2, dim=1)

    # for showing progress over time
    total_iter = sum(len(samples) for samples in hdataset.values())
    iter_step = 100
    bar = IncrementalBar('Running analogy experiments', max=total_iter)

    # iterate over the two analogy datasets
    for dname, samples in hdataset.items():
        correct_cosadd = 0
        correct_cosmul = 0
        missing_words = 0
        missing_answer = 0
        total_all_embeddings = 0

        # w1 is to w2 as w3 is to w4
        for i, (w1, w2, w3, w4) in enumerate(samples):
            if i % iter_step == 0:
                bar.next(n=iter_step)
            if not embs.has_w(w4):
                missing_answer += 1
                continue

            e1 = embs.get_vec(w1).reshape(-1, 1)
            e2 = embs.get_vec(w2).reshape(-1, 1)
            e3 = embs.get_vec(w3).reshape(-1, 1)

            # get cos sims for each of them with the dataset
            sim_all = embs.matrix.mm(torch.cat([e1, e2, e3], dim=1))

            # calculate 3cosadd
            cos_add = sim_all[:, 1] + sim_all[:, 2] - sim_all[:, 0]

            # 3cosmul requires all similarities to be nonnegative, conveniently
            # told to us in a footnote.
            # see footnote 7 in http://anthology.aclweb.org/W/W14/W14-1618.pdf
            sim_all = (sim_all + 1) / 2
            cos_mul = (sim_all[:, 1] * sim_all[:, 2]) / (sim_all[:, 0] + 0.001)  # add epsilon to avoid divide by 0

            # make sure we don't return the query vectors themselves
            have_all_embs = True
            for wi in (w1, w2, w3):
                try:
                    w_id = embs.get_id(wi)
                    cos_add[w_id] = -np.inf
                    cos_mul[w_id] = -np.inf
                except KeyError:
                    missing_words += 1
                    have_all_embs = False

            # get the best with argmax
            best_w_add = embs.get_token(cos_add.argmax())
            best_w_mul = embs.get_token(cos_mul.argmax())

            # count up for final accuracy
            correct_cosadd += 1 if w4 == best_w_add else 0
            correct_cosmul += 1 if w4 == best_w_mul else 0
            total_all_embeddings += 1 if have_all_embs else 0

        # save the accuracies
        results.add_ds_results(dname, {
            '3cosadd': correct_cosadd / len(samples),
            '3cosmul': correct_cosmul / len(samples),
            '3cosadd_had_answer': correct_cosadd / (len(samples) - missing_answer),
            '3cosmul_had_answer': correct_cosmul / (len(samples) - missing_answer),
            '3cosadd_full_coverage': correct_cosadd / total_all_embeddings,
            '3cosmul_full_coverage': correct_cosmul / total_all_embeddings,
            'missing_words': missing_words / (3 * len(samples)),
            'missing_answer': missing_answer / len(samples),
            'coverage': total_all_embeddings / len(samples),
        })

    bar.finish()
    return results
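# A toy numpy illustration of the 3cosadd / 3cosmul scoring used above, on
# made-up unit vectors: "a is to b as c is to ?" scores every row of M.
# Excluding the query rows from the argmax (as the function above does) is
# omitted here for brevity.
import numpy as np

M = np.array([[1.0, 0.0], [0.0, 1.0], [0.7071, 0.7071]])  # hypothetical normalized embeddings
a, b, c = M[0], M[1], M[2]
sims = M @ np.stack([a, b, c], axis=1)      # cosine sims, since all rows are unit length
cos_add = sims[:, 1] + sims[:, 2] - sims[:, 0]
shifted = (sims + 1) / 2                    # map sims into [0, 1] for 3cosmul
cos_mul = shifted[:, 1] * shifted[:, 2] / (shifted[:, 0] + 0.001)
print(cos_add.argmax(), cos_mul.argmax())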
def plot_all_results(Usol, x, t):
    """
    Plot pressure, density, velocity and lambda.

    :param Usol: Solution tensor
    :return:
    """
    Res = np.zeros_like(Usol)
    # Res[0] = P        # Pressure
    # Res[1] = Rho      # Density
    # Res[2] = velocity
    # Res[3] = Phi
    # Res[4] = lambda
    prog_bar = IncrementalBar(
        'Finished simulation. '
        'Computing plot variables...', max=len(t))
    for tind, time in enumerate(t):
        U = Usol[tind, :, :]
        # Compute physical values from U state vectors
        F, pp = f(U)
        S, C = s(U, F, pp)
        (u, E, PHI, LAMBD, V, P) = pp
        Res[tind, 0, :] = P
        Res[tind, 1, :] = U[0]
        Res[tind, 2, :] = u
        Res[tind, 3, :] = PHI
        Res[tind, 4, :] = LAMBD
        prog_bar.next()
    prog_bar.finish()

    Pressure_plot = np.minimum(Res[:, 0, :] * 1e3, np.ones_like(Res[:, 0, :]) * 50)
    plot_u_t(
        x, t, Pressure_plot,
        #title=r'Pressure $P$ (GPa) $\times 10^{-3}$', fign=0)
        title=r'Pressure $P$ (GPa)', fign=0)
    Density_plot = np.minimum(Res[:, 1, :] * 1e3, np.ones_like(Res[:, 0, :]) * 5)
    plot_u_t(
        x, t, Density_plot,
        #title=r'Density $\rho$ (g/mm$^3$)', fign=1)
        title=r'Density $\rho$ (g/cm$^3$)', fign=1)
    Velocity_plot = np.minimum(Res[:, 2, :], np.ones_like(Res[:, 0, :]) * 5)
    Velocity_plot = np.maximum(Velocity_plot, np.ones_like(Res[:, 0, :]) * -5)
    plot_u_t(x, t, Velocity_plot, title=r'Velocity $u$ (mm . $\mu$s$^{-1}$)', fign=2)
    plot_u_t(x, t, Res[:, 3, :], title=r'$\phi$ ', fign=3)
    plot_u_t(x, t, Res[:, 4, :], title=r'$\lambda$ ', fign=4)
    return F
def _quasi_public_meta_clonotypes(clone_df,
                                  pwmat,
                                  tcrsampler,
                                  cdr3_name='cdr3_d_aa',
                                  v_gene_name='v_d_gene',
                                  nr_filter=True,
                                  output_html_name="quasi_public_clones.html",
                                  sort_columns=['nsubject', 'K_neighbors'],
                                  sort_ascending=False,
                                  labels=['clone_id',
                                          'cdr3_d_aa',
                                          'v_d_gene',
                                          'j_d_gene',
                                          'radius',
                                          'neighbors',
                                          'K_neighbors',
                                          #'cdr3s',
                                          'nsubject',
                                          'qpublic',
                                          'cdr3_d_aa.summary',
                                          'v_d_gene.summary',
                                          'j_d_gene.summary',
                                          'subject.summary'],
                                  fixed_radius=False,
                                  radius=None,
                                  query_str='qpublic == True & K_neighbors > 1',
                                  kargs_member_summ={
                                      'key_col': 'neighbors',
                                      'count_col': 'count',
                                      'addl_cols': ['subject'],
                                      'addl_n': 4},
                                  kargs_motif={
                                      'pwmat_str': 'pw_delta',
                                      'cdr3_name': 'cdr3_d_aa',
                                      'v_name': 'v_d_gene',
                                      'gene_names': ['v_d_gene', 'j_d_gene']}):
    """
    _quasi_public_meta_clonotypes

    Parameters
    ----------
    clone_df : pd.DataFrame
        Clone information with standard tcrdist3 column names.
    pwmat : np.array
        Pairwise distances.
    tcrsampler : tcrsampler.TCRsampler
        TCRsampler instance initialized with an appropriate background set.
    cdr3_name : str
        Column name for amino acid CDR3, e.g., 'cdr3_d_aa'.
    v_gene_name : str
        Column name for TR[ABGD]V gene, e.g., 'v_d_gene'.
    nr_filter : bool
        If True, sequences with the exact same neighbors as another set will
        be dropped.
    output_html_name : str
        Filename for the html output.
    labels : list
        List of columns to display in the html output beneath each logo plot.
    fixed_radius : bool
        If False, clone_df must have a column radius. If True, the argument
        radius will be used to define the maximum distance from a centroid to
        a neighboring TCR.
    radius : int or None
        Threshold distance (<=) for neighborhood membership. If int, then all
        centroids will be assigned the same radius. Alternatively, a radius
        can be provided for each centroid sequence by including radius as a
        numeric column in clone_df.
    query_str : str
        The string used to include sequences in the output. For instance,
        'qpublic == True and K_neighbors > 3' implies that only groupings of
        4 or more TCRs from at least two individuals will be retained.
        Alternatively, 'nsubject > 1' or 'qpublic == True' could be used as
        true minimum requirements for quasi-publicity.
    kargs_member_summ : dict
        kwargs for the member summary.
    kargs_motif : dict
        kwargs for the motif generation.

    Returns
    -------
    Returns DataFrames in a dictionary:

    {'nn_summary': nn_summary, 'quasi_public_df': quasi_public_df}

    nn_summary : pd.DataFrame
        DataFrame matching clone_df with summary measures added.
    quasi_public_df : pd.DataFrame
        DataFrame with only those rows that match the <query_str> and
        nr_filter.

    Notes
    -----
    Importantly, an html file is written displaying the quasi-public
    meta-clonotypes.

    The easiest way to integrate this with an existing neighbor_diff is to add
    'neighbors' and 'K_neighbors' to the clone_df. Other columns could be
    added as well, and then displayed if added to the list of labels.

    nn_clone_df = pd.concat([tr.clone_df,
                             ndif[['neighbors', 'K_neighbors', 'val_0', 'ct_0']]], axis=1)

    Examples
    --------
    """
    if 'neighbors' not in clone_df.columns:
        if fixed_radius:
            clone_df['radius'] = radius
            clone_df['neighbors'] = _neighbors_fixed_radius(pwmat=pwmat, radius=radius)
        else:
            assert 'radius' in clone_df.columns, \
                "if not using fixed_radius, the clone_df must have a numeric 'radius' column"
            clone_df['neighbors'] = _neighbors_variable_radius(
                pwmat=pwmat, radius_list=clone_df.radius)

    if 'K_neighbors' not in clone_df.columns:
        if fixed_radius:
            clone_df['K_neighbors'] = _K_neighbors_fixed_radius(pwmat=pwmat, radius=radius)
        else:
            clone_df['K_neighbors'] = _K_neighbors_variable_radius(
                pwmat=pwmat, radius_list=clone_df.radius)

    if 'nsubject' not in clone_df.columns:
        clone_df['nsubject'] = clone_df['neighbors'].\
            apply(lambda x: clone_df['subject'].iloc[x].nunique())

    if 'qpublic' not in clone_df.columns:
        clone_df['qpublic'] = clone_df['nsubject'].\
            apply(lambda x: x > 1)

    nn_summary = member_summ(res_df=clone_df, clone_df=clone_df, **kargs_member_summ)
    nn_summary = nn_summary.rename(columns={k: f'{k}.summary' for k in nn_summary.columns})

    clone_df['cdr3s'] = clone_df['neighbors'].apply(lambda x: clone_df[cdr3_name].iloc[x].to_list())

    clone_df = pd.concat([clone_df, nn_summary], axis=1)

    quasi_public_df = clone_df.query(query_str).\
        sort_values(sort_columns, ascending=sort_ascending).\
        reset_index(drop=True).\
        copy()

    if quasi_public_df.shape[0] == 0:
        raise ValueError(
            "UNFORTUNATELY NO QUASI-PUBLIC CLONES WERE FOUND, CONSIDER RELAXING YOUR QUERY STRINGENCY")

    quasi_public_df['unique_set'] = test_for_subsets(quasi_public_df['neighbors'])
    if nr_filter:
        quasi_public_df = filter_is(quasi_public_df, 'unique_set', 1).reset_index(drop=True)

    print(f"GENERATING {quasi_public_df.shape[0]} QUASI-PUBLIC MOTIFS SATISFYING {query_str}")
    bar = IncrementalBar('Processing', max=quasi_public_df.shape[0])
    svgs = list()
    svgs_raw = list()
    for i, r in quasi_public_df.iterrows():
        bar.next()
        centroid = r[cdr3_name]
        v_gene = r[v_gene_name]
        svg, svg_raw = make_motif_logo(tcrsampler=tcrsampler,
                                       pwmat=pwmat,
                                       clone_df=clone_df,
                                       centroid=centroid,
                                       v_gene=v_gene,
                                       radius=r['radius'],
                                       **kargs_motif)
        svgs.append(svg)
        svgs_raw.append(svg_raw)
    bar.next()
    bar.finish()

    quasi_public_df['svg'] = svgs
    quasi_public_df['svg_raw'] = svgs_raw

    def shrink(s):
        s = s.replace('height="100%"', 'height="20%"')
        s = s.replace('width="100%"', 'width="20%"')
        return s

    print(labels)

    with open(output_html_name, 'w') as output_handle:
        for i, r in quasi_public_df.iterrows():
            svg, svg_raw = r['svg'], r['svg_raw']
            output_handle.write("<br></br>")
            output_handle.write(shrink(svg))
            output_handle.write(shrink(svg_raw))
            output_handle.write("<br></br>")
            output_handle.write(pd.DataFrame(r[labels]).transpose().to_html())
            output_handle.write("<br></br>")

    return {'nn_summary': nn_summary,
            'quasi_public_df': quasi_public_df,
            'clone_df': clone_df}
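# A hedged usage sketch for _quasi_public_meta_clonotypes() above. Here `tr`
# is assumed to be a tcrdist3 TCRrep with delta-chain distances in tr.pw_delta
# and `ts` a TCRsampler with a suitable background, per the docstring; the
# fixed radius of 18 is a hypothetical choice.
res = _quasi_public_meta_clonotypes(
    clone_df=tr.clone_df,
    pwmat=tr.pw_delta,
    tcrsampler=ts,
    cdr3_name='cdr3_d_aa',
    v_gene_name='v_d_gene',
    fixed_radius=True,
    radius=18,
    output_html_name='quasi_public_clones.html')
quasi_public_df = res['quasi_public_df']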
print('Phone #: ' + phone)  # Prints 'phone' variable.
print('Hours of Operation: ')  # Prints heading for hours.
# Prints variable hours.
print(hours[:9] + ': ' + hours[10:28])
print(hours[29:32] + ': ' + hours[33:51])
print(hours[52:55] + ': ' + hours[56:74])
print(hours[75:84] + ': ' + hours[85:103])
print('Review Overview: ')  # Prints heading for Reviews.
# Calls 'circles' function and prints what it returns and the variable 'rating'.
print(circles(rating) + ': ' + str(rating) + ' Rating')

excellentBar = IncrementalBar('Excellent: ', max=total)  # Creates an IncrementalBar item.
for i in range(excellent):  # Iterates n times, n = number of excellent reviews.
    excellentBar.next()  # Updates bar length.
print(' : ' + str(int(round(excellent / total, 2) * 100)) + '%')  # Prints number of reviews and percentage.

verygoodBar = IncrementalBar('Very Good: ', max=total)  # Creates an IncrementalBar item.
for i in range(verygood):  # Iterates n times, n = number of very good reviews.
    verygoodBar.next()  # Updates bar length.
print(' : ' + str(int(round(verygood / total, 2) * 100)) + '%')  # Prints number of reviews and percentage.

averageBar = IncrementalBar('Average: ', max=total)  # Creates an IncrementalBar item.
for i in range(average):  # Iterates n times, n = number of average reviews.
    averageBar.next()  # Updates bar length.
print(' : ' + str(int(round(average / total, 2) * 100)) + '%')  # Prints number of reviews and percentage.
def main(args):
    if not os.path.exists('results'):
        os.makedirs('results')
    if not os.path.exists('counters'):
        os.makedirs('counters')
    exp_type = utils.create_file_prefix(args.positive_fraction, args.with_delta,
                                        args.fraction, args.sampler_size, args.pop)
    send_strategy = SendStrategy.SendDelta() if args.with_delta else SendStrategy.SendVector()
    for dataset in args.datasets:
        print("Working on", dataset, "dataset")
        if not os.path.exists('results/{}'.format(dataset)):
            os.makedirs('results/{}'.format(dataset))
        if not os.path.exists('counters/{}'.format(dataset)):
            os.makedirs('counters/{}'.format(dataset))

        if args.create_dataset_files:
            # Read the dataset and prepare it for training, validation and test
            names = ['user_id', 'item_id', 'rating', 'utc']
            df = pd.read_csv('original_datasets/' + dataset + '.tsv',
                             sep='\t',
                             dtype={'rating': 'float64', 'utc': 'int64'},
                             header=0,
                             names=names)
            df = df.groupby('user_id').filter(lambda x: len(x) >= 20)
            print(df.shape[0], 'interactions read')
            df, _ = utils.convert_unique_idx(df, 'user_id')
            df, _ = utils.convert_unique_idx(df, 'item_id')
            user_size = len(df['user_id'].unique())
            item_size = len(df['item_id'].unique())
            print('Found {} users and {} items'.format(user_size, item_size))
            total_user_lists = utils.create_user_lists(df, user_size, 4)
            train_user_lists, validation_user_lists, test_user_lists = utils.split_train_test(
                total_user_lists,
                test_size=0.2,
                validation_size=args.validation_size)
            #train_interactions_size = sum([len(user_list) for user_list in train_user_lists])
            #print('{} interactions considered for training'.format(train_interactions_size))
            if not os.path.exists('sets'):
                os.makedirs('sets')
            with open('sets/{}_trainingset.tsv'.format(dataset), 'w') as out:
                for u, train_list in enumerate(train_user_lists):
                    for i in train_list:
                        out.write(str(u) + '\t' + str(i) + '\t' + str(1) + '\n')
            with open('sets/{}_testset.tsv'.format(dataset), 'w') as out:
                for u, test_list in enumerate(test_user_lists):
                    for i in test_list:
                        out.write(str(u) + '\t' + str(i) + '\t' + str(1) + '\n')
            continue

        df = pd.read_csv('sets/{}_trainingset.tsv'.format(dataset),
                         sep='\t',
                         names=['user_id', 'item_id', 'rating'])
        df, reverse_dict = utils.convert_unique_idx(df, 'item_id')
        user_size = len(df['user_id'].unique())
        item_size = len(df['item_id'].unique())
        print('Found {} users and {} items'.format(user_size, item_size))
        train_user_lists = utils.create_user_lists(df, user_size, 3)
        train_interactions_size = sum([len(user_list) for user_list in train_user_lists])
        print('{} interactions considered for training'.format(train_interactions_size))

        if args.pop:
            print("Analyzing popularity... \r")
            most_popular_items = (args.pop, utils.get_popularity(train_user_lists))
            print("Done.")
        else:
            most_popular_items = None
        if args.pop == 3:
            splitting_epochs = [int(7 * args.n_epochs / 8),
                                int(3 * args.n_epochs / 4),
                                int(args.n_epochs / 2)]

        # Set parameters based on arguments
        if args.fraction == 0:
            round_modifier = int(train_interactions_size)
        else:
            round_modifier = int(train_interactions_size / (args.fraction * user_size))
        sampler_dict = {'single': 1,
                        'uniform': int(train_interactions_size / user_size)}
        sampler_size = sampler_dict.get(args.sampler_size)

        # Build final triplet samplers
        triplet_samplers = [TripletSampler(train_user_lists[u], item_size, sampler_size)
                            for u in range(user_size)]

        for n_factors in args.n_factors:
            exp_setting_1 = "_F" + str(n_factors)
            for lr in args.lr:
                exp_setting_2 = exp_setting_1 + "_LR" + str(lr)

                # Create server and clients
                server_model = ServerModel(item_size, n_factors)
                server = Server(server_model, lr, args.fraction, args.positive_fraction,
                                args.mp, send_strategy, most_popular_items)
                clients = [Client(u, ClientModel(n_factors), triplet_samplers[u],
                                  train_user_lists[u], sampler_size)
                           for u in range(user_size)]

                # Start training
                for i in range(args.n_epochs * round_modifier):
                    if i % round_modifier == 0:
                        bar = IncrementalBar('Epoch ' + str(int(i / round_modifier + 1)),
                                             max=round_modifier)
                    bar.next()
                    server.train_model(clients)
                    if args.pop:
                        if args.pop == 3:
                            if len(splitting_epochs) > 0:
                                if (i + 1) % (splitting_epochs[-1] * round_modifier) == 0:
                                    splitting_epochs.pop()
                                    server.new_step()
                        else:
                            if (i + 1) % (args.step_every * round_modifier) == 0:
                                server.new_step()
                    # Evaluation
                    if ((i + 1) % (args.eval_every * round_modifier)) == 0:
                        exp_setting_3 = exp_setting_2 + "_I" + str((i + 1) / round_modifier)
                        results = server.predict(clients, max_k=100)
                        with open('results/{}/{}{}.tsv'.format(dataset, exp_type, exp_setting_3),
                                  'w') as out:
                            for u in range(len(results)):
                                for e, p in results[u].items():
                                    out.write(str(u) + '\t' + str(reverse_dict[e]) + '\t' + str(p) + '\n')

                final_dict = {k: 0 for k in range(item_size)}
                for i in server.big_list:
                    final_dict[i] += 1
                with open('counters/{}/{}.tsv'.format(dataset, exp_type), 'w') as out:
                    for k, v in final_dict.items():
                        out.write(str(reverse_dict[k]) + '\t' + str(v) + '\n')
def laba3(db_file_name, count_range, schema, schema_data):
    results = {
        'linear': [],
        'binary': [],
        'binary+sort': [],
        'multimap': [],
        'hashtable_map_good': [],
        'hashtable_map_bad': [],
        'bad_collisions': [],
        'good_collisions': []
    }
    key = 'fio'
    max_count_iterations = 2
    iterations = len(count_range)
    bar = IncrementalBar('Countdown', max=iterations)
    bar.start()
    for count in count_range:
        bar.next()
        print('\n')
        for count_iterations in range(max_count_iterations):
            generate(db_file_name, count, schema, schema_data)
            fp_list = load_fp_from_file(db_file_name)
            # Fill the multimap before timing lookups against it.
            fp_map = defaultdict(list)
            for el in fp_list:
                fp_map[getattr(el, key)].append(el)
            query_obj = random.choice(fp_list)
            query = getattr(query_obj, key)
            print('check lin')
            linear = check_time(linear_search)(fp_list, key, query)
            print('check sort+bin')
            sort_and_bin_search = check_time(sort_and_binary_seach)(fp_list, key, query)
            print('check bin')
            bin_search = check_time(binary_search)(fp_list, key, query)
            print('check multimap')
            map_search = check_time(fp_map.__getitem__)(query)
            print('check hashtable good')
            fp_custom_map_good = HashTable()
            for el in fp_list:
                el.set_hash_type('good')
                fp_custom_map_good.add(el)
            query_obj.set_hash_type('good')
            custom_map_good_search = check_time(fp_custom_map_good.get)(Hashes.good_hash(query))
            print('check hashtable bad')
            fp_custom_map_bad = HashTable()
            for el in fp_list:
                el.set_hash_type('bad')
                fp_custom_map_bad.add(el)
            query_obj.set_hash_type('bad')
            custom_map_bad_search = check_time(fp_custom_map_bad.get)(Hashes.bad_hash(query))
            results['linear'].append((count, linear))
            results['binary'].append((count, bin_search))
            results['binary+sort'].append((count, sort_and_bin_search))
            results['multimap'].append((count, map_search))
            results['hashtable_map_good'].append((count, custom_map_good_search))
            results['hashtable_map_bad'].append((count, custom_map_bad_search))
            results['bad_collisions'].append((count, fp_custom_map_bad.collision_count))
            results['good_collisions'].append((count, fp_custom_map_good.collision_count))
    plot_graph(results, count_range, max_count_iterations)
    print('bad_collisions: ', results['bad_collisions'])
    print('good_collisions: ', results['good_collisions'])
    bar.finish()
    return results
def animate_pixels(imfile1, imfile2, outfile, color=False, verbose=False):
    """Animates a pixel-motion transition between two images.

    Images must have the exact same number of pixels.
    Animation is saved as "outfile".

    Parameters
    ----------
    imfile1 : str or file object
        The file name or file object for the first image
    imfile2 : str or file object
        The file name or file object for the second image
    outfile : str
        The output file name
    color : bool, optional
        If True, runs in color mode
    verbose : bool, optional
        If True, displays a progress bar in the console
    """
    # Read in images
    if color:
        img1 = np.array(imread(imfile1)) / 255
        img2 = np.array(imread(imfile2)) / 255
    else:
        img1 = np.array(imread(imfile1, as_gray=True)) / 255
        img2 = np.array(imread(imfile2, as_gray=True)) / 255
    # Check number of pixels
    if img1.shape[0] * img1.shape[1] != img2.shape[0] * img2.shape[1]:
        raise ValueError("Images must have the same number of pixels")
    # Sort pixels by saturation (if grayscale) or hue (if color)
    if verbose:
        bar1 = IncrementalBar("Sorting\t\t", max=2, suffix='%(percent)d%%')
    if color:
        rows1, cols1, colors1 = color_to_coords(img1)
    else:
        rows1, cols1, colors1 = grayscale_to_coords(img1)
    if verbose:
        bar1.next()
    if color:
        rows2, cols2, colors2 = color_to_coords(img2)
    else:
        rows2, cols2, colors2 = grayscale_to_coords(img2)
    if verbose:
        bar1.next()
        bar1.finish()
    # n is number of frames of one-directional transition
    # buffer is number of stationary frames before and after the transitions
    # total is number of frames for two transitions with 2 buffer periods each
    n = 100
    buffer = 10
    total = 2 * n + 4 * buffer
    # np.linspace creates evenly spaced position and color arrays for transition
    if verbose:
        bar2 = IncrementalBar("Interpolating\t", max=4, suffix='%(percent)d%%')
    colors = np.linspace(colors1, colors2, n)
    if verbose:
        bar2.next()
    rows = np.linspace(rows1 + .5, rows2 + .5, n)
    if verbose:
        bar2.next()
    cols = np.linspace(cols1 + .5, cols2 + .5, n)
    if verbose:
        bar2.next()
    pos = np.dstack((rows, cols))
    if verbose:
        bar2.next()
        bar2.finish()
    # Calculate the aspect ratio of the two images
    aspect_ratio1 = img1.shape[0] / img1.shape[1]
    aspect_ratio2 = img2.shape[0] / img2.shape[1]
    plt.ioff()
    # Figure will always have default matplotlib 6.4 inch width
    fig = plt.figure(figsize=(6.4, max(aspect_ratio1, aspect_ratio2) * 6.4))
    ax = fig.add_subplot(111)
    ax.set_aspect("equal")
    plt.axis("off")
    plt.xlim((0, max(img1.shape[1], img2.shape[1])))
    plt.ylim((0, max(img1.shape[0], img2.shape[0])))
    # Markers are measured in points, which are 1/72nd of an inch.
    # Calculates pixel size in points
    pixels = max(img1.shape[1], img2.shape[1])
    pixels_per_inch = pixels / 6.4
    size = 72 / pixels_per_inch
    # core object is a scatter plot with square markers set to pixel size
    if color:
        points = ax.scatter(rows[0], cols[0], c=colors1, marker='s', s=size**2)
    else:
        points = ax.scatter(rows[0], cols[0], c=colors1, cmap="gray", marker='s',
                            s=size**2, vmin=0, vmax=1)

    # update function changes the scatter plot at each frame
    # set_color works for rgb, set_array works for grayscale
    def update(j):
        if j >= buffer and j < buffer + n:
            i = j - buffer
            points.set_offsets(pos[i])
            if color:
                points.set_color(colors[i])
            else:
                points.set_array(colors[i])
        elif j >= 3 * buffer + n and j < 3 * buffer + 2 * n:
            i = n - (j - (3 * buffer + n)) - 1
            points.set_offsets(pos[i])
            if color:
                points.set_color(colors[i])
            else:
                points.set_array(colors[i])
        if verbose:
            bar3.next()

    if verbose:
        bar3 = IncrementalBar("Rendering\t", max=total, suffix='%(percent)d%%')
    # Create FuncAnimation with 60-millisecond interval between frames
    ani = animation.FuncAnimation(fig, update, frames=total, interval=60)
    # Save animation and close the figure
    ani.save(outfile)
    if verbose:
        bar3.next()
        bar3.finish()
    plt.close(fig)
    plt.ion()
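# A minimal usage sketch for animate_pixels (hypothetical file names; both images
# must contain the same number of pixels, as the ValueError above enforces):
#
#   animate_pixels('before.png', 'after.png', 'transition.gif',
#                  color=True, verbose=True)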
def trash_videos(time_limit, extensions, trash_folder_name, sudo):
    """Trash the videos that are shorter than time_limit to get rid of the shooting errors.

    Parameters
    ----------
    time_limit : int
        Duration limit. If a video has a duration smaller than time_limit,
        it is moved into trash_folder_name.
    extensions : dict
        Contains the lists of extensions for each type of file.
    trash_folder_name : string
        Name of the folder where to put the trashed videos. Equal to 'Trash'
        by default but can be changed in the video-logging/data.yaml file.
    sudo : bool
        Whether sudo mode is activated or not.
    """
    def move_to_trash(file, duration, trash_folder_name):
        """Move a video to trash if it is too short.

        Check if a directory named trash_folder_name exists in the current
        directory. If not, create it. Then, move `file` into trash_folder_name
        if `duration` is smaller than `time_limit`.

        Parameters
        ----------
        file : string
            File to check.
        duration : int
            Duration of video file.
        trash_folder_name : string
            Name of the folder where to put the trashed videos. Equal to 'Trash'
            by default but can be changed in the video-logging/data.yaml file.
        """
        if duration < time_limit:
            if os.path.exists(trash_folder_name):  # if 'trash_folder_name' already exists
                if os.path.isfile(trash_folder_name):  # if 'trash_folder_name' is a regular file
                    raise BadFolderName(
                        f"You have a file named '{trash_folder_name}' in the current "
                        "working directory, which is not a valid file name because this "
                        "tool uses it as a directory name. You may consider changing "
                        "the 'trash_folder_name' default in 'data.yaml'."
                    )
                else:  # if 'trash_folder_name' is a directory
                    pass
            else:  # if 'trash_folder_name' does not exist
                os.mkdir(f'./{trash_folder_name}')
            os.rename(file, os.path.join(trash_folder_name, file))
            return True
        return False

    check_parent(sudo)
    n = get_number_files(extensions, directory='Videos')
    if n == 0:
        raise EmptyFolder("Nothing to do here, this folder does not contain any video.")
    bar = IncrementalBar(f"Trashing videos of duration <= {time_limit}s...", max=n)
    nb_trashed = 0
    for file in os.listdir():
        extension = os.path.splitext(file)[1]
        if extension in extensions['Videos']:
            with VideoFileClip(file) as clip:
                # we need to wait a little so that bad things do not happen
                time.sleep(.001)
                duration = clip.duration
            is_moved = move_to_trash(file, duration, trash_folder_name)  # warning: side effect happening here
            if is_moved:
                nb_trashed += 1
            bar.next()
    bar.finish()
    term = "s" if nb_trashed >= 2 else ""
    return f"{nb_trashed} video{term} trashed."
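# A minimal usage sketch for trash_videos (hypothetical values; 'extensions' must
# contain a 'Videos' key, as the function expects):
#
#   msg = trash_videos(time_limit=3,
#                      extensions={'Videos': ['.mp4', '.mov', '.avi']},
#                      trash_folder_name='Trash',
#                      sudo=False)
#   print(msg)  # e.g. "2 videos trashed."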
            # Check the loss
            loss = error_function(output, labels)
            optimiser.zero_grad()
            loss.backward()
            return loss

        inputs, labels = inputs.to(device), labels.to(device)
        # Call the closure and read the loss
        loss = optimiser.step(closure)
        training_loss += loss.item() * inputs.size(0)
        training_bar.next()

    # Training timer
    training_time = helper.with_decimal_places(time() - training_timer, 2)
    print(f" | time taken: {training_time} seconds")

    validation_bar = IncrementalBar(
        message='Validating',
        max=len(validation_loader),
        suffix="%(percent)d%% [%(elapsed_td)s / %(eta_td)s]")
    validation_timer = time()

    # -------------------
    #  VALIDATION STEP
    # -------------------
        # Get coordinate
        coord = C['coord']
        filepaths = C['filepaths']
        # Iterate over filepaths for this coordinate
        for filepath in filepaths:
            try:
                with h5py.File(filepath, 'r') as f:  # Open HDF5 file
                    # Find tile coords, slice tiles and write files
                    ind = couple_indexer(f, coord)
                    imgcnt += couple_slicer(f, ind, hdfcnt, coord, targetpath)
                # Finish up iteration
                hdfcnt += 1  # Count processed files
                bar.next()   # Show progress bar
            # Break out of loops in case of file error
            except OSError:
                print('Error opening file:\n' + filepath)
                fileerror = True
                break
        if fileerror:
            break

    bar.finish()  # Finish progress bar

    # Report done if no error occurred
    if not fileerror:
        print('Done processing {} files. Wrote {} image files.'.format(
def install(package_list):
    '''Install specified package(s).'''
    # The two lines below were swallowed by secret-redaction in the source; the
    # reconstruction follows from 'packages' being iterated over further down.
    password = getpass('Enter your password: ')
    packages = package_list.split(',')
    turbocharge = Installer()
    click.echo('\n')
    os_bar = IncrementalBar('Getting Operating System...', max=1)
    os_bar.next()
    for package_name in packages:
        package_name = package_name.strip(' ')
        if platform == 'linux':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...', max=1)
            if package_name in devpackages:
                show_progress(finding_bar)
                turbocharge.install_task(
                    devpackages[package_name],
                    f'sudo -S apt-get install -y {package_name}',
                    password,
                    f'{package_name} --version',
                    [f'{devpackages[package_name]} Version'])
            if package_name in applications:
                show_progress(finding_bar)
                turbocharge.install_task(
                    applications[package_name],
                    f'sudo -S snap install --classic {package_name}',
                    password, '', [])
            if package_name == 'chrome':
                show_progress(finding_bar)
                try:
                    click.echo('\n')
                    password = getpass("Enter your password: ")
                    # 'proc = Popen(' was swallowed by redaction; 'proc.wait()' below
                    # makes the reconstruction unambiguous.
                    proc = Popen(
                        "wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb".split(),
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
                    proc.wait()
                    second = Popen(
                        "sudo -S apt-get install -y ./google-chrome-stable_current_amd64.deb".split(),
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
                    # Popen only accepts byte-arrays so you must encode the string
                    second.communicate(password.encode())
                    # stdoutput = (output)[0].decode('utf-8')
                    click.echo(click.style('\n\n 🎉 Successfully Installed Chrome! 🎉 \n'))
                    # Testing the successful installation of the package
                    testing_bar = IncrementalBar('Testing package...', max=100)
                    for _ in range(1, 21):
                        time.sleep(0.045)
                        testing_bar.next()
                    os.system('cd --')
                    for _ in range(21, 60):
                        time.sleep(0.045)
                        testing_bar.next()
                    for _ in range(60, 101):
                        time.sleep(0.03)
                        testing_bar.next()
                    click.echo('\n')
                    click.echo(click.style('Test Passed: Chrome Launch ✅\n', fg='green'))
                except subprocess.CalledProcessError as e:
                    click.echo(e.output)
                    click.echo('An Error Occurred During Installation...', err=True)
            if package_name == 'anaconda':
                show_progress(finding_bar)
                username = getuser()
                try:
                    installer_progress = Spinner(message=f'Installing {package_name}...', max=100)
                    # sudo requires the flag '-S' in order to take input from stdin
                    for _ in range(1, 35):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system("wget https://repo.anaconda.com/archive/Anaconda3-2020.07-Linux-x86_64.sh -O ~/anaconda.sh")
                    for _ in range(35, 61):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system('bash ~/anaconda.sh -b -p $HOME/anaconda3')
                    for _ in range(61, 91):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system(f'echo "export PATH="/home/{username}/anaconda3/bin:$PATH"" >> ~/.bashrc')
                    # Popen only accepts byte-arrays so you must encode the string
                    # (note: 'proc' is only defined here if the chrome branch ran earlier in this call)
                    proc.communicate(password.encode())
                    for _ in range(90, 101):
                        time.sleep(0.01)
                        installer_progress.next()
                    # stdoutput = (output)[0].decode('utf-8')
                    click.echo(click.style(f'\n\n 🎉 Successfully Installed {package_name}! 🎉 \n'))
                except subprocess.CalledProcessError as e:
                    click.echo(e.output)
                    click.echo('An Error Occurred During Installation...', err=True)
            if package_name == 'miniconda':
                show_progress(finding_bar)
                username = getuser()
                try:
                    installer_progress = Spinner(message=f'Installing {package_name}...', max=100)
                    # sudo requires the flag '-S' in order to take input from stdin
                    for _ in range(1, 35):
                        time.sleep(0.01)
                        installer_progress.next()
                    # note: this still fetches the Anaconda installer, saved as ~/miniconda.sh
                    os.system("wget https://repo.anaconda.com/archive/Anaconda3-2020.07-Linux-x86_64.sh -O ~/miniconda.sh")
                    for _ in range(35, 61):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system('bash ~/miniconda.sh -b -p $HOME/anaconda3')
                    for _ in range(61, 91):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system(f'echo "export PATH="/home/{username}/anaconda3/bin:$PATH"" >> ~/.bashrc')
                    for _ in range(90, 101):
                        time.sleep(0.01)
                        installer_progress.next()
                    # stdoutput = (output)[0].decode('utf-8')
                    click.echo(click.style(f'\n\n 🎉 Successfully Installed {package_name}! 🎉 \n'))
                except subprocess.CalledProcessError as e:
                    click.echo(e.output)
                    click.echo('An Error Occurred During Installation...', err=True)
            elif (package_name not in devpackages and package_name not in applications
                    and package_name != 'chrome' and package_name != 'anaconda'
                    and package_name != 'miniconda'):
                click.echo('\n')
                click.echo(click.style(':( Package Not Found! :(', fg='red'))
def root_search(self):
    roots = []
    prestems = []
    poststems = []
    bar = IncrementalBar('Searching for prestems and poststems...',
                         max=len(self.text.split(' ')))
    for u in self.text.split(' '):
        if u in self.roots:
            bar.next()
            continue
        for s in self.suffixes:
            if u.endswith(s):
                prestems.append(u.rstrip(s))
                break
        for p in self.prefixes:
            if u.startswith(p):
                poststems.append(u.lstrip(p))
                break
        bar.next()
    bar.finish()

    bar = IncrementalBar('Splitting prestems and prefixes...', max=len(prestems))
    for u in prestems:
        coincidence_found = False
        for p in self.prefixes:
            if u.startswith(p):
                roots.append(u.lstrip(p))
                coincidence_found = True
                break
        if coincidence_found:
            bar.next()
            continue
        else:
            roots.append(u)
            bar.next()
    bar.finish()

    bar = IncrementalBar('Splitting poststems and suffixes...', max=len(poststems))
    for u in poststems:
        coincidence_found = False
        for s in self.suffixes:
            if u.endswith(s):
                roots.append(u.rstrip(s))
                coincidence_found = True
                break
        if coincidence_found:
            bar.next()
            continue
        else:
            roots.append(u)
            bar.next()
    bar.finish()
    return roots
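# Note on the affix splitting above: str.rstrip()/str.lstrip() remove *any* trailing or
# leading run of the given characters, not the literal suffix/prefix, so roots can be
# over-trimmed. Plain-Python behaviour, for illustration:
#
#   'blessing'.rstrip('ing')  # -> 'bless' (looks correct)
#   'running'.rstrip('ing')   # -> 'ru'    (the extra 'n's are in the strip set too)
#
# Exact affix removal would be u[:-len(s)] for a suffix and u[len(p):] for a prefix.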
def migrate(callback):
    connection = op.get_bind()

    s = sa.select([n.c.node, n.c.path])
    nodes = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating node paths...', max=len(nodes))
    for node, path in nodes:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = n.update().where(n.c.node == node).values({'path': path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([v.c.muser]).distinct()
    musers = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating version modification users...', max=len(musers))
    for muser, in musers:
        match = callback(muser)
        if not match:
            bar.next()
            continue
        u = v.update().where(v.c.muser == muser).values({'muser': match})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([p.c.public_id, p.c.path])
    public = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating public paths...', max=len(public))
    for id, path in public:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = p.update().where(p.c.public_id == id).values({'path': path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([x.c.feature_id, x.c.path])
    xfeatures = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating permission paths...', max=len(xfeatures))
    for id, path in xfeatures:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = x.update().where(x.c.feature_id == id).values({'path': path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([xvals.c.feature_id, xvals.c.key, xvals.c.value])
    s = s.where(xvals.c.value != '*')
    xfeaturevals = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating permission holders...', max=len(xfeaturevals))
    for feature_id, key, value in xfeaturevals:
        account, sep, group = value.partition(':')
        match = callback(account)
        if not match:
            bar.next()
            continue
        new_value = sep.join([match, group])
        u = xvals.update()
        u = u.where(and_(
            xvals.c.feature_id == feature_id,
            xvals.c.key == key,
            xvals.c.value == value))
        u = u.values({'value': new_value})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([g.c.owner, g.c.name, g.c.member])
    groups = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating group owners & members...', max=len(groups))
    for owner, name, member in groups:
        owner_match = callback(owner)
        member_match = callback(member)
        if owner_match or member_match:
            u = g.update()
            u = u.where(and_(
                g.c.owner == owner,
                g.c.name == name,
                g.c.member == member))
            values = {}
            if owner_match:
                values['owner'] = owner_match
            if member_match:
                values['member'] = member_match
            u = u.values(values)
            connection.execute(u)
        bar.next()
    bar.finish()
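# A minimal usage sketch for migrate (hypothetical callback): the callback receives an
# account identifier and returns its migrated form, or a falsy value to leave the row as-is.
#
#   def lowercase_accounts(account):
#       new = account.lower()
#       return new if new != account else None
#
#   migrate(lowercase_accounts)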
def jds_wf_simple_reader(directory, no_of_spectra_to_average, skip_data_blocks,
                         VminNorm, VmaxNorm, colormap, custom_dpi,
                         save_long_file_aver, dyn_spectr_save_init,
                         dyn_spectr_save_norm):

    current_time = time.strftime("%H:%M:%S")
    current_date = time.strftime("%d.%m.%Y")

    # *** Creating a folder where all pictures and results will be stored (if it doesn't exist) ***
    result_folder = 'RESULTS_JDS_waveform_' + directory.split('/')[-2]
    if not os.path.exists(result_folder):
        os.makedirs(result_folder)
    service_folder = result_folder + '/Service'
    if not os.path.exists(service_folder):
        os.makedirs(service_folder)
    if dyn_spectr_save_init == 1:
        initial_spectra_folder = result_folder + '/Initial spectra'
        if not os.path.exists(initial_spectra_folder):
            os.makedirs(initial_spectra_folder)

    # *** Search JDS files in the directory ***
    file_list = find_files_only_in_current_folder(directory, '.jds', 1)
    print('')

    if len(file_list) > 1:  # Check if files have the same parameters if there is more than one file in the list
        # Check if all files (except the last) have the same size
        same_or_not = check_if_all_files_of_same_size(directory, file_list, 1)
        # Check if all files in this folder have the same parameters in headers
        equal_or_not = check_if_JDS_files_of_equal_parameters(directory, file_list)
        if same_or_not and equal_or_not:
            print('\n\n\n        :-) All files seem to be of the same parameters! :-) \n\n\n')
        else:
            print('\n\n\n ************************************************************************************* ')
            print(' *                                                                                   *')
            print(' *  Seems files in folders are different, check the errors and restart the script!   *')
            print(' *                                                                                   * '
                  '\n ************************************************************************************* \n\n\n')
            decision = int(input('* Enter "1" to start processing, or "0" to stop the script: '))
            if decision != 1:
                sys.exit('\n\n\n              *** Program stopped! *** \n\n\n')

    # To print in console the header of first file
    print('\n  First file header parameters: \n')

    # *** Data file header read ***
    [df_filename, df_filesize, df_system_name, df_obs_place, df_description,
     CLCfrq, df_creation_timeUTC, Channel, ReceiverMode, Mode, Navr, TimeRes,
     fmin, fmax, df, frequency, freq_points_num,
     data_block_size] = FileHeaderReaderJDS(directory + file_list[0], 0, 1)

    # Main loop by files start
    for file_no in range(len(file_list)):  # loop by files

        # *** Opening datafile ***
        fname = directory + file_list[file_no]

        # *********************************************************************************

        # *** Data file header read ***
        [df_filename, df_filesize, df_system_name, df_obs_place, df_description,
         CLCfrq, df_creation_timeUTC, Channel, ReceiverMode, Mode, Navr, TimeRes,
         fmin, fmax, df, frequency, freq_points_num,
         data_block_size] = FileHeaderReaderJDS(fname, 0, 0)

        # Create long data files and copy first data file header to them
        if file_no == 0 and save_long_file_aver == 1:
            with open(fname, 'rb') as file:
                # *** Data file header read ***
                file_header = file.read(1024)

            # *** Creating a name for long timeline TXT file ***
            tl_file_name = df_filename + '_Timeline.txt'
            tl_file = open(tl_file_name, 'w')  # Open and close to delete the file with the same name
            tl_file.close()

            # *** Creating a binary file with data for long data storage ***
            file_data_a_name = df_filename + '_Data_chA.dat'
            file_data_a = open(file_data_a_name, 'wb')
            file_data_a.write(file_header)
            file_data_a.seek(574)  # FFT size place in header
            file_data_a.write(np.int32(data_block_size).tobytes())
            file_data_a.seek(624)  # Lb place in header
            file_data_a.write(np.int32(0).tobytes())
            file_data_a.seek(628)  # Hb place in header
            file_data_a.write(np.int32(data_block_size / 2).tobytes())
            file_data_a.seek(632)  # Wb place in header
            file_data_a.write(np.int32(data_block_size / 2).tobytes())
            file_data_a.seek(636)  # Navr place in header
            file_data_a.write(bytes([np.int32(Navr * no_of_spectra_to_average)]))
            file_data_a.close()

            if Channel == 2:
                file_data_b_name = df_filename + '_Data_chB.dat'
                file_data_b = open(file_data_b_name, 'wb')
                file_data_b.write(file_header)
                file_data_b.seek(574)  # FFT size place in header
                file_data_b.write(np.int32(data_block_size).tobytes())
                file_data_b.seek(624)  # Lb place in header
                file_data_b.write(np.int32(0).tobytes())
                file_data_b.seek(628)  # Hb place in header
                file_data_b.write(np.int32(data_block_size / 2).tobytes())
                file_data_b.seek(632)  # Wb place in header
                file_data_b.write(np.int32(data_block_size / 2).tobytes())
                file_data_b.seek(636)  # Navr place in header
                file_data_b.write(bytes([np.int32(Navr * no_of_spectra_to_average)]))
                file_data_b.close()

            del file_header

        # !!! Make automatic calculations of time and frequency resolutions for waveform mode !!!

        # Manually set frequencies for one channel mode
        if (Channel == 0 and int(CLCfrq / 1000000) == 66) or (Channel == 1 and int(CLCfrq / 1000000) == 66):
            freq_points_num = 8192
            frequency = np.linspace(0.0, 33.0, freq_points_num)

        # Manually set frequencies for two channels mode
        if Channel == 2 or (Channel == 0 and int(CLCfrq / 1000000) == 33) or (Channel == 1 and int(CLCfrq / 1000000) == 33):
            freq_points_num = 8192
            frequency = np.linspace(16.5, 33.0, freq_points_num)

        # For new receiver (temporary):
        if Channel == 2 and int(CLCfrq / 1000000) == 80:
            freq_points_num = 8192
            frequency = np.linspace(0.0, 40.0, freq_points_num)

        # Calculation of number of blocks and number of spectra in the file
        if Channel == 0 or Channel == 1:  # Single channel mode
            no_of_av_spectra_per_file = (df_filesize - 1024) / (2 * data_block_size * no_of_spectra_to_average)
        else:  # Two channels mode
            no_of_av_spectra_per_file = (df_filesize - 1024) / (4 * data_block_size * no_of_spectra_to_average)

        no_of_blocks_in_file = (df_filesize - 1024) / data_block_size
        no_of_av_spectra_per_file = int(no_of_av_spectra_per_file)
        fine_clock_frq = (int(CLCfrq / 1000000.0) * 1000000.0)

        # Real time resolution of averaged spectra
        real_av_spectra_dt = (1 / fine_clock_frq) * (data_block_size - 4) * no_of_spectra_to_average

        if file_no == 0:
            print(' Number of blocks in file:             ', no_of_blocks_in_file)
            print(' Number of spectra to average:         ', no_of_spectra_to_average)
            print(' Number of averaged spectra in file:   ', no_of_av_spectra_per_file)
            print(' Time resolution of averaged spectrum: ', round(real_av_spectra_dt * 1000, 3), ' ms.')
            print('\n  *** Reading data from file *** \n')

        # *******************************************************************************
        #                           R E A D I N G   D A T A                             *
        # *******************************************************************************

        with open(fname, 'rb') as file:
            file.seek(1024 + data_block_size * 4 * skip_data_blocks)  # Jumping to 1024 byte from file beginning

            # *** DATA READING process ***

            # Preparing arrays for dynamic spectra
            dyn_spectra_ch_a = np.zeros((int(data_block_size / 2), no_of_av_spectra_per_file), float)
            if Channel == 2:  # Two channels mode
                dyn_spectra_ch_b = np.zeros((int(data_block_size / 2), no_of_av_spectra_per_file), float)

            # !!! Fake timing. Real timing to be done !!!
            # TimeFigureScaleFig = np.linspace(0, no_of_av_spectra_per_file, no_of_av_spectra_per_file+1)
            # for i in range(no_of_av_spectra_per_file):
            #     TimeFigureScaleFig[i] = str(TimeFigureScaleFig[i])

            time_scale_fig = []
            time_scale_full = []

            bar = IncrementalBar(' File ' + str(file_no + 1) + ' of ' + str(len(file_list)) + ' reading: ',
                                 max=no_of_av_spectra_per_file, suffix='%(percent)d%%')

            for av_sp in range(no_of_av_spectra_per_file):

                bar.next()

                # Reading and reshaping all data with readers
                if Channel == 0 or Channel == 1:  # Single channel mode
                    wf_data = np.fromfile(file, dtype='i2',
                                          count=no_of_spectra_to_average * data_block_size)
                    wf_data = np.reshape(wf_data, [data_block_size, no_of_spectra_to_average], order='F')
                if Channel == 2:  # Two channels mode
                    wf_data = np.fromfile(file, dtype='i2',
                                          count=2 * no_of_spectra_to_average * data_block_size)
                    wf_data = np.reshape(wf_data, [data_block_size, 2 * no_of_spectra_to_average], order='F')

                # Timing
                timeline_block_str = jds_waveform_time(wf_data, CLCfrq, data_block_size)
                time_scale_fig.append(timeline_block_str[-1][0:12])
                time_scale_full.append(df_creation_timeUTC[0:10] + ' ' + timeline_block_str[-1][0:12])

                # Nulling the time blocks in waveform data
                wf_data[data_block_size - 4:data_block_size, :] = 0

                # Scaling of the data - seems to be wrong in absolute value
                wf_data = wf_data / 32768.0

                if Channel == 0 or Channel == 1:  # Single channel mode
                    wf_data_ch_a = wf_data  # All the data is channel A data
                    del wf_data  # Deleting unnecessary array to free the memory

                if Channel == 2:  # Two channels mode
                    # Resizing to obtain the matrix for separation of channels
                    wf_data_new = np.zeros((2 * data_block_size, no_of_spectra_to_average))
                    for i in range(2 * no_of_spectra_to_average):
                        if i % 2 == 0:
                            wf_data_new[0:data_block_size, int(i / 2)] = wf_data[:, i]  # Even
                        else:
                            wf_data_new[data_block_size:2 * data_block_size, int(i / 2)] = wf_data[:, i]  # Odd
                    del wf_data  # Deleting unnecessary array to free the memory

                    # Separating the data into two channels
                    wf_data_ch_a = np.zeros((data_block_size, no_of_spectra_to_average))  # Preparing empty array
                    wf_data_ch_b = np.zeros((data_block_size, no_of_spectra_to_average))  # Preparing empty array
                    wf_data_ch_a[:, :] = wf_data_new[0:(2 * data_block_size):2, :]  # Separation to channel A
                    wf_data_ch_b[:, :] = wf_data_new[1:(2 * data_block_size):2, :]  # Separation to channel B
                    del wf_data_new

                # preparing matrices for spectra
                spectra_ch_a = np.zeros_like(wf_data_ch_a)
                if Channel == 2:
                    spectra_ch_b = np.zeros_like(wf_data_ch_b)

                # Calculation of spectra
                for i in range(no_of_spectra_to_average):
                    spectra_ch_a[:, i] = np.power(np.abs(np.fft.fft(wf_data_ch_a[:, i])), 2)
                    if Channel == 2:  # Two channels mode
                        spectra_ch_b[:, i] = np.power(np.abs(np.fft.fft(wf_data_ch_b[:, i])), 2)

                # Storing only first (left) mirror part of spectra
                spectra_ch_a = spectra_ch_a[:int(data_block_size / 2), :]
                if Channel == 2:
                    spectra_ch_b = spectra_ch_b[:int(data_block_size / 2), :]

                # At 33 MHz the spectrum is usually upside down, to correct it we use flip up/down
                if int(CLCfrq / 1000000) == 33:
                    spectra_ch_a = np.flipud(spectra_ch_a)
                    if Channel == 2:
                        spectra_ch_b = np.flipud(spectra_ch_b)

                # Plotting first waveform block and first immediate spectrum in a file
                if av_sp == 0:  # First data block in a file
                    i = 0  # First immediate spectrum in a block

                    # Prepare parameters for plot
                    data_1 = wf_data_ch_a[:, i]
                    if Channel == 0 or Channel == 1:  # Single channel mode
                        no_of_sets = 1
                        data_2 = []
                    if Channel == 2:
                        no_of_sets = 2
                        data_2 = wf_data_ch_b[:, i]

                    suptitle = ('Waveform data, first block in file ' + str(df_filename))
                    Title = (ReceiverMode + ', Fclock = ' + str(round(CLCfrq / 1000000, 1)) +
                             ' MHz, Description: ' + str(df_description))

                    TwoOrOneValuePlot(no_of_sets,
                                      np.linspace(no_of_sets, data_block_size, data_block_size),
                                      data_1, data_2, 'Channel A', 'Channel B',
                                      1, data_block_size, -0.6, 0.6, -0.6, 0.6,
                                      'ADC clock counts', 'Amplitude, V', 'Amplitude, V',
                                      suptitle, Title,
                                      service_folder + '/' + df_filename[0:14] + ' Waveform first data block.png',
                                      current_date, current_time, software_version)

                    # Prepare parameters for plot
                    data_1 = 10 * np.log10(spectra_ch_a[:, i])
                    if Channel == 0 or Channel == 1:  # Single channel mode
                        no_of_sets = 1
                        data_2 = []
                    if Channel == 2:
                        no_of_sets = 2
                        data_2 = 10 * np.log10(spectra_ch_b[:, i])

                    suptitle = ('Immediate spectrum, first in file ' + str(df_filename))
                    Title = (ReceiverMode + ', Fclock = ' + str(round(CLCfrq / 1000000, 1)) +
                             ' MHz, Description: ' + str(df_description))

                    TwoOrOneValuePlot(no_of_sets, frequency, data_1, data_2,
                                      'Channel A', 'Channel B', frequency[0], frequency[-1],
                                      -80, 60, -80, 60, 'Frequency, MHz',
                                      'Intensity, dB', 'Intensity, dB', suptitle, Title,
                                      service_folder + '/' + df_filename[0:14] + ' Immediate spectrum first in file.png',
                                      current_date, current_time, software_version)

                # Deleting the unnecessary matrices
                del wf_data_ch_a
                if Channel == 2:
                    del wf_data_ch_b

                # Calculation of the averaged spectrum
                aver_spectra_ch_a = spectra_ch_a.mean(axis=1)[:]
                if Channel == 2:
                    aver_spectra_ch_b = spectra_ch_b.mean(axis=1)[:]

                # Plotting only first averaged spectrum
                if av_sp == 0:
                    # Prepare parameters for plot
                    data_1 = 10 * np.log10(aver_spectra_ch_a)
                    if Channel == 0 or Channel == 1:  # Single channel mode
                        no_of_sets = 1
                        data_2 = []
                    if Channel == 2:
                        no_of_sets = 2
                        data_2 = 10 * np.log10(aver_spectra_ch_b)

                    suptitle = ('Average spectrum, first data block in file ' + str(df_filename))
                    Title = (ReceiverMode + ', Fclock = ' + str(round(CLCfrq / 1000000, 1)) +
                             ' MHz, Averaged spectra: ' + str(no_of_spectra_to_average) +
                             ', Description: ' + str(df_description))

                    TwoOrOneValuePlot(no_of_sets, frequency, data_1, data_2,
                                      'Channel A', 'Channel B', frequency[0], frequency[-1],
                                      -80, 60, -80, 60, 'Frequency, MHz',
                                      'Intensity, dB', 'Intensity, dB', suptitle, Title,
                                      service_folder + '/' + df_filename[0:14] + ' Average spectrum first data block in file.png',
                                      current_date, current_time, software_version)

                # Adding calculated averaged spectrum to dynamic spectra array
                dyn_spectra_ch_a[:, av_sp] = aver_spectra_ch_a[:]
                if Channel == 2:
                    dyn_spectra_ch_b[:, av_sp] = aver_spectra_ch_b[:]

            bar.finish()

        # file.close()  # Close the data file

        # Saving averaged spectra to long data files
        if save_long_file_aver == 1:
            temp = dyn_spectra_ch_a.transpose().copy(order='C')
            file_data_a = open(file_data_a_name, 'ab')
            file_data_a.write(temp)
            file_data_a.close()
            if Channel == 2:
                temp = dyn_spectra_ch_b.transpose().copy(order='C')
                file_data_b = open(file_data_b_name, 'ab')
                file_data_b.write(temp)
                file_data_b.close()

            # Saving time data to long timeline file
            with open(tl_file_name, 'a') as tl_file:
                for i in range(no_of_av_spectra_per_file):
                    tl_file.write((time_scale_full[i][:]) + ' \n')  # str
            del time_scale_full

        # Log data (make dB scale)
        with np.errstate(invalid='ignore', divide='ignore'):
            dyn_spectra_ch_a = 10 * np.log10(dyn_spectra_ch_a)
            if Channel == 2:
                dyn_spectra_ch_b = 10 * np.log10(dyn_spectra_ch_b)

        # If the data contains minus infinity values change them to particular values
        dyn_spectra_ch_a[np.isinf(dyn_spectra_ch_a)] = 40
        if Channel == 2:
            dyn_spectra_ch_b[np.isinf(dyn_spectra_ch_b)] = 40

        # *******************************************************************************
        #            P L O T T I N G    D Y N A M I C    S P E C T R A                  *
        # *******************************************************************************

        # if dyn_spectr_save_init == 1 or dyn_spectr_save_norm == 1:
        #     print('\n  *** Making figures of dynamic spectra *** \n')

        if dyn_spectr_save_init == 1:
            # Plot of initial dynamic spectra
            v_min_a = np.min(dyn_spectra_ch_a)
            v_max_a = np.max(dyn_spectra_ch_a)
            v_min_b = v_min_a
            v_max_b = v_max_a
            if Channel == 2:
                v_min_b = np.min(dyn_spectra_ch_b)
                v_max_b = np.max(dyn_spectra_ch_b)

            if Channel == 0 or Channel == 1:  # Single channel mode
                dyn_spectra_ch_b = dyn_spectra_ch_a

            suptitle = ('Dynamic spectrum (initial) ' + str(df_filename) + ' - Fig. ' +
                        str(1) + ' of ' + str(1) + '\n Initial parameters: dt = ' +
                        str(round(TimeRes * 1000., 3)) + ' ms, df = ' + str(round(df / 1000., 3)) +
                        ' kHz, Receiver: ' + str(df_system_name) + ', Place: ' + str(df_obs_place) +
                        '\n' + ReceiverMode + ', Fclock = ' + str(round(CLCfrq / 1000000, 1)) +
                        ' MHz, Averaged spectra: ' + str(no_of_spectra_to_average) +
                        ' (' + str(round(no_of_spectra_to_average * TimeRes, 3)) +
                        ' sec.), Description: ' + str(df_description))

            fig_file_name = (initial_spectra_folder + '/' + df_filename[0:14] +
                             ' Initial dynamic spectrum fig.' + str(0 + 1) + '.png')

            if Channel == 0 or Channel == 1:  # Single channel mode
                OneDynSpectraPlot(dyn_spectra_ch_a, v_min_a, v_max_a, suptitle, 'Intensity, dB',
                                  no_of_av_spectra_per_file, time_scale_fig, frequency,
                                  freq_points_num, colormap, 'UTC Time, HH:MM:SS.msec',
                                  fig_file_name, current_date, current_time,
                                  software_version, custom_dpi)
            if Channel == 2:
                TwoDynSpectraPlot(dyn_spectra_ch_a, dyn_spectra_ch_b, v_min_a, v_max_a,
                                  v_min_b, v_max_b, suptitle, 'Intensity, dB', 'Intensity, dB',
                                  no_of_av_spectra_per_file, time_scale_fig, time_scale_fig,
                                  frequency, freq_points_num, colormap, 'Channel A', 'Channel B',
                                  fig_file_name, current_date, current_time,
                                  software_version, custom_dpi)

        if dyn_spectr_save_norm == 1:
            # Normalization and cleaning of data
            Normalization_dB(dyn_spectra_ch_a.transpose(), freq_points_num, no_of_av_spectra_per_file)
            if Channel == 2:
                Normalization_dB(dyn_spectra_ch_b.transpose(), freq_points_num, no_of_av_spectra_per_file)
            simple_channel_clean(dyn_spectra_ch_a, 8)
            if Channel == 2:
                simple_channel_clean(dyn_spectra_ch_b, 8)

            # Plot of normalized and cleaned dynamic spectra
            suptitle = ('Normalized and cleaned dynamic spectrum (initial) ' + str(df_filename) +
                        ' - Fig. ' + str(0 + 1) + ' of ' + str(1) +
                        '\n Initial parameters: dt = ' + str(round(TimeRes * 1000, 3)) +
                        ' ms, df = ' + str(round(df / 1000., 3)) + ' kHz, Receiver: ' +
                        str(df_system_name) + ', Place: ' + str(df_obs_place) + '\n' +
                        ReceiverMode + ', Fclock = ' + str(round(CLCfrq / 1000000, 1)) +
                        ' MHz, Averaged spectra: ' + str(no_of_spectra_to_average) +
                        ' (' + str(round(no_of_spectra_to_average * TimeRes, 3)) +
                        ' sec.), Description: ' + str(df_description))

            fig_file_name = (result_folder + '/' + df_filename[0:14] +
                             ' Normalized and cleaned dynamic spectrum fig.' + str(0 + 1) + '.png')

            if Channel == 0 or Channel == 1:  # Single channel mode
                OneDynSpectraPlot(dyn_spectra_ch_a, VminNorm, VmaxNorm, suptitle, 'Intensity, dB',
                                  no_of_av_spectra_per_file, time_scale_fig, frequency,
                                  freq_points_num, colormap, 'UTC Time, HH:MM:SS.msec',
                                  fig_file_name, current_date, current_time,
                                  software_version, custom_dpi)
            if Channel == 2:
                TwoDynSpectraPlot(dyn_spectra_ch_a, dyn_spectra_ch_b, VminNorm, VmaxNorm,
                                  VminNorm, VmaxNorm, suptitle, 'Intensity, dB', 'Intensity, dB',
                                  no_of_av_spectra_per_file, time_scale_fig, time_scale_fig,
                                  frequency, freq_points_num, colormap, 'Channel A', 'Channel B',
                                  fig_file_name, current_date, current_time,
                                  software_version, custom_dpi)

        del time_scale_fig, file_data_a
        if Channel == 2:
            del file_data_b

    results_files_list = []
    results_files_list.append(file_data_a_name)
    if Channel == 2:
        results_files_list.append(file_data_b_name)

    return results_files_list
def _run_epoch(
    self,
    model,
    dataloader,
    optimize=False,
    save_activations=False,
    reweight=None,
    bit_pretrained=False,
    adv_metrics=False,
):
    """Runs the model on a given dataloader.

    Note:
        The latter item in the returned tuple is what is necessary to run
        GEORGECluster.train and GEORGECluster.evaluate.

    Args:
        model(nn.Module): A PyTorch model.
        dataloader(DataLoader): The dataloader. The dataset within must
            subclass GEORGEDataset.
        optimize(bool, optional): If True, the model is trained on
            self.criterion.
        save_activations(bool, optional): If True, saves the activations in
            `outputs`. Default is False.
        bit_pretrained(bool, optional): If True, assumes bit_pretrained and
            does not evaluate performance metrics.

    Returns:
        metrics(Dict[str, Any]): A dictionary object that stores the metrics
            defined in self.config['metric_types'].
        outputs(Dict[str, Any]): A dictionary object that stores artifacts
            necessary for model analysis, including labels, activations, and
            predictions.
    """
    dataset = dataloader.dataset
    self._check_dataset(dataset)
    type_to_num_classes = {
        label_type: dataset.get_num_classes(label_type)
        for label_type in LABEL_TYPES if label_type in dataset.Y_dict.keys()
    }
    outputs = {
        "metrics": None,
        "activations": [],
        "superclass": [],
        "subclass": [],
        "true_subclass": [],
        "alt_subclass": [],
        "targets": [],
        "probs": [],
        "preds": [],
        "losses": [],
        "reweight": [],
    }
    activations_handle = self._init_activations_hook(model, outputs["activations"])
    if optimize:
        progress_prefix = "Training"
        model.train()
    else:
        progress_prefix = "Evaluation"
        model.eval()
    per_class_meters = self._init_per_class_meters(type_to_num_classes)
    metric_meters = {k: AverageMeter() for k in ["loss", "acc", "loss_rw", "acc_rw"]}
    progress = self.config["show_progress"]
    if progress:
        bar = ProgressBar(progress_prefix, max=len(dataloader), width=50)

    for batch_idx, (inputs, targets) in enumerate(dataloader):
        batch_size = len(inputs)
        if self.use_cuda:
            inputs, targets = move_to_device([inputs, targets], device=self.device)
        type_to_labels = {}
        for label_type in type_to_num_classes.keys():
            type_to_labels[label_type] = targets[label_type]
            outputs[label_type].append(targets[label_type])

        if optimize and not bit_pretrained:
            logits = model(inputs)
            loss_targets = targets["superclass"]
            co = self.criterion(logits, loss_targets, targets["subclass"])
            loss, (losses, corrects), _ = co
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        else:
            with torch.no_grad():
                logits = model(inputs)
                loss_targets = targets["superclass"]
                if bit_pretrained:
                    if progress:
                        bar.suffix = PROGRESS_BAR_SUFFIX.format(
                            batch=batch_idx + 1,
                            size=len(dataloader),
                            total=format_timedelta(bar.elapsed_td),
                            eta=format_timedelta(bar.eta_td),
                            **{k: 0 for k in prog_metric_names},
                        )
                        bar.next()
                    continue
                co = self.criterion(logits, loss_targets, targets["subclass"])
                loss, (losses, corrects), _ = co

        if not save_activations:
            outputs["activations"].pop()  # delete activations

        reweight_vec = None if reweight is None else reweight[targets["true_subclass"]]

        metrics = self._compute_progress_metrics(
            losses,
            corrects,
            type_to_labels,
            type_to_num_classes,
            per_class_meters,
            reweight=reweight_vec,
        )
        acc, preds = compute_accuracy(logits.data, loss_targets.data, return_preds=True)

        outputs["probs"].append(F.softmax(logits, dim=1).detach().cpu()[:, 1])
        outputs["preds"].append(preds)
        outputs["losses"].append(losses.detach().cpu())
        outputs["targets"].append(loss_targets.detach().cpu())
        if reweight_vec is not None:
            outputs["reweight"].append(reweight_vec.cpu())

        self._update_metrics(metric_meters, acc, loss, losses, corrects, batch_size, reweight_vec)

        PROGRESS_BAR_STR = PROGRESS_BAR_SUFFIX
        if self.compute_auroc:
            sub_map = dataloader.dataset.get_class_map("subclass")
            assert set(sub_map.keys()) == {0, 1}  # must be a binary problem
            targets_cat, probs_cat = torch.cat(outputs["targets"]), torch.cat(outputs["probs"])
            auroc = compute_roc_auc(targets_cat, probs_cat)
            metrics["auroc"] = auroc
            has_alt_subclass = "alt_subclass" in dataloader.dataset.Y_dict
            for key in ["subclass", "true_subclass"] + ["alt_subclass"] * has_alt_subclass:
                sub_map = dataloader.dataset.get_class_map(key)
                neg_subclasses = sub_map[0]
                pos_subclasses = sub_map[1]
                if len(neg_subclasses) == len(pos_subclasses) == 1:
                    # only one subclass in each superclass
                    rob_auroc = auroc
                else:
                    subclass_labels = torch.cat(outputs[key])
                    paired_aurocs = []
                    for neg_subclass in neg_subclasses:
                        for pos_subclass in pos_subclasses:
                            inds = ((subclass_labels == neg_subclass) |
                                    (subclass_labels == pos_subclass)).cpu()
                            subset_pair_auroc = compute_roc_auc(targets_cat[inds], probs_cat[inds])
                            paired_aurocs.append(subset_pair_auroc)
                    rob_auroc = min(paired_aurocs)
                metrics[f"{key}_rob_auroc"] = rob_auroc
            if not has_alt_subclass:
                metrics["alt_subclass_rob_auroc"] = auroc
            PROGRESS_BAR_STR += (
                " | AUROC: {auroc:.4f} | R AUROC: {subclass_rob_auroc:.4f} | "
                "TR AUROC: {true_subclass_rob_auroc:.4f} | AR AUROC: {alt_subclass_rob_auroc:.4f}"
            )

        if progress:
            bar.suffix = PROGRESS_BAR_STR.format(
                batch=batch_idx + 1,
                size=len(dataloader),
                total=format_timedelta(bar.elapsed_td),
                eta=format_timedelta(bar.eta_td),
                **{**metrics, **{k: v.avg for k, v in metric_meters.items()}},
            )
            bar.next()

    if progress:
        bar.finish()
    if activations_handle:
        activations_handle.remove()

    for k, v in outputs.items():
        if type(v) == list and len(v) > 0:
            outputs[k] = concatenate_iterable(v)

    if bit_pretrained:
        return outputs["metrics"], outputs

    outputs["metrics"] = metrics
    outputs["metrics"].update({k: float(v.avg) for k, v in metric_meters.items()})
    outputs["metrics"].update(self._compute_aggregate_metrics(outputs))
    self._print_output_metrics(outputs)

    if adv_metrics:
        scaa = np.mean([ga.avg * 100 for ga in np.array(per_class_meters["per_true_subclass_accs"])])
        self.logger.info(
            f'All accs: {[ga.avg * 100 for ga in np.array(per_class_meters["per_true_subclass_accs"])]}'
        )
        self.logger.info(f"SCAA: {scaa:.3f}")
        ap = sklearn.metrics.average_precision_score(
            outputs["targets"],
            outputs["probs"],
            sample_weight=outputs["reweight"] if reweight_vec is not None else None,
        )
        self.logger.info(f"MaP: {ap:.4f}")

    return outputs["metrics"], outputs
class SampleDumpHandler(object):
    def __init__(self, debug=False, samplelist=None):
        super(SampleDumpHandler, self).__init__()
        self.debug = debug
        self.samplelist = samplelist
        self.reset()

    def __del__(self):
        if len(self.data):
            self.saveFile()

    def reset(self):
        self.header = {}
        self.data = []
        self.lastpacket = 0
        self.raw = []
        self.packetcounter = 0
        self.dump_start = 0
        self.exppacket = 0
        self.starttime = 0

    def parse(self, msg):
        status = None
        if msg[3] == 0x1:
            status = self.parseHeader(msg)
        elif msg[3] == 0x2:
            status = self.parsePacket(msg)
        elif msg[3] == 0x3:
            status = self.parseRequest(msg)
        elif msg[3] == 0x7F and self.dump_start > 0:
            status = self.continueDump()
        return status

    def parseHeader(self, msg):
        self.reset()
        if len(msg) != 21:
            print "Size mismatch, is", len(msg)
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)

        speriod = int(msg[9] << 14 | msg[8] << 7 | msg[7])
        srate = 1. / (speriod * 1e-9)
        self.header = {
            "target_id":         msg[2],
            "sample_number":     msg[5] << 7 | msg[4],
            "sample_format":     msg[6],
            "sample_period":     speriod,
            "sample_rate":       srate,
            "sample_length":     msg[12] << 14 | msg[11] << 7 | msg[10],
            "sample_loop_start": msg[15] << 14 | msg[14] << 7 | msg[13],
            "sample_loop_end":   msg[18] << 14 | msg[17] << 7 | msg[16],
            "loop_type":         msg[19],
        }
        if self.debug:
            print "Sample Dump Header"
            print "  Data:"
            for k, v in self.header.iteritems():
                print "    %s:" % k, v

        self.raw += msg
        format = int(self.header["sample_format"])
        length = int(self.header["sample_length"])
        self.exppacket = (format + 6) / 7 * length / 120 + 1
        self.starttime = time.time()
        self.bar = IncrementalBar(
            "Receiving sample dump", max=self.exppacket,
            suffix='%(percent)d%% [%(elapsed_td)s / %(eta_td)s]')
        return HandshakeMessage.ACK(packetnumber=self.lastpacket)

    def parsePacket(self, msg):
        if not 0xF7 in msg:
            print "printSampleDumpDataPacket: could not find EOX"
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)

        cs = msg.index(0xF7) - 1
        calced_cs = checksum(msg[1:cs])
        if self.debug:
            print "Sample Dump Data Packet"
            print "  Data:"
            print "    Packet count", msg[4]
            print "    checksum:", hex(msg[cs]), \
                "(calculated 0x%x)" % calced_cs
        if msg[cs] != calced_cs:
            print "Checksum mismatch:", hex(msg[cs]), "should be", hex(calced_cs)
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)

        offset = 5
        format = int(self.header['sample_format'])
        if format == 14:
            self.data += msg[offset:offset + 120]
        else:
            print format, "bit samples are not supported"
        self.lastpacket = msg[4]
        self.raw += msg
        self.packetcounter += 1
        self.bar.next()
        return HandshakeMessage.ACK(packetnumber=self.lastpacket)

    def parseRequest(self, msg):
        self.reset()
        if not 0xF7 in msg:
            print "printSampleDumpDataPacket: could not find EOX"
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)

        samplenumber = int(msg[5] << 7 | msg[4])
        print "Received Sample Dump Request for sample", samplenumber
        if self.debug:
            print "  Data:"
            print "    targetid:", msg[2]
            print "    samplenumber:", samplenumber

        samplefile = None
        if self.samplelist and samplenumber < len(self.samplelist):
            samplefile = self.samplelist[samplenumber]
            print "Selected list index", samplenumber, repr(samplefile)
        if not samplefile or not os.path.exists(samplefile):
            samplefile = "sample.sds"
            print "Selected fallback", repr(samplefile)
        if not os.path.exists(samplefile):
            print "No sample to send"
            return HandshakeMessage.Cancel(packetnumber=self.lastpacket)

        f = open(samplefile, "rb")
        self.raw = [ord(i) for i in f.read()]
        f.close()

        n = self.raw.count(0xF7)
        if n > 0:
            print "Sending", n, "Sample Dump Packets (+ header)"
            self.starttime = time.time()
            self.dump_start = self.raw.index(0xF7) + 1
            self.packetcounter += 1
            return self.raw[:self.dump_start]
        return HandshakeMessage.Cancel(packetnumber=self.lastpacket)

    def continueDump(self):
        n = self.raw[self.dump_start:].count(0xF7)
        if n == 0:
            elapsed = time.time() - self.starttime
            print "Sent %d packets in %.1f seconds (%.1f bytes/sec)" % (
                self.packetcounter, elapsed, len(self.raw) / elapsed)
            self.reset()
            return HandshakeMessage.EOF(packetnumber=self.lastpacket)
        ds = self.dump_start
        self.dump_start = self.raw.index(0xF7, self.dump_start) + 1
        if self.packetcounter % 100 == 0:
            print "Sent %d packets" % self.packetcounter
        self.packetcounter += 1
        return self.raw[ds:self.dump_start]

    def saveFile(self, filename=None):
        self.bar.finish()
        if not filename:
            timestamp = time.strftime("%Y%m%d%H%M%S")
            filename = "sample_%s" % timestamp

        rate = self.packetcounter * 120 / (time.time() - self.starttime)
        print "Packets received: %d/%d" % (self.packetcounter, self.exppacket)
        print "Average rate: %.1f bytes/sec" % rate
        print "Saving to", filename

        # concatenation of sysex messages
        with open(filename + ".sds", "wb") as f:
            f.write(bytearray(self.raw))

        # adjust data size to sample length
        nsamples = int(self.header.get('sample_length', len(self.data) / 2))
        self.data = self.data[:nsamples * 2]

        # sample data only (7-in-8-bit chunks, big-endian: .dcba987 .6543210)
        with open(filename + ".dmp", "wb") as f:
            f.write(bytearray(self.data))

        # decoded sample data
        format = int(self.header['sample_format'])
        out = []
        if format == 14:
            pos = 0
            while pos < len(self.data):
                # assume big-endian
                tmp = self.data[pos] << 7 | self.data[pos + 1]
                # convert to s16le
                tmp = u2s(tmp << 2)
                out.append(tmp & 0xFF)
                out.append((tmp >> 8) & 0xFF)
                pos += 2
            print
        else:
            print format, "bit samples are not supported"

        if len(out):
            # write raw file
            with open(filename + ".raw", "wb") as f:
                f.write(bytearray(out))
            # write WAV file
            writeWAV(filename + ".wav",
                     int(self.header.get("sample_rate", 22050)),
                     bytearray(out))

        # sample properties
        with open(filename + ".txt", "w") as f:
            f.writelines(["%s: %s\n" % i for i in self.header.iteritems()])
            f.writelines(["file_%s: %s.%s\n" % (suffix, filename, suffix)
                          for suffix in ['sds', 'raw', 'dmp', 'wav']])
        self.reset()
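# A worked example (assumed byte values) of the 14-bit decode used in saveFile() above:
# each sample is two 7-bit MIDI data bytes, big-endian, shifted left by 2 before the
# u2s() conversion (presumably unsigned-to-signed) and little-endian 16-bit output.
#
#   hi, lo = 0x12, 0x34
#   word = hi << 7 | lo   # 0x0934 == 2356, the raw 14-bit sample
#   word << 2             # 0x24D0, scaled into 16-bit range before u2s()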
def download(url, path=DEFAULT_PATH):
    """Download HTML page and page assets (img, css files) from given 'url'."""
    # Generate output 'page_name' and 'file_path' and load page
    page_name = get_filename(url=url)
    file_path = get_full_path(path, page_name)
    # Make request, edit Soup object and save data into output file
    content = make_request(url)
    soup = BeautifulSoup(content, "html.parser")
    # Get list of links
    links = get_links(tag_meta=ASSET_TAGS, url=url, soup=soup)
    # Edit Soup object and replace links to local files
    if links:
        # Generate folder name and path
        folder_name = get_foldername(url=url)
        folder_path = get_full_path(path, folder_name)
        # Create output directory (if it doesn't exist)
        if not os.path.isdir(folder_path):
            create_dir(local_path=folder_path)
        to_download = []  # Initiate download queue
        # Iterate links and edit soup object
        for link_dict in links:
            # Destructure link's dict
            fact_link, abs_link, tag = itemgetter('fact_link', 'abs_link', 'tag')(link_dict)
            # Generate file_name, local path & local link for item
            file_name = get_filename(url=abs_link)
            local_path = get_full_path(path, folder_name, file_name)
            local_link = get_full_path(folder_name, file_name)
            # Edit soup object
            soup = edit_soup(url=fact_link, tag=tag, meta=ASSET_TAGS[tag],
                             local_link=local_link, soup=soup)
            # Add asset's absolute url and local_path into queue
            to_download.append((abs_link, local_path))
        # Save modified soup
        save_file(data=soup.prettify(), local_path=file_path, mode='w')
        # Initiate progress bar and download assets
        progress_bar = IncrementalBar('Loading resources:', max=len(to_download))
        for abs_link, local_path in to_download:
            try:
                content = make_request(abs_link)
                save_file(data=content, local_path=local_path)
            except Exception:
                logger.error(f'Asset \'{abs_link}\' was not downloaded.')
            progress_bar.next()  # Iterate progress bar
        # Finish progress_bar
        progress_bar.finish()
    # Return output's file path
    return file_path
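# A minimal usage sketch for download (hypothetical URL and output directory); the
# exact output name comes from get_filename(), so the result path is illustrative only:
#
#   file_path = download('https://example.com/articles', path='/tmp')
#   print(file_path)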
def cargar_tweets(limite=None, agregar_sexuales=False, cargar_features=True):
    """Loads all tweets, including those set aside for evaluation (even if no
    evaluation is wanted) and the badly voted ones, so that features are
    computed for all of them. Filtering is meant to happen afterwards."""
    conexion = open_db()
    if DB_ENGINE == 'sqlite3':
        cursor = conexion.cursor()
    else:
        cursor = conexion.cursor(buffered=True)  # buffered, so the row count is known before iterating

    if agregar_sexuales:
        consulta_sexuales_tweets = ""
        consulta_limite_sexuales = ""
    else:
        consulta_sexuales_tweets = "censurado_tweet = 0"
        consulta_limite_sexuales = "AND " + consulta_sexuales_tweets
    consulta_sexuales_features = consulta_sexuales_tweets

    if limite:
        consulta = "SELECT id_tweet FROM tweets WHERE evaluacion = 0 " + consulta_limite_sexuales \
                   + " ORDER BY RAND() LIMIT " + unicode(limite)
        cursor.execute(consulta)
        bar = IncrementalBar("Eligiendo tweets aleatorios\t", max=cursor.rowcount,
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        ids = []
        for (tweet_id,) in cursor:
            ids.append(tweet_id)
            bar.next()
        bar.finish()
        str_ids = '(' + unicode(ids).strip('[]L') + ')'
        consulta_prueba_tweets = "T.id_tweet IN {ids}".format(ids=str_ids)
        consulta_prueba_features = "id_tweet IN {ids}".format(ids=str_ids)
    else:
        consulta_prueba_features = ""
        consulta_prueba_tweets = ""

    if not agregar_sexuales and limite:
        restricciones_tweets = "WHERE " + consulta_sexuales_tweets + " AND " + consulta_prueba_tweets
        restricciones_features = "WHERE " + consulta_sexuales_features + " AND " + consulta_prueba_features
    elif not agregar_sexuales:
        restricciones_tweets = "WHERE " + consulta_sexuales_tweets
        restricciones_features = "WHERE " + consulta_sexuales_features
    elif limite:
        restricciones_tweets = "WHERE " + consulta_prueba_tweets
        restricciones_features = "WHERE " + consulta_prueba_features
    else:
        restricciones_tweets = ""
        restricciones_features = ""

    if DB_ENGINE == 'sqlite3':
        consulta = """
            SELECT id_account, T.id_tweet, text_tweet, favorite_count_tweet,
                   retweet_count_tweet, eschiste_tweet, censurado_tweet, name_account,
                   followers_count_account, evaluacion, votos, votos_humor,
                   promedio_votos, categoria_tweet
            FROM tweets AS T
                 NATURAL JOIN twitter_accounts
                 LEFT JOIN (SELECT id_tweet,
                                   Avg(voto) AS promedio_votos,
                                   Count(*) AS votos,
                                   Count(case when voto <> 'x' then 1 else NULL end) AS votos_humor
                            FROM votos
                            WHERE voto <> 'n'
                            GROUP BY id_tweet) V
                        ON (V.id_tweet = T.id_tweet)
            {restricciones}
        """.format(restricciones=restricciones_tweets)
    else:
        consulta = """
            SELECT id_account, T.id_tweet, text_tweet, favorite_count_tweet,
                   retweet_count_tweet, eschiste_tweet, censurado_tweet, name_account,
                   followers_count_account, evaluacion, votos, votos_humor,
                   promedio_votos, categoria_tweet
            FROM tweets AS T
                 NATURAL JOIN twitter_accounts
                 LEFT JOIN (SELECT id_tweet,
                                   Avg(voto) AS promedio_votos,
                                   Count(*) AS votos,
                                   Count(If(voto <> 'x', 1, NULL)) AS votos_humor
                            FROM votos
                            WHERE voto <> 'n'
                            GROUP BY id_tweet) V
                        ON (V.id_tweet = T.id_tweet)
            {restricciones}
        """.format(restricciones=restricciones_tweets)

    cursor.execute(consulta)
    bar = IncrementalBar("Cargando tweets\t\t\t",
                         max=(999999 if DB_ENGINE == 'sqlite3' else cursor.rowcount),
                         suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    resultado = {}
    for (id_account, tweet_id, texto, favoritos, retweets, es_humor, censurado,
         cuenta, seguidores, evaluacion, votos, votos_humor, promedio_votos,
         categoria) in cursor:
        tweet = Tweet()
        tweet.id = tweet_id
        tweet.texto_original = texto
        tweet.texto = texto
        tweet.favoritos = favoritos
        tweet.retweets = retweets
        tweet.es_humor = es_humor
        tweet.es_chiste = es_humor
        tweet.censurado = censurado
        tweet.cuenta = cuenta
        tweet.seguidores = seguidores
        tweet.evaluacion = evaluacion
        tweet.categoria = categoria
        if votos:
            tweet.votos = int(votos)  # This one and the next come from count and sum, so they are Decimal.
        if votos_humor:
            tweet.votos_humor = int(votos_humor)
        if promedio_votos:
            tweet.promedio_de_humor = promedio_votos
        resultado[tweet.id] = tweet
        bar.next()
    bar.finish()

    if cargar_features:
        consulta = """
            SELECT id_tweet, nombre_feature, valor_feature
            FROM features
                 NATURAL JOIN tweets
            {restricciones}
        """.format(restricciones=restricciones_features)
        cursor.execute(consulta)
        bar = IncrementalBar("Cargando features\t\t",
                             max=(9999999 if DB_ENGINE == 'sqlite3' else cursor.rowcount),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for (id_tweet, nombre_feature, valor_feature) in cursor:
            if id_tweet in resultado:
                resultado[id_tweet].features[nombre_feature] = valor_feature
            bar.next()
        bar.finish()

    cursor.close()
    conexion.close()
    return list(resultado.values())
#!/usr/bin/env python2
# coding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import sys

from progress.bar import IncrementalBar

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from clasificador.herramientas.define import SUFIJO_PROGRESS_BAR
import clasificador.herramientas.utils

if __name__ == "__main__":
    largo = 1000
    bar = IncrementalBar('Calculando', max=largo, suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for i in xrange(largo):
        clasificador.herramientas.utils.ejecutar_comando("echo 1")
        bar.next()
    bar.finish()
def scrapezillowdata(zillow_urls, header_input):
    # Initialize progress bar
    bar = IncrementalBar("  Scraping Zillow", max=len(zillow_urls))

    # Initialize list to store home data during loop over each home's Zillow url
    home_data_list = []

    # Loop over each home Zillow URL and scrape pertinent details
    for url in zillow_urls:
        # First, obtain the HTML from the current home Zillow URL using gethtml.py
        home_html = gethtml(url, header_input)

        # The home address is simply taken directly from its own URL.
        home_address = (url.replace("https://www.zillow.com/homedetails/",
                                    "").replace("-", " ").split("/", 1)[0])

        # First, we search for the home's sell price. In Zillow, this variable is under a
        # "span" class="ds-status-details" tag. The find method will find this variable and
        # store it into a tag (i.e. ds_status_details). Generally, Zillow will show "Sold"
        # and the sell price in this tag. Therefore, we check this tag for the key word
        # "sold" that we know will generally be contained in the tag's text. If the key
        # word is found in the tag's text, then we store the text found in the tag into the
        # appropriate variable while removing the unwanted characters. If the key word is
        # not found, then the appropriate variable will retain its initialization value of
        # "n/a".
        ds_status_details = home_html.find("span", class_="ds-status-details")
        sold_price = "n/a"
        if "sold" in ds_status_details.text.lower():
            sold_price = (ds_status_details.text.replace("Sold", "")
                          .replace(": $", "").replace(",", ""))

        # Next, we search for the number of beds, baths, and the home's square footage. In
        # Zillow, each one of these variables is under a "span"
        # class="ds-bed-bath-living-area" tag. The find_all method will find each one of
        # these variables and store them into a result set (i.e. ds_bed_bath_living_area).
        # Each item of the result set will either contain number of beds and "bd", number
        # of baths and "ba", or the home's size and "Square Feet". We loop over the result
        # set checking each item for key words that we know will be contained in the item's
        # text. If the key word is found in the item's text, then we store the text found
        # in the item into the appropriate variable while removing the unwanted characters.
        # If the key word is not found, then the appropriate variable will retain its
        # initialization value of "n/a".
        ds_bed_bath_living_area = home_html.find_all("span", class_="ds-bed-bath-living-area")
        beds = "n/a"
        baths = "n/a"
        size = "n/a"
        for item in ds_bed_bath_living_area:
            if "bd" in item.text.lower():
                beds = item.text.replace(" bd", "")
                continue
            if "ba" in item.text.lower():
                baths = item.text.replace(" ba", "")
                continue
            if "square feet" in item.text.lower() or "sqft" in item.text.lower():
                size = item.text.replace(",", "").replace("Square Feet", "sqft")
                continue

        # Next, we search for the home type, year built, heating, cooling, parking, and lot
        # size. In Zillow, each one of these variables is under a "li"
        # class="ds-home-fact-list-item" tag. The find_all method will find each one of
        # these variables and store them into a result set (i.e. ds_home_fact_list_items).
        # Each item of the result set has a child "span"
        # class="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH" tag (i.e. the "label" tag)
        # AND a child "span" class="Text-c11n-8-11-1__aiai24-0 hqfqED" tag (i.e. the
        # "value" tag). For example, for "home type" information (generally the first item
        # in the result set), there will be a "label" tag that will contain the text
        # "Type" and there will be a "value" tag that will contain the text
        # "Single Family". We loop over the result set checking each item's "label" tag
        # for key words that we know will be contained in that tag. If the key word is
        # found in the item's "label" tag, then we store the text found in the item's
        # adjacent "value" tag into the appropriate variable while removing the unwanted
        # characters. If the key word is not found, then the appropriate variable will
        # retain its initialization value of "n/a".
        ds_home_fact_list_items = home_html.find_all("li", class_="ds-home-fact-list-item")
        home_type = "n/a"
        year_built = "n/a"
        heating = "n/a"
        cooling = "n/a"
        parking = "n/a"
        lot_size = "n/a"
        for item in ds_home_fact_list_items:
            label = item.find("span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH").text.lower()
            value = item.find("span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text
            if "type" in label:
                home_type = value
                continue
            if "year built" in label:
                year_built = value
                continue
            if "heating" in label:
                heating = value
                continue
            if "cooling" in label:
                cooling = value
                continue
            if "parking" in label:
                parking = value
                continue
            if "lot" in label:
                lot_size = value.replace(",", "")
                continue

        # Append home data information to list
        home_data_list.append([
            home_address,
            sold_price,
            beds,
            baths,
            size,
            home_type,
            year_built,
            heating,
            cooling,
            parking,
            lot_size,
        ])

        bar.next()  # to advance progress bar

    bar.finish()  # to finish the progress bar
    print()  # to add space following progress bar

    # Convert home_data_list into pandas dataframe.
    home_data = list2frame(home_data_list)
    return home_data
def check_rds_instance(rds_name, states, connection, auto_name):
    # Create RDS client
    rds = boto3.client('rds')
    print('\n' + tag + 'Creating Database\n'
          'Please wait as it typically takes 10-15 minutes before an '
          'instance is available.')

    # Create progress bar and continuously update it until the instance
    # reaches the 'Available' state
    bar = IncrementalBar(rds_name, max=len(states), suffix='')
    while True:
        global creating
        global backing_up
        global available
        global monitoring
        global logging
        global count

        # Check the RDS instance's current status
        response = rds.describe_db_instances(DBInstanceIdentifier=rds_name)
        instances = response.get('DBInstances')
        status = instances[0].get('DBInstanceStatus').title()

        # Handle 'Creating' status
        if status == 'Creating' and not creating:
            creating = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r', flush=True)
        # Handle 'Backing-Up' status
        elif status == 'Backing-Up' and not backing_up:
            backing_up = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r', flush=True)
        # Handle 'Available' status
        elif status == 'Available' and not available:
            available = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r', flush=True)
            break
        # Handle 'Configuring-Enhanced-Monitoring' status
        elif status == 'Configuring-Enhanced-Monitoring' and not monitoring:
            monitoring = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r', flush=True)
        # Handle 'Configuring-Log-Exports' status
        elif status == 'Configuring-Log-Exports' and not logging:
            logging = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r', flush=True)

        # Sleep for 30 seconds between checks
        time.sleep(30)

    # Finish progress bar
    bar.finish()

    # Check for schema and grab endpoint
    check_schema = False
    while not check_schema:
        # Automatically create the PostgreSQL schema based on auto_name
        if auto_name:
            check_schema = True
            response = rds.describe_db_instances(DBInstanceIdentifier=rds_name)
            instances = response.get('DBInstances')
            endpoint = instances[0].get('Endpoint').get('Address')
            cps.create_postgres_sql(rds_name, auto_name, endpoint, connection)
        # Ask for a schema file if auto_name was not provided
        else:
            print('\n' + tag + 'Database Ready\n'
                  'Please specify the schema filename (excluding .json):',
                  end=' ')
            schema_name = input()
            if schema_name != '' and not schema_name.endswith('.json'):
                check_schema = True
                response = rds.describe_db_instances(
                    DBInstanceIdentifier=rds_name)
                instances = response.get('DBInstances')
                endpoint = instances[0].get('Endpoint').get('Address')
                cps.create_postgres_sql(rds_name, schema_name, endpoint,
                                        connection)
            # Handle invalid input
            else:
                print(Style.BRIGHT +
                      'Invalid entry. Please enter a valid schema filename '
                      'excluding the ".json" extension.\n')
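# The polling loop above can also be expressed with boto3's built-in waiter,
# which calls describe_db_instances internally until the instance is ready.
# A minimal sketch (the 30-second delay mirrors the loop above; MaxAttempts
# is an assumption):
import boto3

rds = boto3.client('rds')
waiter = rds.get_waiter('db_instance_available')
waiter.wait(DBInstanceIdentifier=rds_name,
            WaiterConfig={'Delay': 30, 'MaxAttempts': 60})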
def download(self):
    bar = IncrementalBar('Downloading ', max=10)
    self.driver.get(CME_LINK +
                    '/tools-information/quikstrike/options-calendar.html')
    first_window = self.driver.window_handles[0]
    bar.next()
    sleep(5)
    self.driver.get(
        CME_TOOLS_LINK +
        '/User/QuikStrikeView.aspx?viewitemid=IntegratedCMEOptionExpirationCalendar'
    )
    bar.next()
    sleep(5)
    self.driver.find_element_by_xpath(
        '//a[@id="MainContent_ucViewControl_IntegratedCMEOptionExpirationCalendar_ucViewControl_hlCMEProducts"]'
    ).click()
    bar.next()
    sleep(5)
    # The product browser opens in a new window; switch to it
    for handle in self.driver.window_handles:
        if handle != first_window:
            self.driver.switch_to_window(handle)
    self.driver.find_element_by_xpath(
        '//a[@id="ctl00_cphMain_lvTabs_ctrl3_lbTab"]').click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//a[@id="cphMain_ucProductBrowser_ucProductFilter_ucTrigger_lnkTrigger"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//input[@id="cphMain_ucProductBrowser_ucProductFilter_ucGroupList_rblGroups_4"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//input[@id="cphMain_ucProductBrowser_ucProductFilter_ucContractTypeList_rblContractType_1"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//input[@id="cphMain_ucProductBrowser_ucProductFilter_btnApply"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//a[@id="cphMain_ucProductBrowser_ucProductActions_ucTrigger_lnkTrigger"]'
    ).click()
    bar.next()
    sleep(3)
    self.driver.find_element_by_xpath(
        '//a[@id="cphMain_ucProductBrowser_ucProductActions_lnkExport"]'
    ).click()
    bar.next()
    # self.driver.find_element_by_xpath(
    #     '//a[@id="cphMain_ucProductBrowser_ucProductActions_lnkShowExpirations"]').click()
    # bar.next()
    # sleep(4)
    # iframe = self.driver.find_element_by_xpath('//iframe[@id="mainFrame"]')
    # self.driver.switch_to_frame(iframe)
    # bar.next()
    # sleep(4)
    # self.driver.find_element_by_xpath('//a[@id="ctl03_ucExport_lnkTrigger"]').click()
    # bar.next()
    sleep(5)
    bar.finish()
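# The fixed sleep() calls above are fragile when page load times vary. A
# sketch of one click rewritten with Selenium's explicit waits instead; this
# is meant to live inside download() so self.driver is available, the XPath
# is one of the locators already used there, and the 15-second timeout is an
# assumption:
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

wait = WebDriverWait(self.driver, 15)
wait.until(EC.element_to_be_clickable(
    (By.XPATH, '//a[@id="ctl00_cphMain_lvTabs_ctrl3_lbTab"]'))).click()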
    if include_wireframe:
        mlab.triangular_mesh(x, y, z, faces, color=(0, 0, 0),
                             representation='wireframe')
    mlab.show()


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    v, f = sess.run([verts, faces])
    vis_mesh(v, f)

    for _ in range(n2):
        # IncrementalBar may be undefined if the progress import failed;
        # fall back to running without a bar
        try:
            bar = IncrementalBar(max=n1)
        except NameError:
            bar = None
        for i in range(n1):
            if bar is not None:
                bar.next()
            sess.run(opt)
            # print(loss_val)
        v, f, loss_val = sess.run([verts, faces, loss])
        vis_mesh(v, f)
        print(loss_val)
        if bar is not None:
            bar.finish()
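# The try/except NameError above guards against progress not being available.
# A standalone sketch of that optional-dependency pattern, factored into a
# reusable wrapper (progress_iter is a hypothetical helper, not part of the
# original code):
try:
    from progress.bar import IncrementalBar
except ImportError:
    IncrementalBar = None


def progress_iter(iterable, total):
    """Yield items from iterable, advancing a progress bar when available."""
    bar = IncrementalBar(max=total) if IncrementalBar is not None else None
    for item in iterable:
        yield item
        if bar is not None:
            bar.next()
    if bar is not None:
        bar.finish()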
def hyperpack(hyperpack_list):
    '''
    Install Large Packs Of Applications And Packages
    '''
    os_bar = IncrementalBar('Getting Operating System...', max=1)
    os_bar.next()

    installer = Installer()
    updater = Updater()
    cleaner = Uninstaller()

    hyperpacks = hyperpack_list.split(',')

    password = ""
    if platform == 'linux' or platform == 'darwin':
        password = getpass('Enter your password: ')
        click.echo('\n')
        password_bar = IncrementalBar('Verifying Password...', max=1)
        exitcode = is_password_valid(password)
        if exitcode == 1:
            click.echo('Wrong Password Entered... Aborting Installation!')
            return
        password_bar.next()

    click.echo('\n')

    if platform == 'linux':
        for hyperpack in hyperpacks:
            hyper_pack = hyperpkgs[hyperpack]
            packages = hyper_pack.packages.split(',')
            apps = hyper_pack.applications.split(',')

            # Installing Required Packages
            for package in packages:
                installer.install_task(
                    devpackages_linux[package],
                    f'sudo -S apt-get install -y {package}', password,
                    f'{package} --version',
                    [f'{devpackages_linux[package]} Version'])

            # Installing Required Applications
            for app in apps:
                installer.install_task(applications_linux[app],
                                       f'sudo -S snap install --classic {app}',
                                       password, '', [])

            # Updating Required Packages And Applications
            for package in packages:
                updater.updatepack(package, password)
            for app in apps:
                updater.updateapp(app, password)

        cleaner.clean(password)

    elif platform == 'win32':
        for hyperpack in hyperpacks:
            hyper_pack = hyperpkgs[hyperpack]
            packages = hyper_pack.packages.split(',')
            apps = hyper_pack.applications.split(',')

            for package in packages:
                installer.install_task(
                    package_name=devpackages_windows[package],
                    script=f'choco install {package} -y',
                    password="",
                    test_script=f'{package} --version',
                    tests_passed=[f'{devpackages_windows[package]} Version'])
            for package in packages:
                updater.updatepack(package, password="")

            for app in apps:
                installer.install_task(package_name=applications_windows[app],
                                       script=f'choco install {app} -y',
                                       password="",
                                       test_script='',
                                       tests_passed=[])
            for app in apps:
                updater.updateapp(app, password="")

    elif platform == 'darwin':
        for hyperpack in hyperpacks:
            hyper_pack = hyperpkgs[hyperpack]
            packages = hyper_pack.packages.split(',')
            apps = hyper_pack.applications.split(',')

            for package in packages:
                installer.install_task(
                    package_name=devpackages_macos[package],
                    script=f'brew install {package}',
                    password="",
                    test_script=f'{package} --version',
                    tests_passed=[f'{devpackages_macos[package]} Version'])
            for package in packages:
                updater.updatepack(package, password="")

            for app in apps:
                installer.install_task(package_name=applications_macos[app],
                                       script=f'brew cask install {app}',
                                       password="",
                                       test_script='',
                                       tests_passed=[])
            for app in apps:
                updater.updateapp(app, password="")
def populate_database(video_directory,
                      embedder,
                      pose_estimator,
                      cursor,
                      num_people=None,
                      frames_per_person=None):
    folders = [
        f for f in os.listdir(video_directory)
        if os.path.isdir(os.path.join(video_directory, f))
    ]
    for person_number, folder in enumerate(folders):
        if person_number == num_people:
            return
        frame_info = os.path.join(video_directory,
                                  folder + '.labeled_faces.txt')
        with open(frame_info) as info_file:
            csv_data = csv.reader(info_file, delimiter=',')
            embedding = None
            csv_data = list(csv_data)
            num_frames = len(csv_data)
            if frames_per_person:
                num_frames = min(len(csv_data), frames_per_person)
            total_people = num_people if num_people else len(folders)
            bar = IncrementalBar(
                f'Adding person {person_number + 1:>3} of {total_people:>3}',
                max=num_frames)
            frame_indices = np.arange(len(csv_data))
            if frames_per_person and len(csv_data) > frames_per_person:
                frame_indices = np.linspace(0,
                                            len(csv_data) - 1,
                                            num=frames_per_person)
                # Use a plain integer dtype; uint8 would wrap around for
                # videos with more than 255 frames
                frame_indices = frame_indices.astype(int)
            for frame_num in frame_indices:
                image_path = csv_data[frame_num][0].replace('\\', '/')
                image_path = os.path.join(video_directory, image_path)
                image = Image.open(image_path)
                image = crop_to_face(image)
                if image is None:
                    bar.next()
                    continue
                # Embed the first usable frame once per person
                if embedding is None:
                    embedding = embedder.embed(image)
                    embedding = embedding.flatten()
                    cursor.execute(
                        'INSERT INTO videos (id, embedding) values (?, ?)',
                        (person_number, embedding))
                pose = pose_estimator.estimate_pose(image)
                landmarks = get_normalized_landmarks(image)
                if landmarks is None:
                    bar.next()
                    continue
                cursor.execute(
                    'INSERT INTO frames (video_id, image_path, pose, landmarks)'
                    + ' values (?, ?, ?, ?)',
                    (person_number, image_path, pose, landmarks))
                bar.next()
            print()
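# populate_database() writes through its cursor into 'videos' and 'frames'
# tables. A minimal sketch of a compatible SQLite schema, with table and
# column names taken from the INSERT statements above (the column types are
# assumptions):
import sqlite3

connection = sqlite3.connect('faces.db')
cursor = connection.cursor()
cursor.execute('CREATE TABLE IF NOT EXISTS videos (id INTEGER, embedding BLOB)')
cursor.execute('CREATE TABLE IF NOT EXISTS frames ('
               'video_id INTEGER, image_path TEXT, pose BLOB, landmarks BLOB)')
connection.commit()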
class AuthorCrawler:
    visitedProfileURL = []
    queueProfileURL = []
    visitedArticleURL = []
    queueArticleURL = []
    numberOfCrawlerProfile = 0

    def __init__(self):
        self.baseURL = 'https://www.researchgate.net/'
        from progress.bar import IncrementalBar
        self.progress_bar = IncrementalBar(
            'Crawling',
            max=MIN_NUMBER_OF_PROFILE,
            suffix='%(percent)d%% %(remaining)s remaining - eta %(eta_td)s')

    def crawl(self):
        self.queueProfileURL.extend(START_PAGES)
        os.makedirs(AFTER_CRAWL_AUTHOR_DIR, exist_ok=True)
        while self.numberOfCrawlerProfile < MIN_NUMBER_OF_PROFILE:
            # Refill the profile queue from queued articles when it runs dry
            while len(self.queueProfileURL) == 0:
                if len(self.queueArticleURL) == 0:
                    self.progress_bar.finish()
                    return
                try:
                    self.queueProfileURL.extend(
                        filter(
                            lambda x: x not in self.visitedProfileURL and
                            x not in self.queueProfileURL,
                            self.getAuthorFromArticle(
                                self.queueArticleURL.pop(0))))
                except Exception:
                    pass
            try:
                self.progress_bar.next()
                self.crawlProfile(self.queueProfileURL.pop(0))
            except Exception:
                pass
        self.progress_bar.finish()

    def getAuthorFromArticle(self, url):
        r = requests.get(url)
        s = BeautifulSoup(r.text, 'html.parser')
        authors = s.findAll('a', class_='display-name')
        authorsList = []
        for author in authors:
            authorsList.append(self.baseURL + author['href'])
        return authorsList

    def getArticleIDFromURL(self, url):
        return re.findall(r'publication/(?P<id>\d+)_', url)[0]

    def crawlProfile(self, profURL):
        if not profURL.endswith('publications'):
            profURL += '/publications'
        r = requests.get(profURL)
        s = BeautifulSoup(r.text, 'html.parser')
        name = s.find('h1', class_='profile-header-name')
        name = name.text
        n = 1
        articles = []
        while True:
            url = profURL + '/' + str(n)
            n += 1
            res = self.parseProfilePage(url)
            if res is None or len(res) == 0:
                break
            articles.extend(res)
        self.queueArticleURL.extend(
            filter(
                lambda x: x not in self.visitedArticleURL and
                x not in self.queueArticleURL,
                map(lambda x: x[0], articles)))
        js = {}
        js['Name'] = name
        js['Article'] = articles
        file_name = '{}.json'.format(name)
        with open(os.path.join(AFTER_CRAWL_AUTHOR_DIR, file_name),
                  'w') as outfile:
            json.dump(js, outfile)
        self.numberOfCrawlerProfile += 1
        print(self.numberOfCrawlerProfile)

    def parseProfilePage(self, url):
        # Return the article URLs (and IDs) listed on one page of the profile
        r = requests.get(url)
        s = BeautifulSoup(r.text, 'html.parser')
        articles = s.findAll('a', class_='ga-publication-item')
        result = []
        for article in articles:
            result.append((self.baseURL + article['href'],
                           self.getArticleIDFromURL(article['href'])))
        return result
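# Usage sketch: START_PAGES, MIN_NUMBER_OF_PROFILE, and AFTER_CRAWL_AUTHOR_DIR
# are module-level settings the class reads; they are assumed to be defined in
# the project's configuration.
if __name__ == '__main__':
    crawler = AuthorCrawler()
    crawler.crawl()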
def prepare_datasets(datapath,
                     workdirpath,
                     dataset_type,
                     image_shape=(256, 256),
                     input_mask=None,
                     fraction=None,
                     n_slice_per_file=None,
                     realimag_img=True,
                     realimag_kspace=True,
                     kspace_norm=None,
                     img_norm=None):
    """
    Prepares the work directory, and turns the raw data into easy-to-use,
    optionally masked datasets.
    """
    # Get the list of usable files
    files = [
        f for f in os.listdir(datapath)
        if (os.path.isfile(os.path.join(datapath, f)) and ('.h5' in f))
    ]
    if fraction:
        files = files[:int(np.floor(fraction * len(files)))]

    output_dir = os.path.join(workdirpath, dataset_type)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    index_dict = {}
    if files != []:
        bar = IncrementalBar('{} dataset_files'.format(dataset_type),
                             max=len(files))
        for f in files:
            filepath = os.path.join(output_dir, f)
            if os.path.isfile(filepath):
                continue
            h5f = h5py.File(os.path.join(datapath, f), 'r')
            if 'kspace' not in h5f:
                h5f.close()
                continue
            if n_slice_per_file is None:
                n_slices = h5f['kspace'].shape[0]
            else:
                n_slices = n_slice_per_file
            index_dict[filepath] = n_slices

            kdata_array = np.empty((n_slices, *image_shape, 2))
            kdata_clean_array = np.empty((n_slices, *image_shape, 2))
            image_array = np.empty((n_slices, *image_shape, 2))
            image_clean_array = np.empty((n_slices, *image_shape, 2))
            mask_array = np.empty((n_slices, *image_shape))
            inverse_mask_array = np.empty((n_slices, *image_shape))

            # Take n_slices slices from the middle of the volume
            k = 0
            imin = int(np.floor(h5f['kspace'].shape[0] / 2 - n_slices / 2))
            imax = imin + n_slices
            for i, kdata_raw in enumerate(h5f['kspace']):
                if i < imin or i >= imax:
                    continue

                # Cropping
                image_clean = ifft(kdata_raw)
                image_clean = crop(image_clean, size=image_shape)

                # Normalize image; both norms are optional dicts holding a
                # NumPy-level callable under the 'np' key
                if img_norm and img_norm['np']:
                    image_clean = img_norm['np'](image_clean)
                kdata_clean = fft(image_clean)
                if kspace_norm and kspace_norm['np']:
                    kdata_clean = kspace_norm['np'](kdata_clean)

                # Apply mask
                if input_mask:
                    mask = input_mask.get_mask(kdata_clean)
                    kdata = kdata_clean * mask + 0.0
                    mask_array[k, :, :] = mask
                    inverse_mask = mask == 0
                    inverse_mask = inverse_mask.astype(float)
                    inverse_mask_array[k, :, :] = inverse_mask
                else:
                    kdata = kdata_clean
                image = ifft(kdata)
                image_clean = ifft(kdata_clean)

                # Fill the arrays, either as real/imaginary parts or as
                # magnitude/phase
                if realimag_kspace:
                    kdata_array[k, :, :, 0] = np.real(kdata)
                    kdata_array[k, :, :, 1] = np.imag(kdata)
                    kdata_clean_array[k, :, :, 0] = np.real(kdata_clean)
                    kdata_clean_array[k, :, :, 1] = np.imag(kdata_clean)
                else:
                    kdata_array[k, :, :, 0] = np.abs(kdata)
                    kdata_array[k, :, :, 1] = np.angle(kdata)
                    kdata_clean_array[k, :, :, 0] = np.abs(kdata_clean)
                    kdata_clean_array[k, :, :, 1] = np.angle(kdata_clean)
                if realimag_img:
                    image_array[k, :, :, 0] = np.real(image)
                    image_array[k, :, :, 1] = np.imag(image)
                    image_clean_array[k, :, :, 0] = np.real(image_clean)
                    image_clean_array[k, :, :, 1] = np.imag(image_clean)
                else:
                    image_array[k, :, :, 0] = np.abs(image)
                    image_array[k, :, :, 1] = np.angle(image)
                    image_clean_array[k, :, :, 0] = np.abs(image_clean)
                    image_clean_array[k, :, :, 1] = np.angle(image_clean)
                k += 1
            h5f.close()

            outfile = h5py.File(filepath, 'w')
            outfile.create_dataset('kspace_masked', data=kdata_array)
            outfile.create_dataset('kspace_ground_truth',
                                   data=kdata_clean_array)
            outfile.create_dataset('image_masked', data=image_array)
            outfile.create_dataset('image_ground_truth',
                                   data=image_clean_array)
            outfile.create_dataset('mask', data=mask_array)
            outfile.create_dataset('inverse_mask', data=inverse_mask_array)
            outfile.close()
            bar.next()
        bar.finish()

    if index_dict != {}:
        with open(os.path.join(output_dir, 'index.json'), 'w') as fp:
            json.dump(index_dict, fp)
        with open(os.path.join(output_dir, 'format.json'), 'w') as fp:
            json.dump((*image_shape, 2), fp)

    return output_dir
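# prepare_datasets() expects img_norm and kspace_norm to be dicts holding a
# NumPy-level normalization callable under the 'np' key (or a falsy value to
# skip normalization). A minimal sketch with a magnitude normalizer; the
# max_abs_norm function itself is an assumption, not part of the original code:
import numpy as np

def max_abs_norm(x):
    """Scale a complex array so its peak magnitude is 1."""
    return x / np.max(np.abs(x))

img_norm = {'np': max_abs_norm}
kspace_norm = {'np': max_abs_norm}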
def install_task(self, package_name: str, script: str, password: str,
                 test_script: str, tests_passed):
    try:
        installer_progress = Spinner(message=f'Installing {package_name}...',
                                     max=100)
        for _ in range(1, 75):
            time.sleep(0.01)
            installer_progress.next()
        # sudo requires the '-S' flag in order to take the password from
        # stdin, and Popen.communicate() only accepts bytes, so the password
        # string must be encoded
        proc = Popen(script.split(), stdin=PIPE, stdout=PIPE, stderr=PIPE)
        output, error = proc.communicate(password.encode())
        if proc.returncode != 0:
            click.echo(
                click.style('❎ Installation Failed... ❎',
                            fg='red',
                            blink=True,
                            bold=True))
            debug = click.prompt(
                'Would you like us to debug the failed installation?[y/n]')
            if debug == 'y':
                debugger = Debugger()
                debugger.debug(password, error)
                logs = click.prompt('Would you like to see the logs?[y/n]',
                                    type=str)
                if logs == 'y':
                    final_output = error.decode('utf-8')
                    if final_output == '':
                        click.echo('There were no logs found...')
                        return
                    else:
                        click.echo(final_output)
                        return
                return
            else:
                logs = click.prompt('Would you like to see the logs?[y/n]',
                                    type=str)
                if logs == 'y':
                    final_output = output.decode('utf-8')
                    if final_output == '':
                        click.echo('There were no logs found...')
                        return
                    else:
                        click.echo(final_output)
                        return
                return
        click.echo(
            click.style(f'\n\n 🎉 Successfully Installed {package_name}! 🎉 \n',
                        fg='green',
                        bold=True))

        # Testing the successful installation of the package
        testing_bar = IncrementalBar('Testing package...', max=100)
        if tests_passed == [] and test_script == '':
            click.echo('\n')
            click.echo(
                click.style(f'Test Passed: {package_name} Launch ✅\n',
                            fg='green'))
            return
        for _ in range(1, 21):
            time.sleep(0.002)
            testing_bar.next()
        os.system('cd --')
        for _ in range(21, 60):
            time.sleep(0.002)
            testing_bar.next()
        proc = Popen(test_script.split(), stdin=PIPE, stdout=PIPE, stderr=PIPE)
        for _ in range(60, 101):
            time.sleep(0.002)
            testing_bar.next()
        click.echo('\n')
        for test in tests_passed:
            click.echo(click.style(f'Test Passed: {test} ✅\n', fg='green'))
        return
    except subprocess.CalledProcessError as e:
        click.echo(e.output)
        click.echo('An Error Occurred During Installation...', err=True)
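# Usage sketch, mirroring how hyperpack() calls this method above; the package
# name, scripts, and expected test label are illustrative assumptions:
from getpass import getpass

installer = Installer()
installer.install_task(package_name='Git',
                       script='sudo -S apt-get install -y git',
                       password=getpass('Enter your password: '),
                       test_script='git --version',
                       tests_passed=['Git Version'])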
def run(self, http_method):
    from progress.bar import IncrementalBar
    log20x = logging.getLogger("log20x")
    log40x = logging.getLogger("log40x")
    err_log = logging.getLogger("err_logger")

    case_total = self.sample.sample_total() * self.payload.sample_total()
    bar = IncrementalBar(u'RUNNING', max=case_total)

    chrome_ua = (r"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                 r"AppleWebKit/537.36 (KHTML, like Gecko) "
                 r"Chrome/70.0.3538.102 Safari/537.36")

    with ThreadPoolExecutor(max_workers=100) as executor:
        # Submit one request per generated test case; the case id travels in
        # the Waf-Test-Case header so it can be recovered from the response
        fe = []
        for fn, ln, test_url in self._generate_url_req():
            test_header = {
                "User-Agent": chrome_ua,
                "Waf-Test-Case": "%s:%d" % (os.path.basename(fn), ln)
            }
            fe.append(
                executor.submit(self.RequestMethod[http_method],
                                test_url,
                                headers=test_header))

        # Log each response as it completes
        for f in as_completed(fe):
            try:
                r = f.result()
                test_case_id = 'unknown'
                test_url = r.request.url
                if 'Waf-Test-Case' in r.headers:
                    test_case_id = r.headers['Waf-Test-Case']
                bar.next()
                if r.status_code // 200 == 1:
                    log20x.info("[%d] %s (%s)" %
                                (r.status_code, test_url, test_case_id))
                elif r.status_code >= 500:
                    err_log.error("[%d] %s (%s)" %
                                  (r.status_code, test_url, test_case_id))
                else:
                    log40x.info("[%d] %s (%s)" %
                                (r.status_code, test_url, test_case_id))
                self.test_total += 1
            except requests.exceptions.ConnectionError as e:
                err_log.error("%s" % e)
    bar.finish()
        missing = json.loads(fault.data)
    elif isinstance(fault.data, list):
        missing = fault.data

    if '' in missing:
        del missing[missing.index(''):]

    bar = IncrementalBar('Uploading', max=len(missing))
    bar.suffix = '%(percent).1f%% - %(eta)ds'
    with open(path) as fp:
        for hash in missing:
            offset = hashes.index(unhexlify(hash)) * blocksize
            fp.seek(offset)
            block = fp.read(blocksize)
            client.update_container_data(container, StringIO(block))
            bar.next()
    bar.finish()

    return client.create_object_by_hashmap(container, object, map, **kwargs)


def download(client, container, object, path):
    res = client.retrieve_object_hashmap(container, object)
    blocksize = int(res['block_size'])
    blockhash = res['block_hash']
    bytes = res['bytes']
    map = res['hashes']

    if os.path.exists(path):
        h = HashMap(blocksize, blockhash)
os.makedirs(tempDir, exist_ok=True)
dir_ = os.path.join(args.path, "")

print(f"{Fore.GREEN}All settings valid, proceeding...")
print(f"Downloading {filename[0]}")

# Download
chunkSize = 10240
try:
    r = requests.get(url + filename[0], stream=True)
    with open(tempDir + filename[0], "wb") as f:
        pbar = IncrementalBar(
            "Downloading",
            max=int(r.headers["Content-Length"]) / chunkSize,
            suffix="%(percent)d%%",
        )
        for chunk in r.iter_content(chunk_size=chunkSize):
            if chunk:  # filter out keep-alive new chunks
                pbar.next()
                f.write(chunk)
        pbar.finish()
except Exception:
    print(f"Download {Fore.RED}failed, please try again. Exiting.")
    sys.exit()
print(f"Download {Fore.GREEN}done")

# Extraction
spinnerExtract = Spinner("Extracting... ")
spinnerExtract.start()
try:
    shutil.unpack_archive(tempDir + filename[0], tempDir)
except Exception:
    print(f"Extraction {Fore.RED}failed, please try again. Exiting.")
    exit()
def find_solutions(self, graph_setting_groups):
    results = {}
    # Check for solutions for a specific set of interaction settings
    logging.info("Number of interaction settings groups being processed: " +
                 str(len(graph_setting_groups)))
    for strength, graph_setting_group in sorted(
            graph_setting_groups.items(), reverse=True):
        logging.info("processing interaction settings group with "
                     "strength " + str(strength))
        logging.info(str(len(graph_setting_group)) +
                     " entries in this group")
        logging.info("running with " + str(self.number_of_threads) +
                     " threads...")

        temp_results = []
        bar = IncrementalBar('Propagating quantum numbers...',
                             max=len(graph_setting_group))
        bar.update()
        if self.number_of_threads > 1:
            with Pool(self.number_of_threads) as p:
                for result in p.imap_unordered(
                        self.propagate_quantum_numbers,
                        graph_setting_group, 1):
                    temp_results.append(result)
                    bar.next()
        else:
            for graph_setting_pair in graph_setting_group:
                temp_results.append(
                    self.propagate_quantum_numbers(graph_setting_pair))
                bar.next()
        bar.finish()
        logging.info('Finished!')
        if strength not in results:
            results[strength] = []
        results[strength].extend(temp_results)

    for k, v in results.items():
        logging.info("number of solutions for strength (" + str(k) +
                     ") after qn propagation: " +
                     str(sum([len(x[0]) for x in v])))

    # Remove duplicate solutions, which only differ in the interaction qn S
    results = remove_duplicate_solutions(results, self.filter_remove_qns,
                                         self.filter_ignore_qns)

    node_non_satisfied_rules = []
    solutions = []
    for result in results.values():
        for (tempsolutions, non_satisfied_laws) in result:
            solutions.extend(tempsolutions)
            node_non_satisfied_rules.append(non_satisfied_laws)
    logging.info("total number of found solutions: " + str(len(solutions)))
    violated_laws = []
    if len(solutions) == 0:
        violated_laws = analyse_solution_failure(node_non_satisfied_rules)
        logging.info("violated rules: " + str(violated_laws))

    # Finally, perform combinatorics of identical external edges
    # (initial or final state edges) and prepare the graphs for
    # amplitude generation
    match_external_edges(solutions)
    final_solutions = []
    for sol in solutions:
        final_solutions.extend(
            perform_external_edge_identical_particle_combinatorics(sol))

    return (final_solutions, violated_laws)
def main():
    parser = argparse.ArgumentParser()
    help_text_language = 'The language to which to translate e.g. "nl"'
    help_text_language += '\nCheck for language codes:'
    help_text_language += '\nhttps://sites.google.com/site/tomihasa/google-language-codes'
    help_text_service = 'The translation service to use; google (default) or deepl'
    parser.add_argument('-file', help='SRT subtitle file to translate')
    parser.add_argument('-language', help=help_text_language)
    parser.add_argument('-service', help=help_text_service)
    args = parser.parse_args()
    if args.file is None or args.language is None:
        parser.print_help()
        print('')
        raise SyntaxError('One or more arguments are missing')

    # time.clock() was removed in Python 3.8; perf_counter() is its
    # recommended replacement
    t0 = time.perf_counter()
    input_file_name = args.file
    language = args.language
    try:
        print('\n')
        if args.service == 'deepl':
            print('Using www.deepl.com translation service.')
            srt_translator = DeeplTranslator(language)
        else:
            print('Using translate.google.com translation service.')
            srt_translator = GoogleTranslator(language)

        output_file_name, file_extension = path.splitext(input_file_name)
        output_file_name = output_file_name + '.' + args.language + file_extension
        file_encoding = get_file_encoding(args.file)
        print('Input file: {}'.format(input_file_name))
        print('Input file encoding: {}'.format(file_encoding))
        print('Output file: {}\n'.format(output_file_name))

        input_file = open(args.file, "r", encoding=file_encoding)
        input_file_data = input_file.read()
        subs = list(srt.parse(input_file_data))

        progress_bar = IncrementalBar('Translating', max=len(subs))
        for sub in subs:
            # Multi-line subtitles are merged into one line before translation,
            # then the newlines are restored afterwards
            merge_is_needed = sub_merge_needed(sub.content)
            if merge_is_needed:
                text_to_be_translated, newline_count = \
                    remove_newline_char_from_line(sub.content)
                line_to_add_newlines = srt_translator.translate(
                    text_to_be_translated)
                sub.content = add_newline_char_to_line(line_to_add_newlines,
                                                       newline_count)
            else:
                sub.content = srt_translator.translate(sub.content)
            # print('translated-sub: {}'.format(sub.content))
            progress_bar.next()
        progress_bar.finish()

        srt_translation = srt.compose(subs)
        output_file = open(output_file_name, "w", encoding='utf-8')
        output_file.write(srt_translation)

        t1 = time.perf_counter()
        print('\nSuccessfully translated the SRT file.')
        print('This translation took {:.2F} seconds to complete.'.format(t1 - t0))
        print('Output saved as: {}'.format(output_file_name))
    except Exception as Exc:
        print('\nOperation failed due to an exception: {}'.format(Exc))
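# Example invocation from the command line; the script filename is an
# assumption, while the arguments match the argparse definitions above:
#
#   python srt_translate.py -file movie.srt -language nl -service deepl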
def tweets_parecidos_con_distinto_humor(corpus):
    print("Buscando tweets muy parecidos pero con distinto valor de humor...")
    parecidos_con_distinto_humor = set()
    ids_parecidos_con_distinto_humor = cargar_parecidos_con_distinto_humor()

    if ids_parecidos_con_distinto_humor:
        corpus_por_id = {tweet.id: tweet for tweet in corpus}
        for id_tweet_humor, id_tweet_no_humor in ids_parecidos_con_distinto_humor:
            parecidos_con_distinto_humor.add(
                (corpus_por_id[id_tweet_humor],
                 corpus_por_id[id_tweet_no_humor]))
    else:
        subcorpus_cuentas_de_humor = []
        subsubcorpus_cuentas_de_humor_humor = []
        subsubcorpus_cuentas_de_humor_no_humor = []
        for tweet in corpus:
            if tweet.es_chiste:
                subcorpus_cuentas_de_humor.append(tweet)
                if tweet.es_humor:
                    subsubcorpus_cuentas_de_humor_humor.append(tweet)
                else:
                    subsubcorpus_cuentas_de_humor_no_humor.append(tweet)

        subsubcorpus_cuentas_de_humor_no_humor_por_largo = defaultdict(list)

        bar = IncrementalBar("Tokenizando\t\t\t",
                             max=len(subcorpus_cuentas_de_humor),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for tweet_cuenta_humor in subcorpus_cuentas_de_humor:
            tweet_cuenta_humor.oraciones = Freeling.procesar_texto(
                tweet_cuenta_humor.texto_original)
            tweet_cuenta_humor.tokens = list(
                itertools.chain(*tweet_cuenta_humor.oraciones))
            bar.next()
        bar.finish()

        for tweet_no_humor in subsubcorpus_cuentas_de_humor_no_humor:
            subsubcorpus_cuentas_de_humor_no_humor_por_largo[len(
                tweet_no_humor.tokens)].append(tweet_no_humor)

        bar = IncrementalBar("Buscando en tweets\t\t",
                             max=len(subsubcorpus_cuentas_de_humor_humor),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for tweet_humor in subsubcorpus_cuentas_de_humor_humor:
            margen = int(round(len(tweet_humor.tokens) / 5))
            largo_min = len(tweet_humor.tokens) - margen
            largo_max = len(tweet_humor.tokens) + margen
            for largo in range(largo_min, largo_max + 1):
                for tweet_no_humor in \
                        subsubcorpus_cuentas_de_humor_no_humor_por_largo[largo]:
                    if distancia_edicion(tweet_humor.tokens,
                                         tweet_no_humor.tokens) \
                            <= max(len(tweet_humor.tokens),
                                   len(tweet_no_humor.tokens)) / 5:
                        parecidos_con_distinto_humor.add(
                            (tweet_humor, tweet_no_humor))
                        print('')
                        print(tweet_humor.id)
                        print(tweet_humor.texto_original)
                        print("------------")
                        print(tweet_no_humor.id)
                        print(tweet_no_humor.texto_original)
                        print("------------")
                        print('')
            bar.next()
        bar.finish()

        guardar_parecidos_con_distinto_humor(parecidos_con_distinto_humor)
    return parecidos_con_distinto_humor
def scrape_collins():
    global lx, lx_completed, lx_begins, lx_last_val
    global ly, ly_completed, ly_begins, ly_last_val
    global lz, lz_completed, lz_begins, lz_last_val

    # INITIALISE
    cache_file = read_cache()

    # SCRAPE METADATA
    if not lx_completed:
        print("Scraping meta-data")
        bar = IncrementalBar("Scraping stage 1/3",
                             max=len(ascii_lowercase),
                             suffix='%(percent).1f%% - %(index)s of %(max)s')
        for char in ascii_lowercase:
            data = BeautifulSoup(scraper.get(
                "https://www.collinsdictionary.com/browse/english/words-starting-with-"
                + char).content.decode("UTF-8"),
                                 features="html.parser")
            for d in data.body.find("ul", class_="columns2").find_all("a"):
                lx.append(d['href'])
            bar.next()
        cache_file.write("#0\n")
        for item in lx:
            cache_file.write(str(item) + "\n")
        cache_file.write("#END0\n")
        lx_completed = True
        cache_file.flush()
        bar.finish()
    else:
        print("Using cached data for stage 1/3.")

    # SCRAPE WORD LIST
    if not ly_completed:
        print("Building word list")
        if not ly_begins:
            cache_file.write("#1\n")
            data = BeautifulSoup(scraper.get(
                "https://www.collinsdictionary.com/browse/english/words-starting-with-digit"
            ).content.decode("UTF-8"),
                                 features="html.parser")
            for d in data.body.find("ul", class_="columns2").find_all("a"):
                ly.append(d['href'])
                cache_file.write(d['href'] + "\n")
            cache_file.flush()
            cache_file.close()
            cache_file = read_cache()
        bar = IncrementalBar("Scraping stage 2/3",
                             max=len(lx),
                             suffix='%(percent).1f%% - %(index)s of %(max)s')
        for url in lx:
            newrl = url.strip()
            newrl_c = strip_url(newrl)
            ly_last_val_c = strip_url(ly_last_val)
            if min(ly_last_val_c, newrl_c) == newrl_c:
                # Already covered by the cache; skip
                bar.next()
            else:
                data = BeautifulSoup(
                    scraper.get(newrl).content.decode("UTF-8"),
                    features="html.parser")
                for d in data.body.find("ul", class_="columns2").find_all("a"):
                    ly.append(d['href'])
                    cache_file.write(d['href'] + "\n")
                cache_file.flush()
                bar.next()
        cache_file.write("#END1\n")
        bar.finish()
        ly_completed = True
        cache_file.flush()
    else:
        print("Using cached data for stage 2/3.")

    # SCRAPE DICTIONARY
    if not (lx_completed and ly_completed):
        print("Something went awry. Forcing a restart.")
        print("Clearing local cache...", end="", flush=True)
        os.remove("cache.data")
        print(" done.", end="\n")
    else:
        print("Scraping dictionary...")
        checked_file = open("checked.txt", mode="a+")
        cache_file = read_cache()
        bar = IncrementalBar("Scraping stage 3/3",
                             max=len(ly),
                             suffix='%(percent).1f%% - %(index)s of %(max)s')
        if not lz_begins:
            cache_file.write("#2\n")
        for url in ly:
            newrl = url.strip()
            if min(strip_url(newrl),
                   strip_url(lz_last_val.strip())) == strip_url(newrl):
                # Already covered by the cache; skip
                bar.next()
            else:
                bar.next()
                data = BeautifulSoup(
                    scraper.get(newrl).content.decode("utf-8"),
                    features="html.parser")
                essence = data.find_all("div", class_="dictentry dictlink")
                essence = str(essence)
                out_file.write(essence)
                out_file.flush()
                checked_file.write(strip_url(newrl) + "\n")
                checked_file.flush()
                cache_file.write(newrl + "\n")
                cache_file.flush()
        checked_file.flush()
        checked_file.close()
        out_file.flush()
        out_file.close()
        cache_file.write("#END2\n")
        cache_file.flush()
        cache_file.close()
        bar.finish()
        print(" done.")