def return_file_objects(self):
    """Return a list of imgur_object instances that can be iterated later."""
    print('Loading images from {}'.format(self._link))
    for link in progress_bar(self.generate_download_links(),
                             unit='connections',
                             total=self._supp_len,
                             desc='Loading images'):
        try:
            req_obj = imgur_object(link=link, verbose=self.verbose)
            self.files.append(req_obj)
        except NetworkError as ne:
            if self.verbose:
                print('NetworkError raised for link {}'.format(link))
            print(ne)
        except TypeError as te:
            print(te)

    if self.verbose:
        print('Final list length is {length}, expected length is {explength}.\n'
              'Success rate is {percentage}'.format(
                  length=len(self.files),
                  explength=self._supp_len,
                  percentage=str(len(self.files) / self._supp_len * 100) + '%'))
    return self.files
def adversarial_fit(model: tf.keras.Model,
                    generator: tf.keras.Model,
                    discriminator: tf.keras.Model,
                    data_set,
                    latent_dim: int,
                    epochs: int,
                    batch_size: int = 256,
                    silent: bool = False) -> None:
    """
    Run adversarial training of the model.

    :param model: GAN model as joined generator and discriminator
    :param generator: generator model
    :param discriminator: discriminator model
    :param data_set: data set interface object generating the desired data set
    :param latent_dim: dimension of the latent space used to generate samples
    :param epochs: number of epochs to train for
    :param batch_size: number of data points in a single batch
    :param silent: if False, print a progress bar
    """
    n_batches_in_epoch = int(data_set.n_data_points / batch_size)
    for epoch in progress_bar(range(epochs), disable=silent):
        for batch_index in range(n_batches_in_epoch):
            # set up the data batch: half of it generated from latent samples
            samples = generator(
                data_set.latent_batch(batch_size // 2, latent_dim=latent_dim))
            x, y = data_set.batch(samples, size=batch_size)

            # train the discriminator on the mixed real/generated batch
            discriminator_loss, _ = discriminator.train_on_batch(x, y)

            # train the generator through the joined model with "real" labels
            inputs = data_set.latent_batch(size=batch_size, latent_dim=latent_dim)
            labels = np.ones([batch_size, 1])
            model.train_on_batch(inputs, labels)
def build_features(self, args, raw_data):
    features = {}
    for split, data in raw_data.items():
        self.split_feats = []
        print(f"Building features for {split}")

        for convo in progress_bar(data, total=len(data)):
            so_far = []
            for turn in convo['delexed']:
                speaker, utt = turn['speaker'], turn['text']
                _, _, action, values, _ = turn['targets']

                if speaker in ['agent', 'customer']:
                    utt_str = f'{speaker}|{utt}'
                    so_far.append(utt_str)
                else:
                    # create a training example during every action
                    context = so_far.copy()  # [::-1] to reverse
                    self.collect_examples(context, action, values)
                    action_str = f'action|{action}'
                    so_far.append(action_str)

        features[split] = self.split_feats
    return features
def get_embeddings(file_):
    embs = dict()
    with open(file_, 'r') as f:
        for l in progress_bar(f):
            l_split = l.strip().split()
            embs[l_split[0]] = [float(em) for em in l_split[1:]]
    print("Got {} embeddings from {}".format(len(embs), file_))
    return embs
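# Illustrative usage sketch for get_embeddings (not from the original source):
# it assumes a whitespace-separated embedding text file, e.g. a GloVe-style file
# where each line is "<token> <v1> <v2> ...". The path below is hypothetical.
embeddings = get_embeddings('embeddings.txt')
vector = embeddings.get('the')
if vector is not None:
    print('embedding dimension:', len(vector))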
def run_eval(args, datasets, model, exp_logger, kb_labels, split='dev'):
    dataloader, num_examples = setup_dataloader(datasets, args.batch_size, split)
    exp_logger.start_eval(num_examples, kind=args.filename)
    loss_func = torch.nn.CrossEntropyLoss(ignore_index=-1)
    num_outputs = len(model.outputs)
    model.eval()

    preds, labels, convo_ids, turn_counts = [], [], [], []
    for batch in progress_bar(dataloader, total=len(dataloader),
                              desc=f"Epoch {exp_logger.epoch}"):
        batch = tuple(t.to(device) for t in batch)
        full_history, batch_targets, context_tokens, tools = prepare_inputs(args, batch)

        with torch.no_grad():
            if args.task == 'ast':
                batch_scores = model(full_history, context_tokens)
                batch_loss = ast_loss(batch_scores, batch_targets, loss_func)
            elif args.task == 'cds':
                batch_scores = model(full_history, context_tokens, tools)
                batch_loss = cds_loss(batch_scores, batch_targets, loss_func)

        if args.cascade:
            batch_turn_count = batch_targets.pop()
            batch_convo_id = batch_targets.pop()

        if args.quantify or split == 'dev':
            exp_logger.eval_loss += batch_loss.mean().item()
            exp_logger.batch_steps += 1

        preds.append(batch_scores)
        labels.append(batch_targets)
        convo_ids.append(batch_convo_id if args.cascade else 0)
        turn_counts.append(batch_turn_count if args.cascade else 0)

        if args.debug and len(turn_counts) > 10:
            break

    grouped_preds = [torch.cat([pred[i] for pred in preds], dim=0)
                     for i in range(num_outputs)]
    grouped_labels = [torch.cat([label[i] for label in labels], dim=0)
                      for i in range(num_outputs)]
    ci_and_tc = (torch.cat(convo_ids, dim=0),
                 torch.cat(turn_counts, dim=0)) if args.cascade else (0, 0)

    utils = {'kb_labels': kb_labels, 'ci_and_tc': ci_and_tc}
    metrics, res_name = quantify(args, grouped_preds, grouped_labels, utils)
    exp_logger.end_eval(metrics, kind=args.filename)
    return (metrics, res_name) if split == 'dev' else metrics
def micro_f1(self, classes, predictions, targets):
    true_positive, false_positive, false_negative, true_negative = 0, 0, 0, 0
    for cls in progress_bar(classes):
        for pred, tar in zip(predictions, targets):
            if pred == cls and tar == cls:
                true_positive += 1
            elif pred == cls and tar != cls:
                false_positive += 1
            elif pred != cls and tar == cls:
                false_negative += 1
            elif pred != cls and tar != cls:
                true_negative += 1
    micro = Tester.single_f1(true_positive, false_positive, false_negative)
    print("Micro average is {:.3f}".format(micro))
    return micro
def preprocessing():
    # Read the local CSV subject export
    classifications = pd.read_csv("classification-export.csv")
    classifications['metadata'] = classifications['metadata'].apply(
        lambda x: json.loads(x))
    classifications['locations'] = classifications['locations'].apply(
        lambda x: json.loads(x))

    # Include in subject_set_ids all subject sets you want to keep
    subject_set_ids = []
    classifications = classifications.loc[
        classifications['subject_set_id'].isin(subject_set_ids)]

    classifications['smooth'] = 0
    classifications['features'] = 0
    classifications['star'] = 0

    # Copy data from metadata into correct/new CSV headers with a progress bar
    with progress_bar(total=len(classifications)) as current_progress:
        for index, row in classifications.iterrows():
            current_progress.update(1)
            for column in row['metadata']:
                # Find the right ascension and declination in the metadata
                # and assign them to columns
                if column in ['ra', 'dec', '!ra', '!dec']:
                    stripped_punctuation = column.strip(string.punctuation)
                    classifications.loc[
                        index, stripped_punctuation] = row['metadata'][column]
                # Find the image name, titled 'iauname', in the metadata
                # and assign it to a column
                if column in ['iauname', '!iauname']:
                    classifications.loc[index, 'filename'] = row['metadata'][column]
            for column in row['locations']:
                classifications.loc[index, 'image'] = row['locations'][column]

    # Drop unnecessary columns and rearrange
    classifications = classifications[[
        'subject_id', 'classifications_count', 'ra', 'dec', 'image',
        'filename', 'smooth', 'features', 'star'
    ]]

    # Create a parsed CSV for DB import
    classifications.to_csv('parsed-subject-set.csv', index=False, encoding='utf-8')
def CompressFiles(files, name, zip_extension="tar.gz", overwrite=False):
    zipped_file = zipped_contents_folder / get_current_date() / f"{name}.{zip_extension}"
    if zipped_file.exists():
        if overwrite:
            os.remove(zipped_file.absolute().as_posix())  # does not delete, unfortunately
        else:
            return zipped_file
    else:
        if not zipped_file.parent.exists():
            zipped_file.parent.mkdir(exist_ok=True)

    zip_file_client = z.ZipFile(zipped_file.as_posix(), "w", z.ZIP_DEFLATED)
    print("\nUserWarnings for duplicate files in zip are suppressed.\ncompressing ...")

    compress_progress = progress_bar(iterable=files,
                                     desc="compress",
                                     total=len(files),
                                     unit="file",
                                     ncols=120)
    for file in files:
        compress_progress.update(1)
        if file in zip_file_client.namelist():
            print(f"{file} already exists...")
            continue
        if file == zipped_file.absolute().as_posix():
            print(f"skipped: {file}")
            continue
        try:
            zip_file_client.write(file, compress_type=z.ZIP_DEFLATED)
            compress_progress.desc = file
        except (PermissionError, FileNotFoundError):
            pass

    compress_progress.close()
    zip_file_client.close()
    print(f"files compressed at: {zipped_file.as_posix()}")
    return zipped_file
def backup_to_harddrive(self, less_24_mode=True, zip_mode=True):
    # gather what you need and determine the mode
    files = []
    external_drives = self.get_external_drives()
    if not external_drives:
        raise ConnectionError("there are no external hard drives ONLINE to backup to.")

    drive_chosen = external_drives[0]
    if len(external_drives) > 1:
        print(external_drives)
        while 1:
            try:
                index = int(input("choose drive to backup to:"))
                if 1 <= index <= len(external_drives):
                    drive_chosen = external_drives[index - 1]
                    break
            except ValueError:
                print("invalid index. repeat")

    before = time.time()
    print(f"\nBackup to: {drive_chosen} started at: {get_current_datetime()}\n")
    input("_____")

    for backup_file in progress_bar(iterable=files,
                                    desc="backup",
                                    total=len(files),
                                    unit="file",
                                    ncols=120):
        self.copy_file_to_harddrive(backup_file.file, backup_file.destination)

    print(f"\nBackup to: {drive_chosen} ended at: {get_current_datetime()}")
    execution_time = time.time() - before
    execution_time = seconds_to_time(int(execution_time))
    print(f"\nbackup duration: [ {execution_time} ] seconds")
def macro_f1(self, classes, predictions, targets):
    total_f1 = []
    for cls in progress_bar(classes):
        true_positive, false_positive, false_negative, true_negative = 0, 0, 0, 0
        for pred, tar in zip(predictions, targets):
            if pred == cls and tar == cls:
                true_positive += 1
            elif pred == cls and tar != cls:
                false_positive += 1
            elif pred != cls and tar == cls:
                false_negative += 1
            elif pred != cls and tar != cls:
                true_negative += 1
        f1 = Tester.single_f1(true_positive, false_positive, false_negative)
        if f1 >= 0:
            total_f1.append(f1)
    macro = np.average(total_f1)
    print("Macro average is {:.3f}".format(macro))
    return macro
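# Illustrative usage sketch (not from the original source): computing micro- and
# macro-averaged F1 with the two methods above, assuming `tester` is an instance
# of the surrounding Tester class and Tester.single_f1 is available; the toy
# labels below are made up for demonstration.
classes = [0, 1, 2]
predictions = [0, 1, 2, 1, 0]
targets = [0, 1, 1, 1, 0]
micro = tester.micro_f1(classes, predictions, targets)
macro = tester.macro_f1(classes, predictions, targets)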
def build_features(self, args, raw_data):
    features = {}
    for split, data in raw_data.items():
        self.split_feats = []
        print(f"Building features for {split}")

        for convo in progress_bar(data, total=len(data)):
            so_far = []
            for turn in convo['delexed']:
                speaker, text = turn['speaker'], turn['text']
                utterance = f"{speaker}|{text}"

                if speaker == 'agent':
                    context = so_far.copy()
                    support_items = turn['candidates'], convo['convo_id'], turn['turn_count']
                    self.collect_one_example(context, turn['targets'], support_items)
                    so_far.append(utterance)
                elif speaker == 'action':
                    context = so_far.copy()
                    self.collect_examples(context, turn['targets'],
                                          convo['convo_id'], turn['turn_count'])
                    so_far.append(utterance)
                else:
                    so_far.append(utterance)

            context = so_far.copy()  # the entire conversation
            end_targets = turn['targets'].copy()
            end_targets[1] = 'end_conversation'
            end_targets[4] = -1
            support_items = convo['convo_id'], turn['turn_count']
            self.collect_one_example(context, end_targets, support_items)

        features[split] = self.split_feats
    return features
def accuracy(self, task):
    batch_test_loss, batch_bleu, batch_success = [], [], []
    bleu_scores, accuracy = [], []

    for test_pair in progress_bar(self.test_data):
        test_input, test_output = test_pair
        loss, predictions, visual = run_inference(self.model, test_input,
                                                  test_output, criterion=NLLLoss(),
                                                  teach_ratio=0)
        targets = test_output.data.tolist()
        predicted_tokens = [vocab.index_to_word(x, task) for x in predictions]
        target_tokens = [vocab.index_to_word(z[0], task) for z in targets]

        test_loss = loss.data[0] / test_output.size()[0]
        bleu_score = BLEU.compute(predicted_tokens, target_tokens)
        turn_success = all([pred == tar[0] for pred, tar in zip(predictions, targets)])

        batch_test_loss.append(test_loss)
        batch_bleu.append(bleu_score)
        batch_success.append(turn_success)

    return batch_processing(batch_test_loss, batch_bleu, batch_success)
def extract_features(self, examples, mode='train'):
    subarrays = []
    for i, method in enumerate(self.feature_methods):
        name = method.__name__
        feature_filename = get_result_filename('{}.{}.npy'.format(name, mode))
        try:
            os.mkdir(os.path.dirname(feature_filename))
        except FileExistsError:
            pass

        if os.access(feature_filename, os.R_OK):
            features = np.load(feature_filename)
        else:
            feature_list = []
            for example in progress_bar(examples, desc=name):
                feature_list.append(method(example))
            features = np.vstack(feature_list)
            np.save(feature_filename, features)

        # Set a selected feature source to all zeroes
        if i in self.ablate:
            features *= 0
        subarrays.append(features)
    return np.hstack(subarrays)
def fit(self,
        data_set: DataSetI,
        n_epochs: int,
        batch_size: int = 256,
        silent: bool = False) -> None:
    """
    Fit the model to generate the given data set.

    :param data_set: DataSetI object containing the desired data set
    :param n_epochs: number of iterations run in training
    :param batch_size: size of a data batch
    :param silent: if False, print a progress bar
    """
    n_batches_in_epoch = int(data_set.n_data_points / batch_size)
    for epoch in progress_bar(range(n_epochs), disable=silent):
        for batch_index in range(n_batches_in_epoch):
            # set up the data batch
            samples = data_set.latent_batch(batch_size, self._latent_dim)
            labels = data_set.real_batch(batch_size)
            _ = self._model.train_on_batch(samples, labels)
    return
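# Illustrative usage sketch (not from the original source): fitting the model on
# a hypothetical DataSetI implementation called `my_data_set`; `gan` stands for
# an instance of the class that defines fit() above.
gan.fit(data_set=my_data_set, n_epochs=10, batch_size=256, silent=False)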
gain = args["--gain"]

run = dr.EventGenerator(args["--input"], max_events=args["--maxevents"])
NN = min(len(run), args["--maxevents"])

integral = np.zeros(NN, dtype='f4')
integral_weighted = np.zeros(NN, dtype='f4')
max_pos = np.zeros(NN, dtype='i4')
arrival_time = np.zeros(NN, dtype='f4')
arrival_time_no_calib = np.zeros(NN, dtype='f4')
trapz = np.zeros(NN, dtype='f4')
simps = np.zeros(NN, dtype='f4')

for i, event in enumerate(progress_bar(run, leave=True)):
    raw_data = event.data[ch][gain]
    stop_cell = event.header.stop_cells[ch][gain]
    calibrated = raw_data - offset[stop_cell:stop_cell + run.roi]
    t = cell_width[stop_cell:stop_cell + run.roi].cumsum()

    max_pos[i] = np.argmax(calibrated)
    s = slice(max_pos[i] - half_integration_window,
              max_pos[i] + half_integration_window + 1)
    samples = np.arange(s.start, s.stop)
    cells = dr.sample2cell(samples, stop_cell, total_cells=1024)

    DLE = partial(digital_leading_edge_discriminator,
                  data=calibrated, threshold=1000)
    arrival_time[i] = DLE(time=t)
    arrival_time_no_calib[i] = DLE(time=np.arange(len(calibrated)))

    integral[i] = calibrated[s].sum()
if __name__ == '__main__':
    fig, axs = plt.subplots(1, 2, figsize=(14, 6))
    for ax in axs:
        low = ax is axs[0]
        lens = range(10, 300, 10) if low else range(1000, 30000, 1000)
        py_time = []
        np_time = []
        numba1_time = []
        numba2_time = []
        c_time = []
        for l in progress_bar(lens, desc='Lower' if low else 'Upper'):
            rands = [random.random() for _ in range(l)]
            numpy_rands = np.array(rands)
            numba1_time.append(
                timeit.timeit(lambda: numba_standard_deviation(numpy_rands),
                              number=1000))
            numba2_time.append(
                timeit.timeit(lambda: numba_longer_standard_deviation(numpy_rands),
                              number=1000))
            np_time.append(
                timeit.timeit(lambda: np.std(numpy_rands), number=1000))
            c_time.append(
                timeit.timeit(lambda: std.standard_dev(rands), number=1000))
        if low:
def main():
    args = get_parser().parse_args()

    storage_conn = get_native_storage_conn(args.native_metering_connection)
    total_amount = count_samples(storage_conn, args.start_timestamp,
                                 args.end_timestamp)
    print('%s samples will be migrated to Gnocchi.' % total_amount)

    # NOTE: we need service credentials to init gnocchiclient
    config_file = ([args.ceilometer_config_file]
                   if args.ceilometer_config_file else None)
    gnocchi_conf = service.prepare_service([], config_file)
    logger = log.getLogger()
    log_conf = cfg.ConfigOpts()
    log.register_options(log_conf)
    log_conf.set_override('log_file', args.log_file)
    log_conf.set_override('debug', True)
    log.setup(log_conf, 'ceilometer_migration')

    time_filters = []
    if args.start_timestamp:
        time_filters.append({">=": {'timestamp': args.start_timestamp}})
    if args.end_timestamp:
        time_filters.append({"<": {'timestamp': args.end_timestamp}})

    gnocchi_publisher = gnocchi.GnocchiPublisher(gnocchi_conf, "gnocchi://")

    batch_size = args.batch_migration_size
    if total_amount == 'Unknown':
        total_amount = None
    orderby = [{"message_id": "asc"}]
    last_message_id = None
    migrated_amount = 0
    if progress_bar:
        pbar = progress_bar(total=total_amount, ncols=100, unit='samples')
    else:
        pbar = None

    # Keep migrating until the backend returns no more samples; when the total
    # is unknown (None), the loop terminates only via the empty-batch break below.
    while total_amount is None or migrated_amount < total_amount:
        if time_filters and last_message_id:
            filter_expr = {
                'and': time_filters + [{">": {"message_id": last_message_id}}]}
        elif time_filters and not last_message_id:
            if len(time_filters) == 1:
                filter_expr = time_filters[0]
            else:
                filter_expr = {'and': time_filters}
        elif not time_filters and last_message_id:
            filter_expr = {">": {"message_id": last_message_id}}
        else:
            filter_expr = None

        samples = storage_conn.query_samples(filter_expr=filter_expr,
                                             orderby=orderby,
                                             limit=batch_size)
        samples = list(samples)
        if not samples:
            break
        last_message_id = samples[-1].message_id
        for sample in samples:
            logger.info('Migrating sample with message_id: %s, meter: %s, '
                        'resource_id: %s' % (sample.message_id,
                                             sample.counter_name,
                                             sample.resource_id))
        samples_dict = [sample.as_dict() for sample in samples]
        gnocchi_publisher.publish_samples(samples_dict)

        length = len(samples)
        migrated_amount += length
        if pbar:
            pbar.update(length)

    logger.info("=========== %s metrics data migration done ============" %
                total_amount)
def backup_to_server(self, less_24_mode=True, zip_mode=True):
    if not self.is_server_online():
        self.logger_backup.info(
            f"warning: server {self.server_http_url} is OFFLINE")

    backup_start_time = time()
    backup_start_datetime = get_current_datetime()

    if zip_mode:
        # files generation
        self.logger_backup.info("generating files ...")
        files_collection = ExtractFiles(*GetMainContents(),
                                        less_24_mode=less_24_mode,
                                        verbose=1,
                                        overwrite=self.overwrite)
        self.logger_backup.info("generated.")

        if less_24_mode:
            _name = "less_24_files"
        else:
            _name = "all_files"

        # Path object
        tar_gz_file = CompressFiles(files_collection, _name, overwrite=self.overwrite)
        del _name

        if not self.is_server_online():
            if windows:
                windows_notification(
                    "Backup Client",
                    "start python webserver for backup! (2 mins left until start)",
                    5, "assets/icons/backup.ico", 1)
            self.logger_backup.info(
                f"server: {self.server_http_url} is OFFLINE (this time sleeping 2 minutes)")
            sleep(2 * 60)
            # last chance
            if not self.is_server_online():
                self.logger_backup.exception(
                    f"server: {self.server_http_url} is OFFLINE")
                raise ConnectionError(
                    f"server: {self.server_http_url} is OFFLINE")

        self.logger_backup.info(
            f"\nBackup to: {self.server_http_url} started at: {backup_start_datetime}\n")
        subprocess.call(
            self.pscp_command.safe_substitute(
                file_path=tar_gz_file.absolute().as_posix(),
                destination_folder=server_backup_folder))
        tar_gz_file.unlink()

        response = requests.post(self.server_http_url,
                                 json={
                                     "extract": 1,
                                     "zip_path": server_backup_folder + tar_gz_file.name
                                 })
        if response.status_code != 200:
            try:
                response.raise_for_status()
            except requests.RequestException:
                self.logger_backup.exception(
                    f"Status code: {response.status_code}\n")
                response.raise_for_status()

        if not response.json()["extract_result"] == "success":
            raise ValueError(response.json()["extract_result"])

        backup_finish_datetime = get_current_datetime()
        self.logger_backup.info(
            f"tar.gz extracted on server successfully\n"
            f"Backup to: {self.server_http_url} ended at: {backup_finish_datetime}")
        # tar_gz_file.unlink(missing_ok=False)
        self.logger_backup.info(f"{tar_gz_file.name} deleted")

    else:  # NON ZIP MODE
        # files generation
        self.logger_backup.info("generating files ...")
        files_collection = ExtractFiles(*GetMainContents(),
                                        less_24_mode=less_24_mode,
                                        verbose=1,
                                        overwrite=1)
        self.logger_backup.info("generated.")

        if less_24_mode:
            _name = "less_24_files"
        else:
            _name = "all_files"

        self.logger_backup.info(
            "generating array with BackupApplicationFiles files ...")
        files_collection = InstantiateWithBackupApplicationFile(
            files_collection, server_backup_folder, _name)
        del _name
        self.logger_backup.info("generated.")

        if not self.is_server_online():
            if windows:
                windows_notification(
                    "Backup Client",
                    "start python webserver for backup! (2 mins left until start)",
                    5, "assets/icons/backup.ico", 1)
            self.logger_backup.warning(
                f"server: {self.server_http_url} is OFFLINE (this time sleeping 2 minutes)")
            sleep(2 * 60)
            # last chance
            if not self.is_server_online():
                self.logger_backup.exception(
                    f"server: {self.server_http_url} is OFFLINE")
                raise ConnectionError(
                    f"server: {self.server_http_url} is OFFLINE")

        distance = len(files_collection)
        backup_progress = progress_bar(iterable=range(distance),
                                       desc="backup_to_server",
                                       total=distance,
                                       unit="file",
                                       ncols=120)
        self.logger_backup.info(
            f"\nBackup to: {self.server_http_url} started at: {get_current_datetime()}\n")
        if less_24_mode:
            self.logger_backup.info("copying less 24 files ...")
        else:
            self.logger_backup.info("copying all files ...")

        backup_start_time = time()
        total_exception_during_backup = 0
        iterator = 0
        while iterator < distance:
            try:
                self.copy_quiet(files_collection[iterator].file,
                                files_collection[iterator].dirname)
                backup_progress.update(1)
            except Exception as error:
                total_exception_during_backup += 1
                self.logger_backup.exception(
                    f"\nexception occurred at index={iterator}\n"
                    f"file: {files_collection[iterator].file}\n"
                    f"dirname: {files_collection[iterator].dirname}\n",
                    print__=False)
                backup_progress.update(-1)
                iterator -= 1  # step back so the failed file is retried
            iterator += 1
        backup_progress.close()

        self.logger_backup.info(
            f"{total_exception_during_backup} exceptions occurred during backup (see them in the log)")
        backup_finish_datetime = get_current_datetime()
        self.logger_backup.info(
            f"\nBackup to: {self.server_http_url} ended at: {backup_finish_datetime}")

        backup_duration = seconds_to_time(int(time() - backup_start_time))
        self.logger_backup.info(f"\nbackup duration: {backup_duration}")

        if windows:
            windows_notification("Backup Client",
                                 f"backup finished:\n({backup_duration})",
                                 5, "assets/icons/backup.ico", 1)

        size_in_bytes = 0
        for file in files_collection:
            try:
                size_in_bytes += os.path.getsize(file)
            except (FileNotFoundError, PermissionError):
                pass
        total_size = convert_size_in_bytes(size_in_bytes)

        orig_seconds = backup_duration.seconds
        try:
            orig_hours = backup_duration.hours
        except AttributeError:
            orig_hours = 0
        try:
            orig_minutes = backup_duration.minutes
        except AttributeError:
            orig_minutes = 0

        hours = orig_hours + orig_minutes / 60 + orig_seconds / 3600
        minutes = orig_hours * 60 + orig_minutes + orig_seconds / 60
        seconds = orig_hours * 3600 + orig_minutes * 60 + orig_seconds

        metadata = {
            "started_datetime": backup_start_datetime,
            "finish_datetime": backup_finish_datetime,
            "total_files": len(files_collection),
            "total_size": f"{total_size[0]}{total_size[1]}",
            "total_size_in_bytes": size_in_bytes,
            "total_exceptions": total_exception_during_backup,
            "process_interrupted": False,
            "process_interrupted_times": 0,
            "duration_hours": fixed_set_precision_float(hours, 3),
            "duration_minutes": fixed_set_precision_float(minutes, 3),
            "duration_seconds": fixed_set_precision_float(seconds, 3)
        }
        metadata_name = f"backup_metadata_{get_current_date()}_{get_current_time().replace(':', '.')}.json"
        # remote location
        write_json_to_file(metadata, current_date_remote_metadata_folder / metadata_name)
        # local project
        write_json_to_file(metadata, metadata_folder / metadata_name)
def minimize(
    self,
    coordinates: np.array,
    num_steps: int,
    patience: int = 50,
    silent: bool = True,
) -> Tuple[Any, Any]:
    """
    Minimize TSP for the given coordinate points.

    :param coordinates: coordinates of cities to minimize TSP over
    :param num_steps: number of iterations
    :param patience: number of epochs without an improving solution before terminating
    :param silent: if False, print a progress bar during execution
    :return: tuple with the best route and its length
    """
    self.distance_matrix = distance.cdist(coordinates, coordinates)
    population = self.initialize_population(coordinates.shape[0],
                                            self.population_size,
                                            self.extra_initialization_rate)
    self.history["min_fitness"] = np.zeros(num_steps)
    self.history["mean_fitness"] = np.zeros(num_steps)
    self.history["max_fitness"] = np.zeros(num_steps)

    crossover_schedule = self.crossover_schedule_type(num_steps, self.crossover_rate)
    mutation_schedule = self.mutation_schedule_type(num_steps, self.mutation_rate)

    for generation in progress_bar(range(num_steps), disable=silent):
        elite = self.selection(self.fitness, population,
                               int(self.elitism_rate * self.population_size))
        self.validate_population(population.numpy())

        num_to_crossover = int(self.crossover_rate * self.population_size)
        mating_pool = self.selection(self.fitness, population, num_to_crossover)
        offspring = self.create_offspring(
            mating_pool, int((1 - self.elitism_rate) * self.population_size))
        self.validate_population(population.numpy())

        num_to_mutate = int(self.mutation_rate * self.population_size)
        to_mutate = tf.random.uniform(
            [num_to_mutate, ],
            maxval=int((1 - self.elitism_rate) * self.population_size),
            dtype="int32")
        offspring = slice_update(offspring, indices=to_mutate,
                                 updates=self.mutate(tf.gather(offspring, to_mutate)))
        self.validate_population(population.numpy())

        # concatenate all solutions and create the next generation
        population = tf.concat([elite, offspring], axis=0)

        fitness = tf.map_fn(self.fitness, population)
        self.history["mean_fitness"][generation] = fitness.numpy().mean()
        self.history["min_fitness"][generation] = fitness.numpy().min()
        self.history["max_fitness"][generation] = fitness.numpy().max()
        self.history["epoch"] = generation

        self.crossover_rate = crossover_schedule[generation]
        self.mutation_rate = mutation_schedule[generation]

        # early stopping: no improvement of the best fitness for `patience` generations
        validation = self.history["min_fitness"][generation - patience:generation]
        if np.all(np.diff(validation) == 0) and generation >= patience:
            return (np.array_split(population[fitness.numpy().argmin()].numpy(),
                                   self.n_agents),
                    fitness.numpy().min())

    return (np.array_split(population[fitness.numpy().argmin()].numpy(),
                           self.n_agents),
            fitness.numpy().min())
def scores_for_proteins(proteins: Iterable, genes_data: DataFrame,
                        big_wig_path: str) -> Tuple[Dict, Namespace]:
    """Load conservation scores, average when needed, and transform into protein space."""
    bw = pyBigWig.open(big_wig_path)

    score_tracks = {}

    skipped_premature = set()
    skipped_key_error = set()
    mapping_to_many = set()
    skipped_track_mismatch = set()

    for protein in progress_bar(proteins):
        if '*' in protein.sequence[:-1]:
            skipped_premature.add(protein)
            continue

        gene = protein.gene
        chrom = 'chr' + gene.chrom

        try:
            protein_data = genes_data.loc[[(chrom, protein.refseq)]]
        except KeyError:
            skipped_key_error.add(protein)
            continue

        protein_tracks = []

        # a transcript might map to more than one genomic location
        for genomic_location in protein_data.itertuples(index=False):
            try:
                track = extract_track(genomic_location, protein, chrom, bw)
            except MismatchError:
                skipped_track_mismatch.add(protein)
                continue
            except TypeError:
                skipped_key_error.add(protein)
                continue
            protein_tracks.append(track)

        protein_tracks = [track for track in protein_tracks if track]

        if not protein_tracks:
            continue
        elif len(protein_tracks) > 1:
            mapping_to_many.add(protein)
            protein_track = [mean(scores) for scores in zip(*protein_tracks)]
        else:
            protein_track = protein_tracks[0]

        score_tracks[protein] = convert_to_aa_scores(protein_track)

    print(f'Averaged data for {len(mapping_to_many)} proteins mapping to more than one genomic location.')
    # print({protein.refseq for protein in mapping_to_many})
    print(f'Skipped {len(skipped_premature)} proteins with premature stop codons.')
    # print({protein.gene.name for protein in skipped_premature})
    print(f'Failed to find genomic data for {len(skipped_key_error)} proteins.')
    # print({(protein.gene.name, protein.gene.chrom, protein.gene.strand) for protein in skipped_key_error})
    print(f'Conflicting genomic and protein level coordinates for {len(skipped_track_mismatch)} proteins.')
    # print({protein.gene.name for protein in skipped_track_mismatch})

    details = Namespace(mapping_to_many_regions=mapping_to_many,
                        skipped=Namespace(premature_stop_codon=skipped_premature,
                                          no_genomic_data=skipped_key_error,
                                          track_mismatch=skipped_track_mismatch))

    return score_tracks, details
for index, user_id in enumerate(user_ids):
    print("---------------------")
    print("USER ID:", index, user_id)

    lookup = {
        "user_id": user_id,
        "timeline_length": None,
        "error_type": None,
        "error_message": None,
        "start_at": generate_timestamp(),
        "end_at": None
    }
    timeline = []
    try:
        for status in progress_bar(job.fetch_statuses(user_id=user_id),
                                   total=job.status_limit):
            timeline.append(job.parse_status(status))
        lookup["timeline_length"] = len(timeline)
    except Exception as err:
        lookup["error_type"] = err.__class__.__name__
        lookup["error_message"] = str(err)

    lookup["end_at"] = generate_timestamp()
    print(lookup)
    lookups.append(lookup)

    if any(timeline):
        print("SAVING", len(timeline), "TIMELINE TWEETS...")
        errors = job.save_timeline(timeline)
        if errors:
            pprint(errors)
cell_width = pd.read_csv(args["--tc"])["cell_width"].values
run = dr.EventGenerator(args["--input"], max_events=args["--maxevents"])
offset = np.genfromtxt(args["--offset"])[:, 0]

# trick to omit np.roll
offset = np.concatenate((offset, offset))
cell_width = np.concatenate([cell_width] * 5)
cell_width = np.roll(cell_width, 1)

ch = args["--channel"]
gain = args["--gain"]

bins = [np.linspace(50, 80, 301), np.linspace(-500, 2500, 601)]
histo, _, _ = np.histogram2d([], [], bins=bins)

for event in progress_bar(run, leave=True):
    raw_data = event.data[ch][gain]
    stop_cell = event.header.stop_cells[ch][gain]
    calibrated = raw_data - offset[stop_cell:stop_cell + run.roi]
    t = cell_width[stop_cell:stop_cell + run.roi].cumsum()
    h, _, _ = np.histogram2d(t, calibrated, bins=bins)
    histo += h

# normalize histo along y
histo /= histo.mean(axis=1)[:, np.newaxis]

plt.figure()
plt.imshow(
    histo.T,
    cmap="viridis",
ch = args["--channel"]
gain = args["--gain"]

run = dr.EventGenerator(args["--input"], max_events=args["--maxevents"])
NN = min(len(run), args["--maxevents"])

integral = np.zeros(NN, dtype='f4')
integral_weighted = np.zeros(NN, dtype='f4')
max_pos = np.zeros(NN, dtype='i4')
arrival_time = np.zeros(NN, dtype='f4')
arrival_time_no_calib = np.zeros(NN, dtype='f4')
trapz = np.zeros(NN, dtype='f4')
simps = np.zeros(NN, dtype='f4')

for i, event in enumerate(progress_bar(run, leave=True)):
    raw_data = event.data[ch][gain]
    stop_cell = event.header.stop_cells[ch][gain]
    calibrated = raw_data - offset[stop_cell:stop_cell + run.roi]
    t = cell_width[stop_cell:stop_cell + run.roi].cumsum()

    max_pos[i] = np.argmax(calibrated)
    s = slice(max_pos[i] - half_integration_window,
              max_pos[i] + half_integration_window + 1)
    samples = np.arange(s.start, s.stop)
    cells = dr.sample2cell(samples, stop_cell, total_cells=1024)

    DLE = partial(digital_leading_edge_discriminator,
                  data=calibrated, threshold=1000)
offset = np.genfromtxt(args["--offset"])[:, 0]

# trick to omit np.roll
offset = np.concatenate((offset, offset))
cell_width = np.concatenate([cell_width] * 5)
cell_width = np.roll(cell_width, 1)

ch = args["--channel"]
gain = args["--gain"]

bins = [np.linspace(50, 80, 301), np.linspace(-500, 2500, 601)]
histo, _, _ = np.histogram2d([], [], bins=bins)

for event in progress_bar(run, leave=True):
    raw_data = event.data[ch][gain]
    stop_cell = event.header.stop_cells[ch][gain]
    calibrated = raw_data - offset[stop_cell:stop_cell + run.roi]
    t = cell_width[stop_cell:stop_cell + run.roi].cumsum()
    h, _, _ = np.histogram2d(t, calibrated, bins=bins)
    histo += h

# normalize histo along y
histo /= histo.mean(axis=1)[:, np.newaxis]
def _create_source(index, dictionary, tfidf, symmetric, dominant, nonzero_limit, dtype):
    """Build a sparse term similarity matrix using a term similarity index.

    Returns
    -------
    matrix : :class:`scipy.sparse.coo_matrix`
        The sparse term similarity matrix.

    """
    assert isinstance(index, TermSimilarityIndex)
    assert dictionary is not None
    matrix_order = len(dictionary)

    if matrix_order == 0:
        raise ValueError('Dictionary provided to SparseTermSimilarityMatrix must not be empty')

    logger.info("constructing a sparse term similarity matrix using %s", index)

    if nonzero_limit is None:
        nonzero_limit = matrix_order

    def tfidf_sort_key(term_index):
        if isinstance(term_index, tuple):
            term_index, *_ = term_index
        term_idf = tfidf.idfs[term_index]
        return (-term_idf, term_index)

    if tfidf is None:
        columns = sorted(dictionary.keys())
        logger.info("iterating over %i columns in dictionary order", len(columns))
    else:
        assert max(tfidf.idfs) == matrix_order - 1
        columns = sorted(tfidf.idfs.keys(), key=tfidf_sort_key)
        logger.info("iterating over %i columns in tf-idf order", len(columns))

    nonzero_counter_dtype = _shortest_uint_dtype(nonzero_limit)

    column_nonzero = np.array([0] * matrix_order, dtype=nonzero_counter_dtype)
    if dominant:
        column_sum = np.zeros(matrix_order, dtype=dtype)
    if symmetric:
        assigned_cells = set()
    row_buffer = array('Q')
    column_buffer = array('Q')
    if dtype is np.float16 or dtype is np.float32:
        data_buffer = array('f')
    elif dtype is np.float64:
        data_buffer = array('d')
    else:
        raise ValueError('Dtype %s is unsupported, use numpy.float16, float32, or float64.' % dtype)

    def cell_full(t1_index, t2_index, similarity):
        if dominant and column_sum[t1_index] + abs(similarity) >= 1.0:
            return True  # after adding the similarity, the matrix would cease to be strongly diagonally dominant
        assert column_nonzero[t1_index] <= nonzero_limit
        if column_nonzero[t1_index] == nonzero_limit:
            return True  # after adding the similarity, the column would contain more than nonzero_limit elements
        if symmetric and (t1_index, t2_index) in assigned_cells:
            return True  # a similarity has already been assigned to this cell
        return False

    def populate_buffers(t1_index, t2_index, similarity):
        column_buffer.append(t1_index)
        row_buffer.append(t2_index)
        data_buffer.append(similarity)
        column_nonzero[t1_index] += 1
        if symmetric:
            assigned_cells.add((t1_index, t2_index))
        if dominant:
            column_sum[t1_index] += abs(similarity)

    try:
        from tqdm import tqdm as progress_bar
    except ImportError:
        def progress_bar(iterable):
            return iterable

    for column_number, t1_index in enumerate(progress_bar(columns)):
        column_buffer.append(column_number)
        row_buffer.append(column_number)
        data_buffer.append(1.0)

        if nonzero_limit <= 0:
            continue

        t1 = dictionary[t1_index]
        num_nonzero = column_nonzero[t1_index]
        num_rows = nonzero_limit - num_nonzero
        most_similar = [
            (dictionary.token2id[term], similarity)
            for term, similarity in index.most_similar(t1, topn=num_rows)
            if term in dictionary.token2id
        ] if num_rows > 0 else []

        if tfidf is None:
            rows = sorted(most_similar)
        else:
            rows = sorted(most_similar, key=tfidf_sort_key)

        for t2_index, similarity in rows:
            if cell_full(t1_index, t2_index, similarity):
                continue
            if not symmetric:
                populate_buffers(t1_index, t2_index, similarity)
            elif not cell_full(t2_index, t1_index, similarity):
                populate_buffers(t1_index, t2_index, similarity)
                populate_buffers(t2_index, t1_index, similarity)

    data_buffer = np.frombuffer(data_buffer, dtype=dtype)
    row_buffer = np.frombuffer(row_buffer, dtype=np.uint64)
    column_buffer = np.frombuffer(column_buffer, dtype=np.uint64)
    matrix = sparse.coo_matrix((data_buffer, (row_buffer, column_buffer)),
                               shape=(matrix_order, matrix_order))

    logger.info(
        "constructed a sparse term similarity matrix with %0.06f%% density",
        100.0 * matrix.getnnz() / matrix_order**2,
    )

    return matrix
import json
import random

import torch
from tqdm import tqdm as progress_bar
from transformers import BertTokenizer, BertModel, RobertaTokenizer, RobertaModel

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

print("Loading data ...")
utt_texts = json.load(open('data/utterances.json', 'r'))
num_cands = len(utt_texts)

utt_vectors = []
cand_embeds, cand_segments, cand_masks = [], [], []
for cand_text in progress_bar(utt_texts, total=num_cands):
    cand_inputs = tokenizer(cand_text, return_tensors="pt")
    with torch.no_grad():
        cand_outputs = model(**cand_inputs)
    utt_vectors.append(cand_outputs.pooler_output)

utt_vectors = torch.cat(utt_vectors)
print("utt_vectors: {}".format(utt_vectors.shape))
torch.save(utt_vectors, 'data/utt_vectors.pt')
def style_transfer(
    content_image_path,
    style_image_path,
    outputs_dir,
    n_epochs: int,
    content_weight: float = 3e-2,
    style_weights: tuple = (20000, 500, 12, 1, 1),
    smoothness_weight: float = 5e-2,
    content_layer: str = "block4_conv2",
    style_layers: tuple = ("block1_conv1", "block2_conv1", "block3_conv1",
                           "block4_conv1", "block5_conv1"),
    save_frequency: int = None,
):
    width, height = tf.keras.preprocessing.image.load_img(content_image_path).size
    save_frequency = save_frequency or n_epochs

    content_image = K.variable(read_image_as_tensor(content_image_path, (height, width)))
    style_image = K.variable(read_image_as_tensor(style_image_path, (height, width)))
    generated_image = K.placeholder((1, height, width, 3))  # tensor placeholder for the generated image
    input_as_tensor = tf.concat([content_image, style_image, generated_image], axis=0)

    model = tf.keras.applications.vgg19.VGG19(input_tensor=input_as_tensor,
                                              weights="imagenet",
                                              include_top=False)
    layer_to_output_mapping = {layer.name: layer.output for layer in model.layers}

    # Extract features from the content layer
    content_features = layer_to_output_mapping[content_layer]
    base_image_features = content_features[0, :, :, :]   # 0 corresponds to the base image
    combination_features = content_features[2, :, :, :]  # 2 corresponds to the generated image

    # Compute the total loss
    content_loss_value = content_weight * feature_reconstruction_loss(
        base_image_features, combination_features)
    style_loss_value = style_loss_for_all_layers(style_layers, style_weights,
                                                 layer_to_output_mapping)
    smoothness_loss_value = smoothness_weight * smoothness_loss(generated_image)
    total_loss = content_loss_value + style_loss_value + smoothness_loss_value

    # Compute gradients of the output image with respect to total_loss
    grads = K.gradients(total_loss, generated_image)
    outputs = [total_loss] + grads
    loss_and_grads = K.function([generated_image], outputs)

    # Initialize the generated image from random noise
    x = np.random.uniform(0, 255, (1, height, width, 3)) - 128.

    # Fit over the total iterations
    for epoch in progress_bar(range(n_epochs)):
        x, min_val, info = fmin_l_bfgs_b(
            # extract the loss function from the tf model
            func=lambda x: loss_and_grads([x.reshape((1, height, width, 3))])[0],
            x0=x.flatten(),
            # extract the gradients from the tf model
            fprime=lambda x: loss_and_grads([x.reshape(
                (1, height, width, 3))])[1].flatten().astype("float64"),
            maxfun=20,
        )
        if epoch % save_frequency == 0:
            generated_image = tensor_to_image(x.copy(), width, height)
            io.imsave(
                os.path.join(outputs_dir, f"generated_image_at_{epoch}_epoch.jpg"),
                generated_image)

    return tensor_to_image(x.copy(), width, height)
def main(config_path: str, resume_path=None):
    # Read the experiment configuration
    config = read_config(config_path)

    # Generate the experiment file structure
    logs_dir, config_save_path, video_path, checkpoint_path, code_save_path = paths(config_path)

    # Save config and code
    save_config(config, config_save_path)
    save_code(code_save_path)

    # Set random seed
    torch.manual_seed(config.seed)

    # Instantiate components
    env, policies, storages = instantiate(config)

    # Resume from a given checkpoint
    if resume_path is not None:
        policies = torch.load(resume_path)
        # Gather the iteration number from "<anything>-123.tar"
        start_iteration = int(re.findall(r'-(\d+)\.tar', resume_path)[0])
        for policy in policies:
            # TODO also save scheduler values when serializing the model
            policy.scheduler.current_iteration = start_iteration
            policy.sync_scheduled_values()
        print(f'Resuming from iteration {start_iteration}')
    else:
        start_iteration = 0

    if config.viz_scripted_mode:
        [*env_history, end_reason] = simulate_episode(env, policies, SCRIPTED)
        create_animation(env_history, video_path % (0, 'scripted'))
        return

    with warnings.catch_warnings():
        # Silence tensorflow (2.0) deprecated usages of numpy
        warnings.simplefilter('ignore', FutureWarning)
        logs_writer = SummaryWriter(logs_dir)

    if config.compare_interval > 0:
        comparison_policies, _ = read_models(config.comparison_models_dir)

    try:
        start_time = time()

        # Main training loop
        for update_number in progress_bar(range(start_iteration, config.num_iterations), 'Training'):
            # Collect rollouts and update weights
            training_history = perform_update(config, env, policies, storages)

            # Write progress summaries
            if do_this_iteration(config.log_interval, update_number, config.num_iterations):
                log_layers(policies, logs_writer, update_number)
                log_scalars(training_history, logs_writer, update_number, env)

            # Evaluate and record a video
            if do_this_iteration(config.eval_interval, update_number, config.num_iterations):
                for sampling_method in [SAMPLE, DETERMINISTIC]:
                    [*env_history, _] = simulate_episode(env, policies, sampling_method)
                    create_animation(env_history,
                                     video_path % (update_number, action_source_names[sampling_method]))

            # Checkpoint the current model weights
            if do_this_iteration(config.save_interval, update_number, config.num_iterations):
                save_model(policies, checkpoint_path % update_number)

            # Evaluate against other models
            if do_this_iteration(config.compare_interval, update_number, config.num_iterations):
                won_statuses, rewards = play_against_others(env, policies, comparison_policies,
                                                            config.comparison_num_episodes)
                log_comparisons(won_statuses, rewards, logs_writer, update_number)

            elapsed_time = time() - start_time  # in seconds
            elapsed_time /= 60  # in minutes
            elapsed_time /= 60  # in hours
            if elapsed_time >= config.max_run_time:
                break

    except KeyboardInterrupt:
        print('Stopped training, finishing up...')

    # Save final weights
    if config.save_interval > 0:
        save_model(policies, checkpoint_path % update_number)
    if config.eval_interval > 0:
        [*env_history, _] = simulate_episode(env, policies, SAMPLE)
        create_animation(env_history, video_path % (update_number, action_source_names[SAMPLE]))

    # Save hyperparams and metrics (comparisons against others and themselves)
    selves_wons, selves_rewards = play_against_others(env, policies, [policies],
                                                      config.comparison_num_episodes)
    if config.compare_interval > 0:
        others_wons, others_rewards = play_against_others(env, policies, comparison_policies,
                                                          config.comparison_num_episodes)
    else:
        others_wons, others_rewards = None, None
    log_hyperparams_and_metrics(config, selves_wons, selves_rewards, others_wons,
                                others_rewards, logs_writer, start_time)  # TODO log final

    # Flush logs
    logs_writer.close()