# Note: this is a closure; encoder_output, decoder_output, the two masks and
# `batch` come from the enclosing scope. All five index_select calls pick the
# same rows along the batch dimension, keeping the outputs aligned.
def select_inputs(indices):
    return (
        encoder_output.index_select(0, indices),
        decoder_output.index_select(0, indices),
        encoder_mask.index_select(0, indices),
        decoder_mask.index_select(0, indices),
        Batch.index_select(batch, indices),
    )
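# A minimal, self-contained sketch of the index_select semantics used above,
# with hypothetical tensor shapes: selecting along dimension 0 keeps the
# chosen examples of the batch, in the given order.
import torch

encoder_output = torch.randn(4, 7, 16)  # (batch, time, features)
indices = torch.tensor([2, 0])          # keep examples 2 and 0, in that order
selected = encoder_output.index_select(0, indices)

assert selected.shape == (2, 7, 16)
assert torch.equal(selected[0], encoder_output[2])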
def predict(model, data, input_paths, args, output_directory, gpu,
            run_evaluation=False, epoch=None):
    '''
    Runs inference on `data` and writes one JSON-lines prediction file per
    (framework, language) pair into `output_directory`.
    '''
    model.eval()

    input_files = {(f, l): input_paths[(f, l)] for f, l in args.frameworks}
    sentences = {(f, l): {} for f, l in args.frameworks}

    # Load the input sentences and reset their graph annotations.
    for framework, language in args.frameworks:
        with open(input_files[(framework, language)], encoding="utf8") as f:
            for line in f:
                line = json.loads(line)
                if not sentence_condition(line, framework, language):
                    continue

                line["nodes"] = []
                line["edges"] = []
                line["tops"] = []
                line["framework"] = framework
                line["language"] = language
                sentences[(framework, language)][line["id"]] = line

    # Run the model in inference mode and merge its predictions back into
    # the sentence records.
    for batch in data:
        with torch.no_grad():
            all_predictions = model(Batch.to(batch, gpu), inference=True)

        for (framework, language), predictions in all_predictions.items():
            for prediction in predictions:
                for key, value in prediction.items():
                    sentences[(framework, language)][prediction["id"]][key] = value

    # Write one JSON object per line and optionally evaluate.
    for framework, language in args.frameworks:
        output_path = f"{output_directory}/prediction_{framework}_{language}.json"
        with open(output_path, "w", encoding="utf8") as f:
            for sentence in sentences[(framework, language)].values():
                json.dump(sentence, f, ensure_ascii=False)
                f.write("\n")
                f.flush()

        if args.log_wandb:
            import wandb
            wandb.save(output_path)

        if run_evaluation:
            # This should be run in parallel, if your setup allows it.
            evaluate(output_directory, epoch, framework, language,
                     input_files[(framework, language)])
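# sentence_condition is referenced above but not defined in this snippet. A
# hypothetical stand-in (assumed behavior, not the original implementation)
# that keeps only records targeting the requested framework/language pair
# might look like this:
def sentence_condition(line, framework, language):
    return (line.get("framework", framework) == framework
            and line.get("language", language) == language)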
def load(self, checkpoint_dir, restore_step, model_name='tacotron'):
    print('Constructing model: %s' % model_name)
    inputs = tf.placeholder(tf.int32, [1, None], 'inputs')
    input_lengths = tf.placeholder(tf.int32, [1], 'input_lengths')

    # Create a batch with a single input and no target spectrograms.
    b = Batch((inputs, input_lengths, None, None), prep=False)

    with tf.variable_scope('model') as scope:
        self.model = Tacotron(hparams=hparams)
        self.model.initialize(b)
        self.wav_output = audio.spectrogram_tensorflow_inv(
            self.model.linear_outputs[0])

    print('Loading checkpoint: %s' % checkpoint_dir)
    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())

    # Restore the weights saved at the requested training step.
    restore_dir = '%s-%d' % (checkpoint_dir, restore_step)
    saver = tf.train.Saver()
    saver.restore(self.session, restore_dir)
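# Why the '%s-%d' path works: tf.train.Saver appends the global step to the
# save prefix, so a save at step N can be restored from '<prefix>-<N>'. A
# minimal, self-contained sketch of that convention (TF1 API, toy variable):
import tensorflow as tf

v = tf.Variable(42.0, name='v')
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    prefix = saver.save(sess, '/tmp/toy.ckpt', global_step=1000)
    # prefix == '/tmp/toy.ckpt-1000', matching the restore_dir format above
    saver.restore(sess, prefix)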
class DataFeeder(threading.Thread):
    '''
    Feeds batches from the dataset that has been generated
    at the in_dir path.
    '''

    def __init__(self, coordinator, in_dir, logger):
        super(DataFeeder, self).__init__()
        self._coordinator = coordinator
        self._in_dir = in_dir
        self._logger = logger
        self._metadata = load_metadata(os.path.join(in_dir, 'train.txt'),
                                       self._logger)
        random.shuffle(self._metadata)
        self._cursor = 0  # index of the next sample
        self._num_samples = len(self._metadata)
        self._hparams = hparams
        self.batch_size = hparams.get('batch_size')
        self.superbatch_size = hparams.get('superbatch_size')
        self.outputs_per_step = hparams.get('outputs_per_step')

        # Placeholders for inputs and targets.
        self._placeholders = [
            tf.placeholder(tf.int32, [None, None], 'inputs'),
            tf.placeholder(tf.int32, [None], 'input_lengths'),
            tf.placeholder(tf.float32, [None, None, hparams.get('num_mels')],
                           'mel_targets'),
            tf.placeholder(tf.float32, [None, None, hparams.get('num_freq')],
                           'linear_targets')
        ]

        # A FIFO queue of capacity 8: each enqueue pushes one batch, so the
        # queue buffers up to 8 batches for the training session to dequeue.
        queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.float32],
                             name='input_queue')
        self._enqueue_operation = queue.enqueue(self._placeholders)
        self.current_batch = Batch(queue.dequeue(), prep=False)
        self.current_batch.set_shapes(self._placeholders)

    def start_in_session(self, session):
        self._session = session
        self.start()

    def run(self):
        ''' Override of the threading.Thread run method '''
        try:
            while not self._coordinator.should_stop():
                self._enqueue_next_superbatch()
        except Exception as e:
            traceback.print_exc()
            self._coordinator.request_stop(e)

    def _enqueue_next_superbatch(self):
        '''
        Get the next superbatch (a list of batches).
        The size of superbatches is set in hparams.
        '''
        start = time.time()
        superbatch = [
            self._get_next_sample()
            for _ in range(self.superbatch_size * self.batch_size)
        ]

        # Sort the samples in the superbatch by length in time (number of
        # frames) so that similarly sized samples land in the same batch.
        superbatch.sort(key=lambda x: x[-1])

        # Bucket the sorted samples into batches to reduce padding, then
        # shuffle the batches themselves.
        batches = [
            Batch(superbatch[i:i + self.batch_size])
            for i in range(0, len(superbatch), self.batch_size)
        ]
        random.shuffle(batches)

        self._logger.log('Generated %d batches of size %d in %.03f sec' %
                         (len(batches), self.batch_size, time.time() - start))

        for batch in batches:
            feed_dict = dict(zip(self._placeholders, batch.get_all()))
            self._session.run(self._enqueue_operation, feed_dict=feed_dict)

    def _get_next_sample(self):
        '''
        Loads a single sample from the dataset.
        Output: (onehot text input, mel target, linear target, number of frames)
        '''
        lin_target_path, mel_target_path, n_frames, text = \
            self._metadata[self._cursor][:4]
        self.increment_cursor()

        lin_target = np.load(os.path.join(self._in_dir, lin_target_path))
        mel_target = np.load(os.path.join(self._in_dir, mel_target_path))
        onehot_text = text_to_onehot(text)
        return (onehot_text, mel_target, lin_target, n_frames)

    def increment_cursor(self):
        '''
        Advances the dataset cursor, wrapping around to 0 once the end of
        the dataset is reached.
        '''
        if self._cursor >= self._num_samples - 1:
            # Start from the beginning and shuffle the data again.
            self._cursor = 0
            random.shuffle(self._metadata)
        else:
            self._cursor += 1
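# A sketch of how the feeder is presumably started, based only on the methods
# above. The coordinator matches tf.train.Coordinator's should_stop() /
# request_stop() API; `in_dir` and `logger` are assumed to exist.
coordinator = tf.train.Coordinator()
feeder = DataFeeder(coordinator, in_dir, logger)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    feeder.start_in_session(session)  # spawns the background enqueue thread
    try:
        while not coordinator.should_stop():
            # Training steps would consume feeder.current_batch here, which
            # dequeues one batch from the FIFO queue per step.
            pass
    finally:
        coordinator.request_stop()
        coordinator.join([feeder])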
def __call__(self, batch):
    # Sort the examples in the batch by input length, longest first,
    # before building the batch tensors.
    batch.sort(key=lambda example: example["every_input"][0].size(0),
               reverse=True)
    return Batch.build(batch)
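# A hedged sketch of where this collate callable presumably plugs in: as the
# collate_fn of a torch DataLoader. The Collate class name and the dataset
# are assumptions; only __call__ is shown above.
from torch.utils.data import DataLoader

loader = DataLoader(
    dataset,               # any dataset yielding dicts with an "every_input" entry
    batch_size=32,
    shuffle=True,
    collate_fn=Collate(),  # sorts each batch by input length before building it
)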
def main_worker(gpu, n_gpus_per_node, args):
    is_master = gpu == 0
    directory = initialize(args,
                           create_directory=is_master,
                           init_wandb=args.log_wandb and is_master)

    os.environ["MASTER_ADDR"] = "localhost"
    if "MASTER_PORT" not in os.environ:
        os.environ["MASTER_PORT"] = "12345"
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method="env://",
                                world_size=n_gpus_per_node,
                                rank=gpu)

    dataset = SharedDataset(args)
    dataset.load_datasets(args, gpu, n_gpus_per_node)
    model = Model(dataset, args)

    # Separate weight-decay settings for encoder and decoder parameters.
    parameters = [{
        "params": p,
        "weight_decay": args.encoder_weight_decay
    } for p in model.get_encoder_parameters(args.n_encoder_layers)] + [{
        "params": model.get_decoder_parameters(),
        "weight_decay": args.decoder_weight_decay
    }]
    optimizer = AdamW(parameters, betas=(0.9, args.beta_2))
    scheduler = multi_scheduler_wrapper(optimizer, args)
    autoclip = AutoClip([
        p for name, p in model.named_parameters() if "loss_weights" not in name
    ])
    if args.balance_loss_weights:
        loss_weight_learner = LossWeightLearner(args, model, n_gpus_per_node)

    if is_master:
        if args.log_wandb:
            import wandb
            wandb.watch(model, log=args.wandb_log_mode)
        print(f"\nmodel: {model}\n")
        log = Log(dataset, model, optimizer, args, directory,
                  log_each=10, log_wandb=args.log_wandb)

    torch.cuda.set_device(gpu)
    model = model.cuda(gpu)
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[gpu])
        raw_model = model.module
    else:
        raw_model = model

    force_cpu_dev = False  # changed: optionally run on the CPU instead
    if force_cpu_dev:
        dev0 = torch.device("cpu")
        model.to(dev0)
        gpu = dev0

    for epoch in range(args.epochs):

        #
        # TRAINING
        #
        model.train()
        if is_master:
            log.train(len_dataset=dataset.train_size)

        i = 0
        model.zero_grad()
        losses_over_bs = []  # changed: accumulate per-batch losses
        for batch in dataset.train:
            if not force_cpu_dev:  # changed: skip the device move on CPU
                batch = Batch.to(batch, gpu)
            total_loss, losses, stats = model(batch)
            for head in raw_model.heads:
                stats.update(head.loss_weights_dict())

            if args.balance_loss_weights:
                loss_weight_learner.compute_grad(losses, epoch)
            losses_over_bs.append(total_loss.item())  # changed: record the loss
            total_loss.backward()

            if (i + 1) % args.accumulation_steps == 0:
                grad_norm = autoclip()
                if args.balance_loss_weights:
                    loss_weight_learner.step(epoch)
                scheduler(epoch)
                optimizer.step()
                model.zero_grad()

                if is_master:
                    with torch.no_grad():
                        batch_size = (batch["every_input"][0].size(0) *
                                      args.accumulation_steps)
                        log(batch_size, stats, args.frameworks,
                            grad_norm=grad_norm,
                            learning_rates=scheduler.lr() +
                            [loss_weight_learner.scheduler.lr()])

            del total_loss, losses
            i += 1

        if not is_master:
            continue

        #
        # VALIDATION CROSS-ENTROPIES
        #
        model.eval()
        log.eval(len_dataset=dataset.val_size)
        with torch.no_grad():
            for batch in dataset.val:
                try:
                    _, _, stats = model(Batch.to(batch, gpu))
                    batch_size = batch["every_input"][0].size(0)
                    log(batch_size, stats, args.frameworks)
                except RuntimeError as e:
                    if 'out of memory' in str(e):
                        print('| WARNING: ran out of memory, skipping batch')
                        if hasattr(torch.cuda, 'empty_cache'):
                            torch.cuda.empty_cache()
                    else:
                        raise e

        # changed: print mean, max and min of the per-batch losses this epoch
        lobs = np.array(losses_over_bs)
        print(f"{lobs.mean()}; {lobs.max()}; {lobs.min()}")

        log.flush()

        #
        # VALIDATION MRP-SCORES
        #
        predict(raw_model, dataset.val, args.validation_data, args, directory,
                gpu, run_evaluation=True, epoch=epoch)

        #
        # TEST PREDICTION
        #
        test_fpath = f"{directory}/test_predictions/"
        if not os.path.exists(test_fpath):  # changed: avoid FileExistsError after the first epoch
            os.mkdir(test_fpath)
        predict(raw_model, dataset.test, args.test_data, args, test_fpath, gpu)
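# A hedged sketch of how main_worker is typically launched in this kind of
# distributed setup: one process per GPU via torch.multiprocessing.spawn.
# parse_arguments is a hypothetical stand-in for the repo's argument parsing.
import torch
import torch.multiprocessing as mp

if __name__ == "__main__":
    args = parse_arguments()  # hypothetical helper
    n_gpus = torch.cuda.device_count()
    if args.distributed:
        mp.spawn(main_worker, nprocs=n_gpus, args=(n_gpus, args))
    else:
        main_worker(0, n_gpus, args)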