Example 1
def select_inputs(indices):
    # Select the rows given by `indices` along the batch dimension of
    # each tensor, plus the matching examples from the batch itself
    # (all five names are closed over from the enclosing scope).
    return (
        encoder_output.index_select(0, indices),
        decoder_output.index_select(0, indices),
        encoder_mask.index_select(0, indices),
        decoder_mask.index_select(0, indices),
        Batch.index_select(batch, indices),
    )
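For reference, Tensor.index_select(0, indices) copies the selected rows along the batch dimension into a new tensor; a minimal sketch with made-up shapes standing in for the encoder output above:

import torch

# Stand-in for encoder_output: (batch, seq_len, hidden).
encoder_output = torch.randn(4, 7, 16)
keep = torch.tensor([0, 2])  # indices of the rows to keep

subset = encoder_output.index_select(0, keep)
print(subset.shape)  # torch.Size([2, 7, 16])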
Example 2
def predict(model,
            data,
            input_paths,
            args,
            output_directory,
            gpu,
            run_evaluation=False,
            epoch=None):
    model.eval()
    input_files = {(f, l): input_paths[(f, l)] for f, l in args.frameworks}

    sentences = {(f, l): {} for f, l in args.frameworks}
    for framework, language in args.frameworks:
        with open(input_files[(framework, language)], encoding="utf8") as f:
            for line in f:
                line = json.loads(line)

                if not sentence_condition(line, framework, language):
                    continue

                line["nodes"] = []
                line["edges"] = []
                line["tops"] = []
                line["framework"] = framework
                line["language"] = language
                sentences[(framework, language)][line["id"]] = line

    for i, batch in enumerate(data):
        with torch.no_grad():
            all_predictions = model(Batch.to(batch, gpu), inference=True)

        for (framework, language), predictions in all_predictions.items():
            for prediction in predictions:
                for key, value in prediction.items():
                    sentences[(framework, language)][prediction["id"]][key] = value

    for framework, language in args.frameworks:
        output_path = f"{output_directory}/prediction_{framework}_{language}.json"
        with open(output_path, "w", encoding="utf8") as f:
            for sentence in sentences[(framework, language)].values():
                json.dump(sentence, f, ensure_ascii=False)
                f.write("\n")
                f.flush()

        if args.log_wandb:
            import wandb
            wandb.save(output_path)

        if run_evaluation:
            # this should be run in parallel, if your setup allows it
            evaluate(output_directory, epoch, framework, language,
                     input_files[(framework, language)])
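The files written above are plain JSON Lines, one graph per line, so they can be read back with the standard library alone; a minimal sketch (the file name is a made-up framework/language pair):

import json

def read_predictions(path):
    # One JSON object per line, as written by predict() above.
    with open(path, encoding="utf8") as f:
        return [json.loads(line) for line in f]

graphs = read_predictions("prediction_amr_eng.json")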
Example 3
    def load(self, checkpoint_dir, restore_step, model_name='tacotron'):
        print('Constructing model: %s' % model_name)
        inputs = tf.placeholder(tf.int32, [1, None], 'inputs')
        input_lengths = tf.placeholder(tf.int32, [1], 'input_lengths')
        # create a batch with a single input and no spectrograms
        b = Batch((inputs, input_lengths, None, None), prep=False)
        with tf.variable_scope('model') as scope:
            self.model = Tacotron(hparams=hparams)
            self.model.initialize(b)
            self.wav_output = audio.spectrogram_tensorflow_inv(
                self.model.linear_outputs[0])

        print('Loading checkpoint: %s' % checkpoint_dir)
        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())
        # tf.train.Saver appends the global step to the checkpoint
        # prefix, so the full path is '<checkpoint_dir>-<restore_step>'.
        restore_path = '%s-%d' % (checkpoint_dir, restore_step)
        saver = tf.train.Saver()
        saver.restore(self.session, restore_path)
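The '%s-%d' path above mirrors how tf.train.Saver names checkpoints when it is given a global_step; a minimal TF1-style sketch (the prefix 'logs/model.ckpt' and step 5000 are assumptions):

import tensorflow as tf

step = tf.Variable(0, name='global_step')
saver = tf.train.Saver()
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    # Writes files prefixed 'logs/model.ckpt-5000', exactly the
    # '%s-%d' % (checkpoint_dir, restore_step) path that load() rebuilds.
    saver.save(session, 'logs/model.ckpt', global_step=5000)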
Example 4
import os
import random
import threading
import time
import traceback

import numpy as np
import tensorflow as tf

# `Batch`, `load_metadata`, `text_to_onehot` and the module-level
# `hparams` object come from the surrounding project.


class DataFeeder(threading.Thread):
    '''
        Feeds batches from the dataset generated at the in_dir path.
    '''
    def __init__(self, coordinator, in_dir, logger):
        super(DataFeeder, self).__init__()
        self._coordinator = coordinator
        self._in_dir = in_dir
        self._logger = logger
        self._metadata = load_metadata(os.path.join(in_dir, 'train.txt'),
                                       self._logger)
        random.shuffle(self._metadata)
        self._cursor = 0  # index of the next sample
        self._num_samples = len(self._metadata)
        self._hparams = hparams
        self.batch_size = hparams.get('batch_size')
        self.superbatch_size = hparams.get('superbatch_size')
        self.outputs_per_step = hparams.get('outputs_per_step')

        # Placeholders for inputs and targets.
        self._placeholders = [
            tf.placeholder(tf.int32, [None, None], 'inputs'),
            tf.placeholder(tf.int32, [None], 'input_lengths'),
            tf.placeholder(tf.float32,
                           [None, None, hparams.get('num_mels')],
                           'mel_targets'),
            tf.placeholder(tf.float32,
                           [None, None, hparams.get('num_freq')],
                           'linear_targets')
        ]

        # A FIFO queue of capacity 8 buffers prepared batches
        # between the feeder thread and the training graph.
        queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.float32],
                             name='input_queue')
        self._enqueue_operation = queue.enqueue(self._placeholders)
        self.current_batch = Batch(queue.dequeue(), prep=False)
        self.current_batch.set_shapes(self._placeholders)

    def start_in_session(self, session):
        self._session = session
        self.start()

    def run(self):
        '''
            Override of the threading.Thread run method
        '''
        try:
            while not self._coordinator.should_stop():
                self._enqueue_next_superbatch()
        except Exception as e:
            traceback.print_exc()
            self._coordinator.request_stop(e)

    def _enqueue_next_superbatch(self):
        '''
            Get the next superbatch (a list of batches);
            the superbatch size is set in hparams.
        '''
        start = time.time()
        superbatch = [
            self._get_next_sample()
            for _ in range(self.superbatch_size * self.batch_size)
        ]
        # Sort samples by frame count (the last tuple element) so each
        # batch groups samples of similar length.
        superbatch.sort(key=lambda x: x[-1])
        # Bucket consecutive samples into batches; the batches are then
        # shuffled so training still sees them in random order.
        batches = [
            Batch(superbatch[i:i + self.batch_size])
            for i in range(0, len(superbatch), self.batch_size)
        ]
        random.shuffle(batches)
        self._logger.log('Generated %d batches of size %d in %.03f sec' %
                         (len(batches), self.batch_size, time.time() - start))
        for batch in batches:
            feed_dict = dict(zip(self._placeholders, batch.get_all()))
            self._session.run(self._enqueue_operation, feed_dict=feed_dict)

    def _get_next_sample(self):
        '''
            Loads a single sample from the dataset.

            Output:
            (one-hot text input, mel target, linear target, number of frames)
        '''
        lin_target_path, mel_target_path, n_frames, text = self._metadata[
            self._cursor][:4]
        self.increment_cursor()
        lin_target = np.load(os.path.join(self._in_dir, lin_target_path))
        mel_target = np.load(os.path.join(self._in_dir, mel_target_path))
        onehot_text = text_to_onehot(text)
        return (onehot_text, mel_target, lin_target, n_frames)

    def increment_cursor(self):
        '''
            Increments the dataset cursor, or sets it
            to 0 if we have reached the end of the dataset
        '''
        if self._cursor >= self._num_samples - 1:
            # start from beginning and shuffle the
            # data again
            self._cursor = 0
            random.shuffle(self._metadata)
        else:
            self._cursor += 1
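Since DataFeeder is a thread coordinated through tf.train.Coordinator, it has to be started inside a running session; a minimal wiring sketch (the directory, logger and training step are placeholders):

coordinator = tf.train.Coordinator()
feeder = DataFeeder(coordinator, 'training/', logger)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    feeder.start_in_session(session)
    try:
        while not coordinator.should_stop():
            # a real loop would run a train op that consumes
            # feeder.current_batch here
            pass
    finally:
        coordinator.request_stop()
        coordinator.join([feeder])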
Example 5
    def __call__(self, batch):
        # Sort the examples by input length, longest first, before
        # handing the sorted list to Batch.build.
        batch.sort(key=lambda example: example["every_input"][0].size(0),
                   reverse=True)
        return Batch.build(batch)
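A __call__ with this signature is what torch.utils.data.DataLoader accepts as a collate_fn; a minimal usage sketch, assuming the method belongs to a class named Collate and that `dataset` yields the dict examples above:

from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=32, collate_fn=Collate())
for batch in loader:
    ...  # each batch arrives length-sorted and built by Batch.build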
Example 6
def main_worker(gpu, n_gpus_per_node, args):
    is_master = gpu == 0
    directory = initialize(args,
                           create_directory=is_master,
                           init_wandb=args.log_wandb and is_master)

    os.environ["MASTER_ADDR"] = "localhost"
    if "MASTER_PORT" not in os.environ:
        os.environ["MASTER_PORT"] = "12345"

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method="env://",
                                world_size=n_gpus_per_node,
                                rank=gpu)

    dataset = SharedDataset(args)
    dataset.load_datasets(args, gpu, n_gpus_per_node)

    model = Model(dataset, args)
    parameters = [
        {"params": p, "weight_decay": args.encoder_weight_decay}
        for p in model.get_encoder_parameters(args.n_encoder_layers)
    ] + [{
        "params": model.get_decoder_parameters(),
        "weight_decay": args.decoder_weight_decay,
    }]
    optimizer = AdamW(parameters, betas=(0.9, args.beta_2))
    scheduler = multi_scheduler_wrapper(optimizer, args)
    autoclip = AutoClip([
        p for name, p in model.named_parameters() if "loss_weights" not in name
    ])
    if args.balance_loss_weights:
        loss_weight_learner = LossWeightLearner(args, model, n_gpus_per_node)

    if is_master:
        if args.log_wandb:
            import wandb
            wandb.watch(model, log=args.wandb_log_mode)
        print(f"\nmodel: {model}\n")
        log = Log(dataset,
                  model,
                  optimizer,
                  args,
                  directory,
                  log_each=10,
                  log_wandb=args.log_wandb)

    torch.cuda.set_device(gpu)
    model = model.cuda(gpu)

    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[gpu])
        raw_model = model.module
    else:
        raw_model = model

    # Optional CPU fallback for debugging: when enabled, `gpu` is
    # rebound to a CPU device so the Batch.to calls below work unchanged.
    force_cpu_dev = False
    if force_cpu_dev:
        dev0 = torch.device("cpu")
        model.to(dev0)
        gpu = dev0

    for epoch in range(args.epochs):

        #
        # TRAINING
        #

        model.train()
        if is_master:
            log.train(len_dataset=dataset.train_size)

        i = 0
        model.zero_grad()
        losses_over_bs = []  # per-batch losses, for epoch-level statistics
        for batch in dataset.train:
            if not force_cpu_dev:  # skip the device move in CPU-debug mode
                batch = Batch.to(batch, gpu)
            total_loss, losses, stats = model(batch)

            for head in raw_model.heads:
                stats.update(head.loss_weights_dict())

            if args.balance_loss_weights:
                loss_weight_learner.compute_grad(losses, epoch)

            losses_over_bs.append(total_loss.item())  # record for epoch stats
            total_loss.backward()

            if (i + 1) % args.accumulation_steps == 0:
                grad_norm = autoclip()

                if args.balance_loss_weights:
                    loss_weight_learner.step(epoch)
                scheduler(epoch)
                optimizer.step()
                model.zero_grad()

                if is_master:
                    with torch.no_grad():
                        batch_size = (batch["every_input"][0].size(0) *
                                      args.accumulation_steps)
                        # loss_weight_learner only exists when loss-weight
                        # balancing is enabled, so guard its learning rate.
                        learning_rates = scheduler.lr()
                        if args.balance_loss_weights:
                            learning_rates = learning_rates + [
                                loss_weight_learner.scheduler.lr()
                            ]
                        log(batch_size,
                            stats,
                            args.frameworks,
                            grad_norm=grad_norm,
                            learning_rates=learning_rates)

            del total_loss, losses

            i += 1

        if not is_master:
            continue

        #
        # VALIDATION CROSS-ENTROPIES
        #
        model.eval()
        log.eval(len_dataset=dataset.val_size)

        with torch.no_grad():
            for batch in dataset.val:
                try:
                    _, _, stats = model(Batch.to(batch, gpu))

                    batch_size = batch["every_input"][0].size(0)
                    log(batch_size, stats, args.frameworks)
                except RuntimeError as e:
                    if 'out of memory' in str(e):
                        print('| WARNING: ran out of memory, skipping batch')
                        if hasattr(torch.cuda, 'empty_cache'):
                            torch.cuda.empty_cache()
                    else:
                        raise e

        lobs = np.array(losses_over_bs)
        print(f"epoch loss: mean={lobs.mean():.4f}, "
              f"max={lobs.max():.4f}, min={lobs.min():.4f}")
        log.flush()

        #
        # VALIDATION MRP-SCORES
        #
        predict(raw_model,
                dataset.val,
                args.validation_data,
                args,
                directory,
                gpu,
                run_evaluation=True,
                epoch=epoch)

    #
    # TEST PREDICTION
    #
    test_fpath = f"{directory}/test_predictions/"
    # the directory may already exist when resuming an earlier run
    os.makedirs(test_fpath, exist_ok=True)

    predict(raw_model, dataset.test, args.test_data, args, test_fpath, gpu)