def main():
    """"""
    phones = joblib.load('phones.pkl')
    p2c = utils.phone2class(phones)
    c2p = utils.class2phone(phones)
    # _ = get_language_model()
    # check_language_models(p2c=p2c, c2p=c2p)
    data_old = joblib.load('lm_debug_data.pkl')
    data = joblib.load(
        '/home/john/Documents/School/Fall_2020/Research/DysarthriaAugment/exps/trial_1_attention_vc_one_model_fixed_iterations_CTC_TRAINING/ctc_output_predictions/M14_B3_UW51_M8.pkl'
    )
    data['data'] = data['ctc_outputs']
    data['transcription'] = data['true_phones']
    ctc_output = data['data']
    true_transcription = data['transcription']
    models = get_language_model()
    # The individual n-gram models ('model_1gram' ... 'model_13gram') can be
    # unpacked from `models` here for inspection while debugging.

    _ = get_best_sequence(ctc_output=ctc_output,
                          language_models=models,
                          c2p=c2p,
                          p2c=p2c,
                          true_transcription=true_transcription)
Example #2
    def __init__(self, params):
        """Initialization."""
        self.list_IDs = params['files']
        self.mode = params['mode']
        # self.specaugment = params['specaugment']
        self.wordlist = params['metadata_help']['wordlist']
        self.dictionary = params['metadata_help']['dictionary']
        self.phones = params['metadata_help']['phones']
        self.p2c = phone2class(self.phones)
        self.c2p = class2phone(self.phones)
Example #3
    def __init__(self, params):
        """Initialization."""
        self.list_IDs = params['files']
        self.mode = params['mode']
        self.wordlist = params['metadata_help']['wordlist']
        self.dictionary = params['metadata_help']['dictionary']
        self.phones = params['metadata_help']['phones']
        self.p2c = phone2class(self.phones)
        self.c2p = class2phone(self.phones)
        self.word_list = joblib.load('word_lists.pkl')
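The phone2class / class2phone helpers these constructors rely on are defined elsewhere in the repo and are not shown on this page. A minimal sketch of the assumed behavior (the enumeration order is a guess; the real mapping presumably reserves class 0 for the CTC blank/PAD token used in the training snippets below):

def phone2class(phones):
    # Hypothetical: map each phone symbol (e.g. 'ax', 'SOS') to an integer class id.
    return {p: i for i, p in enumerate(phones)}

def class2phone(phones):
    # Hypothetical: inverse map from class id back to the phone symbol.
    return {i: p for i, p in enumerate(phones)}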
Example #4

def main():
    """Rescore one incorrect test prediction with the cached n-gram language models."""
    models = get_language_model()
    incorrect = joblib.load(
        './exps/PARTITION_2_trial_1_Oracle_Baseline_CTC_training/ctc_output_predictions_test/M05_B2_CW73_M7.pkl'
    )
    stop = 'None'  # no-op assignment, kept as a convenient breakpoint anchor
    phones = joblib.load('phones.pkl')
    p2c = utils.phone2class(phones)
    c2p = utils.class2phone(phones)
    lm1 = models['model_1gram']
    lm2 = models['model_2gram']
    lm3 = models['model_3gram']
    lm4 = models['model_4gram']
    lm5 = models['model_5gram']
    lm6 = models['model_6gram']
    lm7 = models['model_7gram']
    lm8 = models['model_8gram']
    lm9 = models['model_9gram']
    lm10 = models['model_10gram']
    lm11 = models['model_11gram']
    lm12 = models['model_12gram']
    lm13 = models['model_13gram']

    K = 20
    CTC_weight = 1
    LM_weight = 1

    _ = get_best_sequence(ctc_output=incorrect['ctc_outputs'],
                          language_models=models,
                          c2p=c2p,
                          p2c=p2c,
                          true_transcription=incorrect['true_phones'],
                          K=K,
                          CTC_weight=CTC_weight,
                          LM_weight=LM_weight)
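A minimal, hypothetical sketch of how K, CTC_weight, and LM_weight could combine in a K-best rescoring; get_best_sequence's actual implementation is not shown on this page, and the hypothesis dicts and key names below are assumptions:

def rescore_hypotheses(hypotheses, K=20, CTC_weight=1.0, LM_weight=1.0):
    # Each hypothesis is assumed to carry log-domain CTC and LM scores.
    scored = [(CTC_weight * h['ctc_logprob'] + LM_weight * h['lm_logprob'], h)
              for h in hypotheses]
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [h for _, h in scored[:K]]  # keep the K best-scoring hypotheses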
Example #5
    data['limited']['1'][
        'dir'] = './exps/PARTITION_1_trial_2_Limited_Baseline_CTC_TRAINING'
    """Global partition 2 final experiment directories"""
    data['attention']['2'][
        'dir'] = './exps/PARTITION_2_trial_1_attention_vc_CTC_training'
    data['dcgan']['2'][
        'dir'] = './exps/PARTITION_2_trial_1_dcgan_vc_CTC_training'
    data['oracle']['2'][
        'dir'] = './exps/PARTITION_2_trial_1_Oracle_Baseline_CTC_training'
    data['lack']['2'][
        'dir'] = './exps/PARTITION_2_trial_1_Lack_Baseline_CTC_training'
    data['limited']['2'][
        'dir'] = './exps/PARTITION_2_trial_1_Limited_Baseline_CTC_training'

    phones = joblib.load('phones.pkl')
    p2c = utils.phone2class(phones)
    c2p = utils.class2phone(phones)

    no_lm_folder = 'predictions'
    lm_folder = 'language_model_predictions_test'
    for exp_type, exp_data in data.items():
        for partition_number, partition_data in exp_data.items():
            no_lm_dir = os.path.join(partition_data['dir'], no_lm_folder)
            lm_dir = os.path.join(partition_data['dir'], lm_folder)
            partition_data['no_lm_results'] = collect_files(no_lm_dir)
            partition_data['lm_results'] = collect_files(lm_dir)
            """Let's get the levenshtein distances"""
            partition_data['no_lm_scores'] = {}
            for file in partition_data['no_lm_results']:
                file_key = file.split('/')[-1]
                results = joblib.load(file)
                predicted_string = [
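The snippet above is truncated by the source page. For reference, a minimal, self-contained sketch of the Levenshtein (edit) distance those scores are based on, computed over phone sequences rather than characters (the repo's own scoring helper is not shown in this excerpt):

def levenshtein(a, b):
    # Classic dynamic-programming edit distance between two sequences.
    prev = list(range(len(b) + 1))
    for i, x in enumerate(a, 1):
        curr = [i]
        for j, y in enumerate(b, 1):
            curr.append(min(prev[j] + 1,              # deletion
                            curr[j - 1] + 1,          # insertion
                            prev[j - 1] + (x != y)))  # substitution
        prev = curr
    return prev[-1]

print(levenshtein(['ax', 'b'], ['ax', 'p', 'b']))  # -> 1 (one insertion)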
Example #6

def get_language_model():
    """Train MLE n-gram models of order 1-15, or load them from cached pickles."""
    if not all(
            os.path.exists('{}_gram_language_model.pkl'.format(n))
            for n in range(1, 16)):
        wordlist = joblib.load('wordlist.pkl')
        dictionary = joblib.load('dict.pkl')
        phones = joblib.load('phones.pkl')
        p2c = utils.phone2class(phones)
        c2p = utils.class2phone(phones)
        """Here, the dictionary is actually the training data for the language model,
        because those are the only possible sequences of phones"""
        training_sentences_ = [value for key, value in dictionary.items()]
        """Add SOS and EOS tokens"""
        training_sentences__ = []
        for sentence in training_sentences_:
            new_sentence = ['SOS']
            new_sentence.extend(sentence)
            new_sentence.append('EOS')
            training_sentences__.append(new_sentence)

        training_sentence_one_list = []
        for sentence in training_sentences__:
            training_sentence_one_list.extend(sentence)
        vocabulary = list(set(training_sentence_one_list))
        """Training sentences need to be list of tuples"""
        training_tuples = []
        for sentence in training_sentences__:
            training_tuples.append(tuple(sentence))

        # lm = MLE(2)
        # dummy_vocab = ['a', 'b', 'c']
        # dummy_text = [[("a", "b"), ("b", "c")]]
        # lm.fit(dummy_text, vocabulary_text=dummy_vocab)
        # # lm.fit([[("a",), ("b",), ("c",)]])
        # SCORE = lm.score("a")

        # n_grams = list(ngrams(training_sentence_one_list, n=N_GRAM))
        """1-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=1))
        model1 = MLE(1)
        model1.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model1, '1_gram_language_model.pkl')
        print("Created 1-gram model...")
        """2-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=2))
        model2 = MLE(2)
        model2.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model2, '2_gram_language_model.pkl')
        print("Created 2-gram model...")
        """3-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=3))
        model3 = MLE(3)
        model3.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model3, '3_gram_language_model.pkl')
        print("Created 3-gram model...")
        """4-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=4))
        model4 = MLE(4)
        model4.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model4, '4_gram_language_model.pkl')
        print("Created 4-gram model...")
        """5-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=5))
        model5 = MLE(5)
        model5.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model5, '5_gram_language_model.pkl')
        print("Created 5-gram model...")
        """6-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=6))
        model6 = MLE(6)
        model6.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model6, '6_gram_language_model.pkl')
        print("Created 6-gram model...")
        """7-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=7))
        model7 = MLE(7)
        model7.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model7, '7_gram_language_model.pkl')
        print("Created 7-gram model...")
        """8-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=8))
        model8 = MLE(8)
        model8.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model8, '8_gram_language_model.pkl')
        print("Created 8-gram model...")
        """9-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=9))
        model9 = MLE(9)
        model9.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model9, '9_gram_language_model.pkl')
        print("Created 9-gram model...")
        """10-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=10))
        model10 = MLE(10)
        model10.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model10, '10_gram_language_model.pkl')
        print("Created 10-gram model...")
        """11-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=11))
        model11 = MLE(11)
        model11.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model11, '11_gram_language_model.pkl')
        print("Created 11-gram model...")
        """12-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=12))
        model12 = MLE(12)
        model12.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model12, '12_gram_language_model.pkl')
        print("Created 12-gram model...")
        """13-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=13))
        model13 = MLE(13)
        model13.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model13, '13_gram_language_model.pkl')
        print("Created 13-gram model...")
        """14-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=14))
        model14 = MLE(14)
        model14.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model14, '14_gram_language_model.pkl')
        print("Created 14-gram model...")
        """15-gram model"""
        all_grams = list(everygrams(training_sentence_one_list, max_len=15))
        model15 = MLE(15)
        model15.fit(text=[all_grams], vocabulary_text=vocabulary)
        joblib.dump(model15, '15_gram_language_model.pkl')
        print("Created 15-gram model...")
        """https://stackoverflow.com/questions/6462709/nltk-language-model-ngram-calculate-the-prob-of-a-word-from-context"""
        """Scroll down a little for relevant answer (you must provide n-1 context for the score function because
        the test symbol completes the n-gram"""
        # EOS_to_SOS_score = model.score(word='b', context=('SOS', 'ax'))
        # print(model.generate(20, random_seed=7))
    else:
        models = {
            'model_{}gram'.format(n):
            joblib.load('{}_gram_language_model.pkl'.format(n))
            for n in range(1, 16)
        }

    return models
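Following the Stack Overflow note inside get_language_model: nltk's MLE.score takes the next symbol plus exactly n-1 symbols of context. A short usage sketch against the returned dict (the phone values are illustrative only):

models = get_language_model()
lm3 = models['model_3gram']
# A 3-gram model scores a symbol given exactly two context symbols.
print(lm3.score('b', ('SOS', 'ax')))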
Example #7
    def eval(self):
        """Evaluate trained model on test set"""
        if WORDSPLIT:
            train, test = self.get_train_test_wordsplit()
        elif UTTERANCE_SPLIT:
            train, test, val = self.get_train_test_utterance_split()
        wordlist = joblib.load('wordlist.pkl')
        dictionary = joblib.load('dict.pkl')
        phones = joblib.load('phones.pkl')
        metadata_help = {
            'wordlist': wordlist,
            'dictionary': dictionary,
            'phones': phones
        }
        p2c = utils.phone2class(phones)
        c2p = utils.class2phone(phones)
        """Get test generator"""
        test_data = Dataset({
            'files': test,
            'mode': 'eval',
            'metadata_help': metadata_help
        })
        test_gen = data.DataLoader(test_data,
                                   batch_size=1,
                                   shuffle=True,
                                   collate_fn=test_data.collate_eval,
                                   drop_last=True)
        for batch_number, features in tqdm(enumerate(test_gen)):
            spectrograms = features['spectrograms']
            phones = features['phones']
            batch_metadata = features['metadata'][0]
            input_lengths = features['input_lengths']
            self.G = self.G.eval()

            outputs, _ = self.G(spectrograms,
                                input_lengths.unsqueeze(0).long())
            outputs = np.squeeze(outputs.detach().cpu().numpy())
            phones = np.squeeze(phones.detach().cpu().numpy())
            phones = phones.astype(dtype=int)
            phones = [c2p[x] for x in phones]

            output_classes = np.argmax(outputs, axis=1)
            """Decode the output predictions into a phone sequence"""
            # https://stackoverflow.com/questions/38065898/how-to-remove-the-adjacent-duplicate-value-in-a-numpy-array
            duplicates_eliminated = np.asarray(
                [k for k, g in groupby(output_classes)])
            blanks_eliminated = duplicates_eliminated[
                duplicates_eliminated != 0]
            predicted_phones_ = [c2p[x] for x in blanks_eliminated]
            """remove SOS and EOS"""
            predicted_phones = []
            for x in predicted_phones_:
                if x != 'SOS' and x != 'EOS':
                    predicted_phones.append(x)

            data_to_save = {
                'speaker': batch_metadata['speaker'],
                'word': batch_metadata['word'],
                'true_phones': batch_metadata['phones'],
                'predicted_phones': predicted_phones
            }
            dump_path = os.path.join(self.predict_dir,
                                     batch_metadata['utterance'] + '.pkl')
            joblib.dump(data_to_save, dump_path)
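A quick toy check of the duplicate-then-blank collapse performed in eval above, assuming class 0 is the CTC blank as in the snippet:

import numpy as np
from itertools import groupby

output_classes = np.array([0, 3, 3, 0, 0, 5, 5, 5, 2])
deduped = np.asarray([k for k, _ in groupby(output_classes)])  # [0 3 0 5 2]
print(deduped[deduped != 0])  # -> [3 5 2]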
Example #8
    def train(self):
        """Create speaker2index and index2speaker"""
        self.speaker2index_and_index2speaker()
        """Initialize history matrix"""
        self.history = np.random.normal(loc=0,
                                        scale=0.1,
                                        size=(len(self.s2i),
                                              config.train.class_history))
        """"""
        """"""
        iterations = 0
        """Get train/test"""
        if WORDSPLIT:
            train, test = self.get_train_test_wordsplit()
        elif UTTERANCE_SPLIT:
            train, test, val = self.get_train_test_utterance_split()
        wordlist = joblib.load('wordlist.pkl')
        dictionary = joblib.load('dict.pkl')
        phones = joblib.load('phones.pkl')
        metadata_help = {
            'wordlist': wordlist,
            'dictionary': dictionary,
            'phones': phones
        }
        p2c = utils.phone2class(phones)
        c2p = utils.class2phone(phones)
        """CTC loss"""
        # self.ctc_loss = nn.CTCLoss(blank=p2c[config.data.PAD_token], reduction='mean')
        self.ctc_loss = nn.CTCLoss(blank=p2c[config.data.PAD_token],
                                   reduction='none')
        self.loss_adv = nn.CrossEntropyLoss(reduction='none')
        for epoch in range(config.train.num_epochs):
            """Make dataloader"""
            train_data = Dataset({
                'files': train,
                'mode': 'train',
                'metadata_help': metadata_help
            })
            train_gen = data.DataLoader(train_data,
                                        batch_size=config.train.batch_size,
                                        shuffle=True,
                                        collate_fn=train_data.collate,
                                        drop_last=True)
            val_data = Dataset({
                'files': val,
                'mode': 'train',
                'metadata_help': metadata_help
            })
            val_gen = data.DataLoader(val_data,
                                      batch_size=config.train.batch_size,
                                      shuffle=True,
                                      collate_fn=val_data.collate,
                                      drop_last=True)
            ####################
            '''
            y_train = []
            for features in train_gen:
                metadata = features["metadata"]
                for x in metadata:
                    y_train.append(x['speaker'])
            y_train = np.array(y_train)
            lb = LabelEncoder()
            y_train = to_categorical(lb.fit_transform(y_train.ravel()), 21)
            print("save classes...")
            np.save('classes'+str(epoch)+'.npy', lb.classes_)
            
            y_train = np.argmax(y_train, axis=1)
            y_train = torch.from_numpy(y_train).long()
            '''
            classes = [
                'CF02', 'CF03', 'CF04', 'CF05', 'CM01', 'CM04', 'CM05', 'CM06',
                'CM08', 'CM09', 'CM10', 'CM12', 'CM13', 'F04', 'F05', 'M05',
                'M08', 'M09', 'M10', 'M11', 'M14'
            ]
            classes_bin = [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
            ]
            ###################

            correct = 0
            incorrect = 0
            for batch_number, features in enumerate(train_gen):
                spectrograms = features['spectrograms']
                phones = features['phones']
                input_lengths = features['input_lengths']
                target_lengths = features['target_lengths']
                metadata = features["metadata"]
                batch_speakers = [x['speaker'] for x in metadata]
                ###########
                # labels = np.zeros((config.train.batch_size, 21))
                # for it, x in enumerate(metadata):
                #     labels[it, classes.index(x['speaker'])] = 1
                labels = [
                    classes_bin[classes.index(x['speaker'])] for x in metadata
                ]
                #labels = [classes.index(x['speaker']) for x in metadata]
                labels = np.array(labels)
                labels = torch.from_numpy(labels).long().to(self.device)

                self.G = self.G.train()
                """Make input_lengths and target_lengths torch ints"""
                input_lengths = input_lengths.to(torch.int32)
                target_lengths = target_lengths.to(torch.int32)
                phones = phones.to(torch.int32)
                #########
                outputs_ctc, outputs_adv = self.G(spectrograms,
                                                  input_lengths.long())
                outputs_ctc = outputs_ctc.permute(
                    1, 0,
                    2)  # swap batch and sequence length dimension for CTC loss

                loss = self.ctc_loss(log_probs=outputs_ctc,
                                     targets=phones,
                                     input_lengths=input_lengths,
                                     target_lengths=target_lengths)
                loss_adv = self.loss_adv(outputs_adv, labels)

                _, predicted = torch.max(outputs_adv.data, -1)
                gt = labels.data
                correct += (predicted == gt).float().sum().cpu().data
                incorrect += (predicted != gt).float().sum().cpu().data

                total_loss = loss + loss_adv * self.lambda_adv
                #########
                """Update the loss history"""
                self.update_history(total_loss, batch_speakers)
                if epoch >= config.train.regular_epochs:
                    loss_weights = self.get_loss_weights(batch_speakers,
                                                         type='fair')
                else:
                    loss_weights = self.get_loss_weights(batch_speakers,
                                                         type='unfair')
                total_loss = total_loss * loss_weights
                # Backward and optimize.
                self.reset_grad()
                # loss.backward()
                total_loss.sum().backward()
                self.g_optimizer.step()
                if iterations % self.log_step == 0:
                    accuracy = (100 * correct / (correct + incorrect)).item()
                    print(
                        str(iterations) + ', losses (total, ctc, adv): ' +
                        str(total_loss.sum().item()) + ',' +
                        str(loss.sum().item()) + ',' +
                        str(loss_adv.sum().item()),
                        'adv. accuracy: ' + str(accuracy))
                    correct = 0
                    incorrect = 0
                    if self.use_tensorboard:
                        self.logger.scalar_summary('loss',
                                                   total_loss.sum().item(),
                                                   iterations)
                        self.logger.scalar_summary('accuracy', accuracy,
                                                   iterations)

                if iterations % self.model_save_step == 0:
                    if self.lambda_adv > 6:
                        self.lambda_adv = self.lambda_adv // 2
                    elif self.lambda_adv == 6:
                        self.lambda_adv -= 1
                    """Calculate validation loss"""
                    val_loss, val_accuracy = self.val_loss(
                        val=val_gen, iterations=iterations)
                    print(
                        str(iterations) + ', val_loss: ' + str(val_loss),
                        'val_adv. accuracy: ' + str(val_accuracy.item()))
                    if self.use_tensorboard:
                        self.logger.scalar_summary('val_loss', val_loss,
                                                   iterations)
                        self.logger.scalar_summary('val_accuracy',
                                                   val_accuracy.item(),
                                                   iterations)
                """Save model checkpoints."""
                if iterations % self.model_save_step == 0:
                    G_path = os.path.join(self.model_save_dir,
                                          '{}-G.ckpt'.format(iterations))
                    torch.save(
                        {
                            'model': self.G.state_dict(),
                            'optimizer': self.g_optimizer.state_dict()
                        }, G_path)
                    print('Saved model checkpoints into {}...'.format(
                        self.model_save_dir))

                iterations += 1
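Both training loops rely on reduction='none' losses so each sample can be reweighted before the backward pass; a tiny self-contained illustration of that weighted-sum backward step (the tensor values are made up):

import torch

per_sample_loss = torch.tensor([1.2, 0.4, 0.9], requires_grad=True)  # e.g. CTC losses
loss_weights = torch.tensor([0.5, 1.0, 1.5])  # e.g. from get_loss_weights
total_loss = (per_sample_loss * loss_weights).sum()
total_loss.backward()  # each sample's gradient scales with its weight
print(per_sample_loss.grad)  # -> tensor([0.5000, 1.0000, 1.5000])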
Example #9
    def train(self):
        """Create speaker2index and index2speaker"""
        self.speaker2index_and_index2speaker()
        """Initialize history matrix"""
        self.history = np.random.normal(loc=0,
                                        scale=0.1,
                                        size=(len(self.s2i),
                                              config.train.class_history))
        """"""
        """"""
        iterations = 0
        """Get train/test"""
        if WORDSPLIT:
            train, test = self.get_train_test_wordsplit()
        elif UTTERANCE_SPLIT:
            train, test, val = self.get_train_test_utterance_split()
        wordlist = joblib.load('wordlist.pkl')
        dictionary = joblib.load('dict.pkl')
        phones = joblib.load('phones.pkl')
        metadata_help = {
            'wordlist': wordlist,
            'dictionary': dictionary,
            'phones': phones
        }
        p2c = utils.phone2class(phones)
        c2p = utils.class2phone(phones)
        """CTC loss"""
        # self.ctc_loss = nn.CTCLoss(blank=p2c[config.data.PAD_token], reduction='mean')
        self.ctc_loss = nn.CTCLoss(blank=p2c[config.data.PAD_token],
                                   reduction='none')
        for epoch in range(config.train.num_epochs):
            """Make dataloader"""
            train_data = Dataset({
                'files': train,
                'mode': 'train',
                'metadata_help': metadata_help
            })
            train_gen = data.DataLoader(train_data,
                                        batch_size=config.train.batch_size,
                                        shuffle=True,
                                        collate_fn=train_data.collate,
                                        drop_last=True)
            val_data = Dataset({
                'files': val,
                'mode': 'train',
                'metadata_help': metadata_help
            })
            val_gen = data.DataLoader(val_data,
                                      batch_size=config.train.batch_size,
                                      shuffle=True,
                                      collate_fn=val_data.collate,
                                      drop_last=True)

            for batch_number, features in enumerate(train_gen):
                spectrograms = features['spectrograms']
                phones = features['phones']
                input_lengths = features['input_lengths']
                target_lengths = features['target_lengths']
                metadata = features["metadata"]
                batch_speakers = [x['speaker'] for x in metadata]
                self.G = self.G.train()

                """Make input_lengths and target_lengths torch ints"""
                input_lengths = input_lengths.to(torch.int32)
                target_lengths = target_lengths.to(torch.int32)
                phones = phones.to(torch.int32)

                outputs = self.G(spectrograms)

                outputs = outputs.permute(
                    1, 0,
                    2)  # swap batch and sequence length dimension for CTC loss

                loss = self.ctc_loss(log_probs=outputs,
                                     targets=phones,
                                     input_lengths=input_lengths,
                                     target_lengths=target_lengths)
                """Update the loss history"""
                self.update_history(loss, batch_speakers)
                if epoch >= config.train.regular_epochs:
                    loss_weights = self.get_loss_weights(batch_speakers,
                                                         type='fair')
                else:
                    loss_weights = self.get_loss_weights(batch_speakers,
                                                         type='unfair')
                loss = loss * loss_weights

                # Backward and optimize.
                self.reset_grad()
                # loss.backward()
                loss.sum().backward()
                self.g_optimizer.step()

                if iterations % self.log_step == 0:
                    print(
                        str(iterations) + ', loss: ' + str(loss.sum().item()))
                    if self.use_tensorboard:
                        self.logger.scalar_summary('loss',
                                                   loss.sum().item(),
                                                   iterations)

                if iterations % self.model_save_step == 0:
                    """Calculate validation loss"""
                    val_loss = self.val_loss(val=val_gen,
                                             iterations=iterations)
                    print(str(iterations) + ', val_loss: ' + str(val_loss))
                    if self.use_tensorboard:
                        self.logger.scalar_summary('val_loss', val_loss,
                                                   iterations)
                """Save model checkpoints."""
                if iterations % self.model_save_step == 0:
                    G_path = os.path.join(self.model_save_dir,
                                          '{}-G.ckpt'.format(iterations))
                    torch.save(
                        {
                            'model': self.G.state_dict(),
                            'optimizer': self.g_optimizer.state_dict()
                        }, G_path)
                    print('Saved model checkpoints into {}...'.format(
                        self.model_save_dir))

                iterations += 1