Example #1
def infer(minmax, data_train, data_test):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # lstm_train_model = LSTM()
    model = LSTM().to(device)
    model.load_state_dict(
        torch.load(r"D:\stock\weights\checkpont_67.27376310428824.pth",
                   map_location=device))
    model.eval()
    test_size = len(data_test)
    future_day = test_size
    timestamp = 5
    output_predict = np.zeros(
        (data_train.shape[0] + future_day, data_train.shape[1]))
    output_predict[0] = data_train.iloc[0]
    for k in range(0, (data_train.shape[0] // timestamp) * timestamp,
                   timestamp):
        index = min(k + timestamp, output_predict.shape[0] - 1)
        batch_x = np.expand_dims(data_train.iloc[k:index, :].values, axis=0)
        batch_y = data_train.iloc[k + 1:index + 1, :].values
        batch_x = torch.Tensor(batch_x).to(device)
        batch_y = torch.Tensor(batch_y).to(device)
        out_logits = model(batch_x)
        # init_value = last_state
        output_predict[k + 1:k + timestamp +
                       1] = out_logits.cpu().detach().numpy()[0]
    output_predict = minmax.inverse_transform(output_predict)
    return output_predict
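A minimal usage sketch, not taken from the original repository: `infer` expects a fitted MinMax scaler plus train/test frames in the already-scaled feature space, so a call might look like this (the CSV path and 80/20 split are placeholders):

# Hypothetical call to infer(); file name and split ratio are assumptions.
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

df_raw = pd.read_csv('prices.csv')
split = int(len(df_raw) * 0.8)
minmax = MinMaxScaler().fit(df_raw.iloc[:split])
data_train = pd.DataFrame(minmax.transform(df_raw.iloc[:split]))
data_test = pd.DataFrame(minmax.transform(df_raw.iloc[split:]))
predicted = infer(minmax, data_train, data_test)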
Example #2
    def show_result(self):

        files = os.listdir(self.output)
        for file in files:
            if ".pth" in file:
                path = os.path.join(self.output, file)
                lstm_model = LSTM(self.input_size, self.output_size,
                                  self.nb_neurons)
                lstm_model.load_state_dict(torch.load(path))
                lstm_model.eval()
                print("model : %s loaded" % path)
                predictions = []

                for (x, _) in self.testing_dataloader:
                    if x.shape[0] == self.batch_size:
                        with torch.no_grad():
                            lstm_model.hidden_cell = (
                                torch.zeros(1, self.batch_size,
                                            lstm_model.nb_neurons),
                                torch.zeros(1, self.batch_size,
                                            lstm_model.nb_neurons))
                            output = lstm_model(x.float())
                            output = self.data.unnormalizeData(
                                output).squeeze()
                            predictions += output.tolist()

                plt.plot(predictions, label="prediction")
                plt.plot(self.real_data_test, label="target")
                plt.title(file)
                plt.legend()
                plt.show()
Example #3
def test(test, feature, model, hidden, layer,  output, index2char, index2phone, phone_map, phone2index):
	ans = open(output,'w')
	ans.write('id,phone_sequence\n')
	test_set = Feature_Dataset(feature,'test')
	if feature == 'mfcc':
		feature_dim = 39
	elif feature == 'fbank':
		feature_dim = 69
	elif feature == 'all':
		feature_dim = 108
	
	if model == 'LSTM':
		test_model = LSTM(feature_dim, hidden, layer)
	elif model == 'BiLSTM':
		test_model = LSTM(feature_dim,hidden,layer,bi = True)
	elif model == 'C_RNN':
		group_size = 5
		test_model = C_RNN(group_size, feature_dim, hidden, layer)    
	
	checkpoint = torch.load(test)
	test_model.load_state_dict(checkpoint['model'])
	test_model.eval()
	if USE_CUDA:
		test_model = test_model.cuda()		
	for i in tqdm(range(1,len(test_set)+1)):
		data = test_set[i-1]
		speaker = data[0]
		test_feature = Variable(data[1].float())
		test_hidden = test_model.init_hidden()
		output = torch.max(test_model(test_feature,test_hidden),1)[1]
		result = test_trim(index2char,index2phone, phone_map, phone2index, output.data.cpu().numpy())
		ans.write('{},{}\n'.format(speaker,result))
	ans.close()
Example #4
def main(args):
    if args.model == 'base':
        postprocessing = None
    elif args.model == 'jump':
        postprocessing = pick_fix_length(400, PAD_TOKEN)
    TEXT = data.Field(lower=True,
                      postprocessing=postprocessing,
                      pad_token=PAD_TOKEN,
                      include_lengths=True)
    LABEL = data.Field(sequential=False, pad_token=None, unk_token=None)

    train, test = datasets.IMDB.splits(TEXT, LABEL)

    TEXT.build_vocab(train)
    LABEL.build_vocab(train)

    train_iter, test_iter = data.BucketIterator.splits(
        (train, test),
        batch_sizes=(args.batch, args.batch * 4),
        device=args.gpu,
        repeat=False,
        sort_within_batch=True)

    if args.model == 'base':
        model = LSTM(len(TEXT.vocab), 300, 128, len(LABEL.vocab))
    elif args.model == 'jump':
        model = LSTMJump(len(TEXT.vocab), 300, 128, len(LABEL.vocab), args.R,
                         args.K, args.N, 80, 8)
    model.load_pretrained_embedding(
        get_word2vec(TEXT.vocab.itos,
                     '.vector_cache/GoogleNews-vectors-negative300.bin'))
    model.cuda(args.gpu)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    max_accuracy = 0
    for i in range(args.epoch):
        print('Epoch: {}'.format(i + 1))
        sum_loss = 0
        model.train()
        for batch in train_iter:
            optimizer.zero_grad()
            xs, lengths = batch.text
            loss = model(xs, lengths, batch.label)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
            optimizer.step()
            sum_loss += loss.item()
        print(f'Loss: {sum_loss / len(train_iter)}')
        sum_correct = 0
        total = 0
        model.eval()
        for batch in test_iter:
            y = model.inference(*batch.text)
            sum_correct += y.eq(batch.label).sum().float()
            total += batch.label.size(0)
        accuracy = (sum_correct / total).item()
        max_accuracy = max(accuracy, max_accuracy)
        print(f'Accuracy: {accuracy}')
    print(f'Max Accuracy: {max_accuracy}')
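A plausible command-line wrapper for this `main`; only the flag names are taken from the attributes the function reads (`model`, `batch`, `gpu`, `lr`, `epoch`, `R`, `K`, `N`), the defaults are guesses:

# Hypothetical CLI; defaults are assumptions, flag names mirror main().
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=['base', 'jump'], default='base')
    parser.add_argument('--batch', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--epoch', type=int, default=5)
    parser.add_argument('--R', type=int, default=20)  # LSTM-Jump hyper-parameters
    parser.add_argument('--K', type=int, default=40)
    parser.add_argument('--N', type=int, default=5)
    main(parser.parse_args())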
Example #5
def validate():

    stock = "MC.PA"
    directory = "/Users/baptiste/Desktop/training"

    input_size = 4
    output_size = 4
    nb_neurons = 200

    test_split = 0.1
    time_window = 5

    dataloader = Data(stock)
    df = dataloader.getData()
    real_data = df.to_numpy()
    df_normalized = dataloader.normalizeData(df)
    df_normalized = torch.FloatTensor(df_normalized.to_numpy())

    test_split = int(test_split * df.shape[0])
    real_test_split = real_data[-test_split:-time_window, 3]
    testing_split = df_normalized[-test_split:, :]

    files = os.listdir(directory)

    for file in files:
        if ".pth" in file:
            path = os.path.join(directory, file)
            lstm_model = LSTM(input_size, output_size, nb_neurons)
            lstm_model.load_state_dict(torch.load(path))
            print("model : %s loaded" % path)

            lstm_model.eval()

            predictions = []

            for i in range(testing_split.shape[0] - time_window):

                x_test = testing_split[i:i + time_window]

                with torch.no_grad():

                    lstm_model.hidden_cell = (torch.zeros(
                        1, 1, lstm_model.nb_neurons),
                                              torch.zeros(
                                                  1, 1, lstm_model.nb_neurons))
                    predictions.append(
                        dataloader.unnormalizeData(
                            lstm_model(x_test).tolist()))
            predictions = np.array(predictions)[:, 3, 0]

            #plt.figure(15,10)
            plt.plot(real_test_split, label="target")
            plt.plot(predictions, label="prediction")
            plt.title(file)
            plt.legend()
            plt.show()
Example #6
def get_bot_response2():
    try:
        device = torch.device("cpu")

        with open('data2.json', 'r') as instances:
            data = json.load(instances)

        FILE = "dataserialized2.pth"
        dataserialized = torch.load(FILE)

        seq_length = dataserialized["seq_length"]
        input_size = dataserialized["input_size"]
        hidden_size = dataserialized["hidden_size"]
        num_layers = dataserialized["num_layers"]
        num_classes = dataserialized["num_classes"]
        word_list = dataserialized["word_list"]
        tags = dataserialized["tags"]
        model_state = dataserialized["model_state"]

        model = LSTM(seq_length, input_size, hidden_size, num_layers,
                     num_classes).to(device)
        model.load_state_dict(model_state)
        model.eval()
    except Exception as e:
        print(e)
    if request.method == "POST":
        bot = "Convo"
        user_data = request.json

        sentence = user_data['message']  #
        sentence = normalization(sentence)
        sentence = tokenization(sentence)
        x = bag_of_words(sentence, word_list)
        x = torch.from_numpy(x)
        x = x.reshape(-1, x.shape[0])
        x = x.to(device)  # x=torch.tensor(x)# print(x.shape)

        output, hidden = model(x)
        _, predicted = torch.max(output, dim=1)
        tag = tags[predicted.item()]

        prob = torch.softmax(output, dim=1)
        probability = prob[0][predicted.item()]

        if (probability.item() > 0.80):

            for i in data['data']:
                if tag == i['tag']:
                    return jsonify(random.choice(i['bot_responses']))
        else:
            return jsonify("I do not understand...")
Example #7
def train():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    seq, n_batch, n_vocab = get_data_from_file(flags.trainfile,
                                               flags.batch_size,
                                               flags.seq_size)

    model = LSTM(n_vocab, flags.seq_size, flags.embedding_size,
                 flags.lstm_size).to(device)

    #optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.7)
    optimizer = torch.optim.Adam(model.parameters(), lr=flags.lr)
    loss_function = nn.CrossEntropyLoss()

    total_acc = []
    total_loss = []

    for e in range(flags.num_epochs):
        (state_h_1,
         state_c_1), (state_h_2,
                      state_c_2) = model.zero_state(flags.batch_size)
        state_h_1 = state_h_1.to(device)
        state_c_1 = state_c_1.to(device)
        state_h_2 = state_h_2.to(device)
        state_c_2 = state_c_2.to(device)

        model.train()
        epoch_acc = []
        epoch_loss = []
        for i, (x,
                y) in enumerate(batch(seq, n_batch, flags.batch_size, device)):
            optimizer.zero_grad()

            logits, (state_h_1, state_c_1), (state_h_2, state_c_2) = model(
                x, (state_h_1, state_c_1), (state_h_2, state_c_2))

            #print("shape input {} , shape output {} ".format(np.shape(x),np.shape(logits)))

            #print(np.shape(logits),np.shape(y))
            loss = loss_function(logits, y)

            resp = logits.detach().cpu()

            if e == 90:
                stream(x, y, resp)
            acc = accuracy(y, resp)
            epoch_acc.append(acc)
            state_h_1 = state_h_1.detach()
            state_c_1 = state_c_1.detach()
            state_h_2 = state_h_2.detach()
            state_c_2 = state_c_2.detach()

            loss_value = loss.item()
            epoch_loss.append(loss_value)
            loss.backward()
            _ = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               flags.gradients_norm)
            optimizer.step()

        model.eval()
        epoch_acc_test = []

        print("epoch : {}  loss {} acc train : {} ".format(
            e, np.mean(epoch_loss), np.mean(epoch_acc)))
        total_acc.append(np.mean(epoch_acc))
        total_loss.append(np.mean(epoch_loss))
    """for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.data)"""

    return model, total_acc, total_loss, flags.lr
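This trainer pulls its hyper-parameters from a module-level `flags` object; a sketch with assumed values (only the attribute names come from the function above):

# Hypothetical flags object; attribute names mirror what train() reads.
from argparse import Namespace

flags = Namespace(trainfile='data/corpus.txt', batch_size=16, seq_size=32,
                  embedding_size=64, lstm_size=64, lr=0.001,
                  num_epochs=100, gradients_norm=5)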
Example #8
    print('model loaded')

    ## get positional code ##
    if opt['test']['use_ztta']:
        ztta = gen_ztta().cuda()
    # print('ztta:', ztta.size())
    # assert 0
    version = opt['test']['version']

    # writer = SummaryWriter(log_dir)
    loss_total_min = 10000000.0
    for epoch in range(opt['test']['num_epoch']):
        state_encoder.eval()
        offset_encoder.eval()
        target_encoder.eval()
        lstm.eval()
        decoder.eval()
        loss_total_list = []

        for i_batch, sampled_batch in enumerate(lafan_loader_test):
            # if i_batch != 33:
            #     continue
            pred_img_list = []
            gt_img_list = []
            img_list = []

            # print(i_batch, sample_batched['local_q'].size())

            loss_pos = 0
            loss_quat = 0
            loss_contact = 0
Example #9
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)

    idx_to_word, word_to_idx, vocab_size, in_text, out_text = read_file(
        train_file, batch_size, seq_size)

    num_batches, _ = in_text.shape

    val_index = np.random.choice(np.arange(num_batches),
                                 int(num_batches * val_data_proportion),
                                 replace=False)
    train_index = np.delete(np.arange(num_batches), val_index)

    train_in_text = in_text[train_index, :]
    train_out_text = out_text[train_index, :]

    val_in_text = in_text[val_index, :]
    val_out_text = out_text[val_index, :]

    # print(num_batches)
    # print(train_in_text.shape)
    # print(val_in_text.shape)
    # print(vocab_size)

    lstm_model = LSTM(vocab_size, seq_size, emb_size, hidden_size)
    lstm_model = lstm_model.to(device)

    lstm_optim = optim.Adam(lstm_model.parameters(), lr=l_rate)
    loss_function = torch.nn.CrossEntropyLoss()

    train_set_loss = []
    val_set_loss = []

    for i in range(epoch):
        train_batches = generate_batch(train_in_text, train_out_text,
                                       batch_size, seq_size)
        val_batches = generate_batch(val_in_text, val_out_text, batch_size,
                                     seq_size)
        h0, c0 = lstm_model.initial_state(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        total_loss, iterations, val_loss, val_iterations = 0, 0, 0, 0

        # training_batch
        for x, y in train_batches:
            iterations += 1
            lstm_model.train()
            # shape of x is (batch_size, seq_size)
            x = torch.tensor(x).to(device)
            y = torch.tensor(y).to(device)

            lstm_optim.zero_grad()
            logits, (h0, c0) = lstm_model(x, (h0, c0))
            _, _, n_cat = logits.shape
            loss = loss_function(logits.view(-1, n_cat), y.view(-1))
            total_loss += loss.item()
            loss.backward()

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            h0 = h0.detach()
            c0 = c0.detach()

            _ = torch.nn.utils.clip_grad_norm_(lstm_model.parameters(),
                                               gradients_norm)
            lstm_optim.step()
            # break

        for x_val, y_val in val_batches:

            val_iterations += 1
            lstm_model.eval()

            x_val = torch.tensor(x_val).to(device)
            y_val = torch.tensor(y_val).to(device)

            logits, (h0, c0) = lstm_model(x_val, (h0, c0))

            _, _, n_cat = logits.shape
            loss = loss_function(logits.view(-1, n_cat), y_val.view(-1))
            val_loss += loss.item()

        avg_loss = total_loss / iterations
        val_avg_loss = val_loss / val_iterations
        train_set_loss.append(avg_loss)
        val_set_loss.append(val_avg_loss)

        print('Epoch: {}'.format(i), 'Loss: {}'.format(avg_loss),
              'Validation Loss: {}'.format(val_avg_loss))
        # if i % 10 == 0:
        # 	torch.save(lstm_model.state_dict(),'checkpoint_pt/model-{}.pth'.format(i))
    _ = predict(device,
                lstm_model,
                vocab_size,
                word_to_idx,
                idx_to_word,
                top_k=predict_top_k)
    return train_set_loss, val_set_loss
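This `main` relies on several module-level settings; a sketch with assumed values (the names are exactly the globals the function uses):

# Hypothetical module-level configuration; values are assumptions.
train_file = 'data/input.txt'
batch_size = 16
seq_size = 32
emb_size = 64
hidden_size = 64
l_rate = 0.001
epoch = 50
val_data_proportion = 0.1
gradients_norm = 5
predict_top_k = 5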
Example #10
def main():
    if dataset == 'train' or dataset == 'val':
        data_dir = './data/train.tsv'
        data_x, data_y = load_data(data_dir,
                                   _type='train',
                                   _sent_only=sent_only)
    else:
        data_dir = './data/test.tsv'
        data_x = load_data(data_dir, _type='test', _sent_only=sent_only)

    print('Load data with size', len(data_x))
    _dict = load_dict(in_dir)
    data_x = word2index(data_x, _dict)
    data_x, lengths = Padding(data_x)

    if dataset == 'train' or dataset == 'val':
        train_x, train_y, train_l, valid_x, valid_y, valid_l = \
            Split_data(data_x, data_y, lengths, 0.8)
        if dataset == 'train':
            data_x, data_y, lengths = train_x, train_y, train_l
        if dataset == 'val':
            data_x, data_y, lengths = valid_x, valid_y, valid_l
    data_set = Data.TensorDataset(data_x, lengths)

    test_loader = Data.DataLoader(dataset=data_set,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=0)
    print('Dataset load done')

    dim_in = len(_dict)
    dim_out = 5
    model = LSTM(dim_in, dim_out, input_size=embed_size,
                 device=device).to(device)
    if _step == 0: model_name = 'best.pth'
    else: model_name = ('model%d.pth' % _step)
    model_dir = os.path.join(os.path.join(in_dir, 'checkpoint'), model_name)
    if device == 'cuda': weight_dict = torch.load(model_dir)
    else: weight_dict = torch.load(model_dir, map_location='cpu')
    model.load_state_dict(weight_dict)
    print('Model load done')

    with torch.no_grad():
        predict_ans = torch.LongTensor(0).to(device)
        model.eval()
        for step, (inputs, lengths) in enumerate(test_loader):
            inputs = inputs.to(device)
            lengths = lengths.to(device)
            outputs = model(inputs, lengths)
            predict = outputs.argmax(dim=1)
            predict_ans = torch.cat((predict_ans, predict))
            if step % 10 == 0:
                print('eval step %d' % step)
    if dataset == 'test':
        write_csv(in_dir, predict_ans)
        print('Test done')
    else:
        diff_matrix = np.zeros([5, 5])
        data_y = np.array(data_y)
        predict_ans = predict_ans.cpu().numpy()
        Test_Acc = 0
        for x, y in zip(predict_ans, data_y):
            diff_matrix[x, y] += 1
            Test_Acc += (x == y)
        for i in range(5):
            data_size = len(np.where(data_y == i)[0])
            if data_size > 0:
                diff_matrix[:, i] /= data_size
        diff_matrix[np.where(diff_matrix < 1e-3)] = 0

        print("diff_matrix:")
        print(diff_matrix)
        print(len(data_y), Test_Acc)
        Test_Acc /= len(data_y)

        print("Test_Acc:", Test_Acc)
Example #11
def train(config, start_epoch=1, best_validation_loss=np.inf):
    """Trains AWD-LSTM model using parameters from config."""
    print(f'Training for {config.epochs} epochs using the {config.dataset}',
          f'dataset with lambda value of {config.encoding_lmbd}')

    device = torch.device(config.device)
    dataLoader = DataLoader(config.dataset, config.batch_size, device,
                            config.bptt)
    model = LSTM(embedding_size=config.embedding_size,
                 hidden_size=config.hidden_size,
                 lstm_num_layers=config.lstm_num_layers,
                 vocab_size=len(dataLoader.corpus.dictionary),
                 batch_size=config.batch_size,
                 dropoute=config.dropoute,
                 dropouti=config.dropouti,
                 dropouth=config.dropouth,
                 dropouto=config.dropouto,
                 weight_drop=config.weight_drop,
                 tie_weights=config.tie_weights,
                 device=device)

    # D is set of gendered words, N is neutral words (not entirely correct, but close enough)
    D, N = get_gendered_words(config.dataset, dataLoader.corpus)

    criterion = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config.learning_rate,
                                weight_decay=config.weight_decay)

    def using_asgd():
        """Checks if optimizer is using ASGD"""
        return 't0' in optimizer.param_groups[0]

    if not config.overwrite and check_model_exists(config):
        print("Loading model from precious state")
        model, optimizer, start_epoch, best_validation_loss = load_current_state(
            model, optimizer, config)
        if using_asgd():
            temp = torch.optim.ASGD(model.parameters(),
                                    lr=config.learning_rate,
                                    t0=0,
                                    lambd=0.,
                                    weight_decay=config.weight_decay)
            temp.load_state_dict(optimizer.state_dict())
            optimizer = temp
        print("start epoch", start_epoch)

    params = list(model.parameters()) + list(criterion.parameters())

    val_losses = deque(maxlen=config.nonmono)

    for e in range(start_epoch, config.epochs + 1):
        epoch_done = False
        model.train()
        model.initialize_hidden()

        epoch_loss = 0  # Loss over the epoch
        n_batch = 0  # Number of batches that have been done
        t_start = time.time()
        print(f"starting epoch {e}/{config.epochs}")

        while not epoch_done:
            lr = optimizer.param_groups[0]['lr']

            # tr_batch, tr_labels are matrices with horizontal sequences.
            # seq_len is the sequence length in this iteration of the epoch,
            # see the OpenReview paper mentioned in the dataloader file
            tr_batch, tr_labels, seq_len, epoch_done = dataLoader.get_train_minibatch(
            )

            # Rescale learning rate for sequence length
            optimizer.param_groups[0]['lr'] = lr * seq_len / config.bptt

            n_batch += 1
            model.detach_hidden()  # Need to prevent improper backprop
            optimizer.zero_grad()

            out, _, lstm_raw_out, lstm_drop_out = model(tr_batch,
                                                        return_out=True)
            loss = criterion(out.permute(0, 2, 1), tr_labels.t())

            # AR optimisation
            if config.alpha:
                loss += config.alpha * lstm_drop_out.pow(2).mean()

            # TAR optimisation
            if config.beta:
                loss += config.beta * (lstm_raw_out[1:] -
                                       lstm_raw_out[:-1]).pow(2).mean()

            # Encoding bias regularization
            if config.encoding_lmbd > 0:
                loss += bias_regularization_term(model.embed.weight, D, N,
                                                 config.bias_variation,
                                                 config.encoding_lmbd)

            # Decoding bias regularization
            if config.decoding_lmbd > 0:
                loss += bias_regularization_term(model.decoder.weight, D, N,
                                                 config.bias_variation,
                                                 config.decoding_lmbd)

            loss.backward()

            # Gradient clipping added to see effects. Turned off by default
            if config.clip: torch.nn.utils.clip_grad_norm_(params, config.clip)

            optimizer.step()

            # Add current loss to epoch loss
            epoch_loss += loss.item()

            # Return learning rate to default
            optimizer.param_groups[0]['lr'] = lr

            # Evaluate the training
            if n_batch % config.batch_interval == 0:
                cur_loss = epoch_loss / n_batch
                elapsed = float(time.time() - t_start)
                examples_per_second = n_batch / elapsed
                print(
                    '| epoch {:3d} | {:5d} batch | lr {:05.5f} | batch/s {:5.2f} | '
                    'train loss {:5.2f} | perplexity {:5.2f} |'.format(
                        e, n_batch, optimizer.param_groups[0]['lr'],
                        examples_per_second, cur_loss, np.exp(cur_loss)))

        print("Saving current model")
        save_current_state(model, optimizer, e, best_validation_loss, config)

        # Evaluate the model on the validation set for early stopping
        if e % config.eval_interval == 0:
            print('Evaluating on validation for early stopping criterion')
            test_done = False
            model.initialize_hidden()
            model.eval()
            epoch_loss = 0
            n_batch = 0
            tot_seq_len = 0
            while not test_done:
                n_batch += 1
                va_batch, va_labels, seq_len, test_done = dataLoader.get_validation_minibatch(
                )
                tot_seq_len += seq_len
                out, _ = model(va_batch)
                model.detach_hidden()
                loss = criterion(out.permute(0, 2, 1), va_labels.t())
                epoch_loss += loss.item()
            cur_loss = epoch_loss / n_batch

            if best_validation_loss > cur_loss:
                print("best_validation_loss > cur_loss")
                best_validation_loss = cur_loss
                val_losses.append(cur_loss)
                save_for_early_stopping(model, config, best_validation_loss)

            print(
                '| epoch {:3d} | lr {:05.5f} | validation loss {:5.2f} | perplexity {:5.2f} |'
                .format(e, optimizer.param_groups[0]['lr'], cur_loss,
                        np.exp(cur_loss)))

            if not config.no_asgd and not using_asgd() and (
                    len(val_losses) == val_losses.maxlen
                    and cur_loss > min(val_losses)):
                print('Switching to ASGD')
                optimizer = torch.optim.ASGD(model.parameters(),
                                             lr=config.learning_rate,
                                             t0=0,
                                             lambd=0.,
                                             weight_decay=config.weight_decay)

        # Evaluate the model on the test set
        if e % config.eval_interval == 0:
            print('Evaluating on test')
            test_done = False
            model.eval()
            model.initialize_hidden()
            epoch_loss = 0
            n_batch = 0
            while not test_done:
                n_batch += 1
                te_batch, te_labels, seq_len, test_done = dataLoader.get_test_minibatch(
                )
                out, _ = model(te_batch)
                model.detach_hidden()
                loss = criterion(out.permute(0, 2, 1), te_labels.t())
                epoch_loss += loss.item()
            cur_loss = epoch_loss / n_batch

            print(
                '| epoch {:3d} | lr {:05.5f} | test loss {:5.2f} | perplexity {:5.2f} |'
                .format(e, optimizer.param_groups[0]['lr'], cur_loss,
                        np.exp(cur_loss)))

    print(
        f'Training is done. Best validation loss: {best_validation_loss}, validation perplexity: {np.exp(best_validation_loss)}'
    )
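A sketch of the `config` object this AWD-LSTM trainer expects; the field names are taken from the attributes the function reads, every value is a guess:

# Hypothetical config; only the attribute names come from train() above.
from argparse import Namespace

config = Namespace(
    epochs=50, dataset='wikitext-2', device='cuda', batch_size=80, bptt=70,
    embedding_size=400, hidden_size=1150, lstm_num_layers=3,
    dropoute=0.1, dropouti=0.65, dropouth=0.3, dropouto=0.4,
    weight_drop=0.5, tie_weights=True,
    learning_rate=30.0, weight_decay=1.2e-6,
    encoding_lmbd=0.0, decoding_lmbd=0.0, bias_variation=0.5,
    alpha=2.0, beta=1.0, clip=0.25,
    overwrite=False, nonmono=5, no_asgd=False,
    batch_interval=200, eval_interval=1)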
Example #12
def main():
    train_dir = './data/train.tsv'
    train_x, train_y = load_data(train_dir)
    print('Load train data with size', len(train_x))
    
    init_output_log(out_dir)
    _dict = build_dict(train_x, out_dir)
    dim_in = len(_dict)
    dim_out = 5
    train_x = word2index(train_x, _dict)
    train_x, lengths = Padding(train_x)
    train_y = torch.LongTensor(train_y)

    train_x, train_y, train_l, valid_x, valid_y, valid_l = \
        Split_data(train_x, train_y, lengths, split_rate)
    train_set = Data.TensorDataset(train_x, train_y, train_l)
    if weighted:
        samples_weight = get_samples_weight(train_y)
        sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    else: sampler = None
    train_loader = Data.DataLoader(dataset=train_set,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   num_workers=0,
                                   sampler=sampler)
    valid_set = Data.TensorDataset(valid_x, valid_y, valid_l)
    valid_loader = Data.DataLoader(dataset=valid_set,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   num_workers=0)
    print('Dataset load done')

    if embd_path is not None:
        pretrain_embd = load_embadding(embd_path, embed_size, _dict)
    else: pretrain_embd = None
    
    model = LSTM(dim_in, dim_out, \
                input_size=embed_size, \
                device=device, drop_rate=drop_rate, \
                pretrain_embd = pretrain_embd,
                num_layers = num_layers,
                freeze = freeze ).to(device)
    optimizer = Adam(model.parameters(), lr=init_LR, weight_decay=1e-4)
    loss_fn = nn.CrossEntropyLoss()
    print('Model load done')

    print('Training start')
    timer = Timer(epoch_size)
    min_loss = 100
    max_val_acc = 0
    Count = 0
    for epoch in range(epoch_size):
        if Count>=5:
            Count = 0
            lr = optimizer.param_groups[0]['lr']
            Decay_LR(optimizer, 0.1)
            lr_new = optimizer.param_groups[0]['lr']
            assert lr != lr_new

        train_loss = 0
        train_acc = 0
        iter_size = 0
        model.train()
        for step, (inputs, labels, lengths) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            lengths = lengths.to(device)
            outputs = model(inputs, lengths)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            predict = outputs.argmax(dim=1)
            train_acc += (predict == labels).sum().item()
            train_loss += loss.item()
            iter_size += 1
            if (step+1)%250 ==0:
                print("[Iter] epoch %d, iter %d, loss %f"%(epoch, step, loss))
        train_loss /= iter_size
        train_acc /= len(train_x)
        print("[Epoch] epoch %d, train_loss %f, train_acc %f"%(epoch, train_loss, train_acc))
        save_data(out_dir, train_loss, 'train_loss.txt')
        save_data(out_dir, train_acc, 'train_acc.txt')
        if train_loss < min_loss:
            min_loss = train_loss; Count = 0
        else: Count += 1

        if len(valid_x)>0:
            valid_loss = 0
            valid_acc = 0
            iter_size = 0
            with torch.no_grad():
                model.eval()
                for step, (inputs, labels, lengths) in enumerate(valid_loader):
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    lengths = lengths.to(device)
                    outputs = model(inputs, lengths)
                    loss = loss_fn(outputs, labels)
                    predict = outputs.argmax(dim=1)
                    valid_acc += (predict == labels).sum().item()
                    valid_loss += loss.item()
                    iter_size += 1
            
            valid_loss /= iter_size
            valid_acc /= len(valid_x)
            print("[Epoch] epoch %d, valid_loss %f, valid_acc %f"%(epoch, valid_loss, valid_acc))
            save_data(out_dir, valid_loss, 'valid_loss.txt')
            save_data(out_dir, valid_acc, 'valid_acc.txt')
            if valid_acc > max_val_acc:
                max_val_acc = valid_acc
                print('epoch %d, best val acc: %f'%(epoch, valid_acc))
                save_model(out_dir, model, 'best.pth')
        
        if (epoch+1)%20==0:
            save_model(out_dir, model, 'model%d.pth'%(epoch))
        timer.update()
        
    
    print('Training end')
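The training script above likewise depends on module-level settings; a sketch with placeholder values (only the names are taken from the code):

# Hypothetical globals for the training snippet above; values are guesses.
out_dir = './output'
split_rate = 0.9
weighted = False
batch_size = 64
embed_size = 128
embd_path = None           # or a path to pretrained embeddings
drop_rate = 0.5
num_layers = 2
freeze = False
init_LR = 1e-3
epoch_size = 100
device = 'cuda'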
Example #13
def main(trial_num):
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_type = "lstm"

    # Hyper-parameters
    sequence_length = 28
    input_size = 28
    num_layers = 1
    hidden_size = 128
    num_classes = 10
    batch_size = 100
    num_epochs = 20
    learning_rate = 0.01
    num_trials = 100
    a_range = [1.0, 3.0]
    # a_s = [1.5, 2.0, 2.2, 2.5, 2.7, 3.0]

    # just for testing
    # num_trials = 1
    # num_epochs = 20
    # a_s = [1.0]

    # for a in a_s:
    trials = {}
    for num_trial in range(num_trials):
        a = random.random() * (a_range[1] - a_range[0]) + a_range[0]
        print('trial Num: ', trial_num,  "a: ", a, "num_trial: ", num_trial)
        trial = {}
        trial['a'] = a
        # define model
        if model_type == 'lstm':
            model = LSTM(input_size, hidden_size, num_layers, num_classes, a, device).to(device)
        elif model_type == 'gru':
            model = GRU(input_size, hidden_size, num_layers, num_classes, a, device).to(device)

        # Loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        train_dataloader = MNIST_dataloader(batch_size, train=True)
        test_dataloader = MNIST_dataloader(batch_size, train=False)
        # Train the model
        total_step = len(train_dataloader.dataloader)

        total = 0
        total_loss = 0
        for epoch in range(num_epochs):
            model.train()
            for i, (images, labels) in enumerate(train_dataloader.dataloader):
                images = images.reshape(-1, sequence_length, input_size).to(device)
                labels = labels.to(device)

                # Forward pass
                outputs, hts = model(images)
                loss = criterion(outputs, labels)
                total_loss += loss * labels.size(0)
                total += labels.size(0)
                # print(LEs, rvals)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # if (i + 1) % 300 == 0:
                #     print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                #           .format(epoch + 1, num_epochs, i + 1, total_step, total_loss / total))

            # for i, (name, param) in enumerate(model.named_parameters()):
            #     if i == 3:
            #         print(name, param)
            # Test the model
            model.eval()
            with torch.no_grad():
                correct = 0
                total = 0
                total_loss = 0
                for i, (images, labels) in enumerate(test_dataloader.dataloader):
                    images = images.reshape(-1, sequence_length, input_size).to(device)
                    labels = labels.to(device)
                    outputs, _ = model(images)

                    # h = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                    # c = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                    # params = (images, (h, c))
                    # if i == 0:
                    #     LEs, rvals = calc_LEs_an(*params, model=model)

                    loss = criterion(outputs, labels)

                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    total_loss += loss * labels.size(0)
                if epoch == (num_epochs - 1):
                    print('Epoch [{}/{}] Loss: {}, Test Accuracy: {} %'.format(epoch + 1, num_epochs, total_loss / total, 100 * correct / total))
            saved_model = copy.deepcopy(model)
            trial[epoch] = {"model": saved_model, "accuracy": 100 * correct / total, "loss": total_loss / total}
            del saved_model
        trials[num_trial] = trial
        pickle.dump(trials, open('trials/{}/models/{}_{}_trials_{}.pickle'.format(model_type, model_type, hidden_size, trial_num), 'wb'))
Example #14
        os.mkdir("checkpoints")
    if not os.path.exists(os.path.join("checkpoints", "try")):
        os.mkdir(os.path.join("checkpoints", "try"))
    model_out_path = "checkpoints/try/model_epoch_{}.pth".format(epoch)
    torch.save(model, model_out_path)
    print("Checkpoint saved to {}".format("checkpoints" + "try"))


nEpochs = 1
for epoch in range(1, nEpochs + 1):
    train(epoch)
    # if epoch % 5 == 0:
    #     checkpoint(epoch)

predDat = []
model = model.eval()
for step, data in enumerate(test_data, 1):
    seq = ToVariable(data[0])
    trueVal = ToVariable(data[1])
    if use_gpu:
        seq = seq.cuda()
        trueVal = trueVal.cuda()
    predDat = model(seq)
    for i in range(len(predDat[0])):
        if predDat[0][i] < 0:
            predDat[0][i] = 0
        if predDat[0][i] % 1 > 0.3:
            predDat[0][i] = math.ceil(predDat[0][i])
        else:
            predDat[0][i] = math.floor(predDat[0][i])
    loss_int = loss_function(predDat, trueVal)
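`ToVariable` and `use_gpu` are defined elsewhere in that script; a minimal guess at what they might look like:

# Hypothetical helpers assumed by the snippet above.
import torch

use_gpu = torch.cuda.is_available()

def ToVariable(x):
    # Wrap an array/list as a float tensor (Variable wrappers are implicit
    # in modern PyTorch).
    return torch.as_tensor(x, dtype=torch.float32)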
Example #15
    #load pretrained LSTM model
    conv = None
    if opt.conv:
        conv = LSTM(n_mels)
        files = os.listdir(statepath)
        states = [f for f in files if "lstm_" in f]
        states.sort()
        if not len(states) > 0:
            raise Exception("no states for crnn provided!")
        state = os.path.join(statepath, states[-1])
        if os.path.isfile(state):
            state = torch.load(state)
            conv.load_state_dict(state['state_dict'])
        conv.to(device)
        conv.eval()
        del state
    # print(netG)
    # print(netD)

    criterion = nn.BCELoss()

    fixed_noise = None
    if opt.ae:
        fixed_noise = torch.tensor([
            vae.encode(Mset[i].to(device)).detach().cpu().numpy()
            for i in range(1337, 1337 + opt.batchSize)
        ],
                                   dtype=torch.float32)

        # sample vectors taken from unsmoothened song "Ed Sheeran - Shape of You.mp3"
Example #16
def main():
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_type = "lstm"
    # Hyper-parameters
    sequence_length = 28
    input_size = 28

    num_layers = 1
    num_classes = 10
    batch_size = 100
    num_epochs = 10
    learning_rate = 0.01
    num_trials = 100
    a_s = [2]
    trials = {}

    # just for testing
    num_trials = 1
    num_epochs = 5
    a_s = np.random.uniform(0.1, 2, [2])
    for a in a_s:
        for num_trial in range(num_trials):
            print("a: ", a, "num_trial: ", num_trial)
            hidden_size = 8
            trial = {}
            if model_type == 'lstm':
                model = LSTM(input_size, hidden_size, num_layers, num_classes, a, device).to(device)
            elif model_type == 'gru':
                model = GRU(input_size, hidden_size, num_layers, num_classes, a, device).to(device)
            # Loss and optimizer
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
            train_dataloader = MNIST_dataloader(batch_size, train=True)
            test_dataloader = MNIST_dataloader(batch_size, train=False)
            # Train the model
            total_step = len(train_dataloader.dataloader)

            for epoch in range(num_epochs):
                model.train()
                for i, (images, labels) in enumerate(train_dataloader.dataloader):
                    images = images.reshape(-1, sequence_length, input_size).to(device)
                    labels = labels.to(device)

                    # Forward pass
                    outputs, hts = model(images)
                    loss = criterion(outputs, labels)
                    # print(LEs, rvals)

                    # Backward and optimize
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    if (i + 1) % 300 == 0:
                        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                              .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))


                # Test the model
                model.eval()
                with torch.no_grad():
                    correct = 0
                    total = 0
                    for i, (images, labels) in enumerate(test_dataloader.dataloader):
                        images = images.reshape(-1, sequence_length, input_size).to(device)
                        labels = labels.to(device)
                        outputs, _ = model(images)

                        # calculate LEs
                        # h = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                        # c = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                        # params = (images, (h, c))
                        # if i == 0:
                        #     LEs, rvals = calc_LEs_an(*params, model=model)

                        loss = criterion(outputs, labels)
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                    if epoch == (num_epochs - 1):
                        print('Epoch [{}/{}] Loss: {}, Test Accuracy: {} %'.format(epoch + 1, num_epochs, loss, 100 * correct / total))
                trial[epoch] = {'model': model, 'accuracy': 100 * correct / total,
                                'loss': loss}
            trials[num_trial] = trial
        saved_path = f'../../../dataset/trials/{model_type}/models/'
        pickle.dump(trials, open(f'{saved_path}/lstm_{hidden_size}_trials_0.pickle', 'wb'))
Example #17
class dl_model():
    def __init__(self, mode):

        # read the config file which contains the parameters
        self.config_file = read_yaml()
        self.mode = mode

        arch_name = '_'.join([
            self.config_file['rnn'],
            str(self.config_file['num_layers']),
            str(self.config_file['hidden_dim'])
        ])
        self.config_file['dir']['models'] = self.config_file['dir'][
            'models'].split('/')[0] + '_' + arch_name + '/'
        self.config_file['dir']['plots'] = self.config_file['dir'][
            'plots'].split('/')[0] + '_' + arch_name + '/'

        #if not os.path.exists(self.config_file['dir']['models']):
        #    os.mkdir(self.config_file['dir']['models'])
        #if not os.path.exists(self.config_file['dir']['plots']):
        #    os.mkdir(self.config_file['dir']['plots'])

        if self.config_file['rnn'] == 'LSTM':
            from model import LSTM as Model
        elif self.config_file['rnn'] == 'GRU':
            from model import GRU as Model
        else:
            print("Model not implemented")
            exit(0)

        self.cuda = (self.config_file['cuda'] and torch.cuda.is_available())
        self.output_dim = self.config_file['num_phones']

        if mode == 'train' or mode == 'test':

            self.plots_dir = self.config_file['dir']['plots']
            # store hyperparameters
            self.total_epochs = self.config_file['train']['epochs']
            self.test_every = self.config_file['train']['test_every_epoch']
            self.test_per = self.config_file['train']['test_per_epoch']
            self.print_per = self.config_file['train']['print_per_epoch']
            self.save_every = self.config_file['train']['save_every']
            self.plot_every = self.config_file['train']['plot_every']
            # dataloader which returns batches of data
            self.train_loader = timit_loader('train', self.config_file)
            self.test_loader = timit_loader('test', self.config_file)

            self.start_epoch = 1
            self.test_acc = []
            self.train_losses, self.test_losses = [], []
            # declare model
            self.model = Model(self.config_file,
                               weights=self.train_loader.weights)

        else:

            self.model = Model(self.config_file, weights=None)

        if self.cuda:
            self.model.cuda()

        # resume training from some stored model
        if self.mode == 'train' and self.config_file['train']['resume']:
            self.start_epoch, self.train_losses, self.test_losses, self.test_acc = self.model.load_model(
                mode, self.config_file['rnn'], self.model.num_layers,
                self.model.hidden_dim)
            self.start_epoch += 1

        # load best model for testing/feature extraction
        elif self.mode == 'test' or mode == 'test_one':
            self.model.load_model(mode, self.config_file['rnn'],
                                  self.model.num_layers, self.model.hidden_dim)

        self.replacement = {
            'aa': ['ao'],
            'ah': ['ax', 'ax-h'],
            'er': ['axr'],
            'hh': ['hv'],
            'ih': ['ix'],
            'l': ['el'],
            'm': ['em'],
            'n': ['en', 'nx'],
            'ng': ['eng'],
            'sh': ['zh'],
            'pau':
            ['pcl', 'tcl', 'kcl', 'bcl', 'dcl', 'gcl', 'h#', 'epi', 'q'],
            'uw': ['ux']
        }

    def train(self):

        print("Starting training at t =", datetime.datetime.now())
        print('Batches per epoch:', len(self.train_loader))
        self.model.train()

        # when to print losses during the epoch
        print_range = list(
            np.linspace(0,
                        len(self.train_loader),
                        self.print_per + 2,
                        dtype=np.uint32)[1:-1])

        if self.test_per == 0:
            test_range = []
        else:
            test_range = list(
                np.linspace(0,
                            len(self.train_loader),
                            self.test_per + 2,
                            dtype=np.uint32)[1:-1])

        for epoch in range(self.start_epoch, self.total_epochs + 1):

            print("Epoch:", str(epoch))
            epoch_loss = 0.0
            i = 0

            while True:

                i += 1

                inputs, labels, lens, status = self.train_loader.return_batch()
                inputs, labels, lens = torch.from_numpy(
                    np.array(inputs)).float(), torch.from_numpy(
                        np.array(labels)).long(), torch.from_numpy(
                            np.array(lens)).long()

                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    lens = lens.cuda()

                # zero the parameter gradients
                self.model.optimizer.zero_grad()
                # forward + backward + optimize
                outputs = self.model(inputs, lens)
                loss = self.model.calculate_loss(outputs, labels, lens)
                loss.backward()

                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.config_file['grad_clip'])
                self.model.optimizer.step()

                # store loss
                epoch_loss += loss.item()

                if i in print_range:
                    try:
                        print(
                            'After %i batches, Current Loss = %.7f, Avg. Loss = %.7f'
                            % (i + 1, epoch_loss / (i + 1),
                               np.mean([x[0] for x in self.train_losses])))
                    except:
                        pass

                if i in test_range:
                    self.test(epoch)
                    self.model.train()

                if status == 1:
                    break

            self.train_losses.append(
                (epoch_loss / len(self.train_loader), epoch))

            # test every 5 epochs in the beginning and then every fixed no of epochs specified in config file
            # useful to see how loss stabilises in the beginning
            if epoch % 5 == 0 and epoch < self.test_every:
                self.test(epoch)
                self.model.train()
            elif epoch % self.test_every == 0:
                self.test(epoch)
                self.model.train()
            # plot loss and accuracy
            if epoch % self.plot_every == 0:
                self.plot_loss_acc(epoch)

            # save model
            if epoch % self.save_every == 0:
                self.model.save_model(False, epoch, self.train_losses,
                                      self.test_losses, self.test_acc,
                                      self.config_file['rnn'],
                                      self.model.num_layers,
                                      self.model.hidden_dim)

    def test(self, epoch=None):

        self.model.eval()
        correct = 0
        total = 0
        correct_nopause = 0
        total_nopause = 0
        pause_id = 27
        # confusion matrix data is stored in this matrix
        matrix = np.zeros((self.output_dim, self.output_dim))
        pad_id = self.output_dim

        print("Testing...")
        print('Total batches:', len(self.test_loader))
        test_loss = 0

        with torch.no_grad():

            while True:

                inputs, labels, lens, status = self.test_loader.return_batch()
                inputs, labels, lens = torch.from_numpy(
                    np.array(inputs)).float(), torch.from_numpy(
                        np.array(labels)).long(), torch.from_numpy(
                            np.array(lens)).long()
                # print(inputs.shape, labels.shape, lens)
                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    lens = lens.cuda()

                # zero the parameter gradients
                self.model.optimizer.zero_grad()
                # forward + backward + optimize
                outputs = self.model(inputs, lens)
                loss = self.model.calculate_loss(outputs, labels, lens)
                test_loss += loss.item()

                outputs = outputs.cpu().numpy()
                labels = labels.cpu().numpy(
                )[:, :
                  outputs.shape[1]]  # remove extra padding from current batch
                outputs = np.reshape(
                    outputs[:, :, :-1],
                    (-1, self.output_dim))  # ignore blank token
                labels = np.reshape(labels, (-1))
                total_pad_tokens = np.sum(labels == pad_id)
                argmaxed = np.argmax(outputs, 1)

                # total number of correct phone predictions
                for i in range(len(labels)):
                    if labels[i] != pause_id and labels[
                            i] != pad_id:  # is not pause or pad
                        if argmaxed[i] == labels[i]:
                            correct_nopause += 1
                        total_nopause += 1
                correct += np.sum(argmaxed == labels)
                total += len(argmaxed) - total_pad_tokens

                # matrix[i][j] denotes the no of examples classified by model as class j but have ground truth label i
                for k in range(argmaxed.shape[0]):
                    if labels[k] == pad_id:
                        continue
                    matrix[labels[k]][argmaxed[k]] += 1

                if status == 1:
                    break

        for i in range(self.output_dim):
            matrix[i] /= sum(matrix[i])

        acc_all = correct / total
        acc_nopause = correct_nopause / total_nopause
        print(acc_all, acc_nopause)

        test_loss /= len(self.test_loader)

        # plot confusion matrix
        if epoch is not None:
            filename = self.plots_dir + 'confmat_epoch_acc_' + str(
                epoch) + '_' + str(int(100 * acc_all)) + '.png'
            plt.clf()
            plt.imshow(matrix, cmap='hot', interpolation='none')
            plt.gca().invert_yaxis()
            plt.xlabel("Predicted Label ID")
            plt.ylabel("True Label ID")
            plt.colorbar()
            plt.savefig(filename)

        print("Testing accuracy: All - %.4f, No Pause - %.4f , Loss: %.7f" %
              (acc_all, acc_nopause, test_loss))

        self.test_acc.append((acc_all, epoch))
        self.test_losses.append((test_loss, epoch))

        # if testing loss is minimum, store it as the 'best.pth' model, which is used for feature extraction
        if test_loss == min([x[0] for x in self.test_losses]):
            print("Best new model found!")
            self.model.save_model(True, epoch, self.train_losses,
                                  self.test_losses, self.test_acc,
                                  self.config_file['rnn'],
                                  self.model.num_layers, self.model.hidden_dim)

        return acc_all

    # Called during feature extraction. Takes log mel filterbank energies as input and outputs the phone predictions
    def test_one(self, file_path):

        (rate, sig) = wav.read(file_path)
        assert rate == 16000
        # sig ranges from -32768 to +32768 AND NOT -1 to +1
        feat, energy = fbank(sig,
                             samplerate=rate,
                             nfilt=self.config_file['feat_dim'],
                             winfunc=np.hamming)
        tsteps, hidden_dim = feat.shape
        # calculate log mel filterbank energies for complete file
        feat_log_full = np.reshape(np.log(feat), (1, tsteps, hidden_dim))
        lens = np.array([tsteps])
        inputs, lens = torch.from_numpy(
            np.array(feat_log_full)).float(), torch.from_numpy(
                np.array(lens)).long()
        id_to_phone = {v[0]: k for k, v in self.model.phone_to_id.items()}

        self.model.eval()

        with torch.no_grad():
            if self.cuda:
                inputs = inputs.cuda()
                lens = lens.cuda()

            # Pass through model
            a = time.time()

            outputs = self.model(inputs, lens).cpu().numpy()
            print(time.time() - a)
            # Since only one example per batch and ignore blank token
            outputs = outputs[0, :, :-1]
            softmax = np.exp(outputs) / np.sum(np.exp(outputs), axis=1)[:,
                                                                        None]

        return softmax, id_to_phone

    # Test for each wav file in the folder and also compare with ground truth
    def test_folder(self, test_folder, top_n=1, show_graphs=False):

        accs = []

        for wav_file in sorted(os.listdir(test_folder)):

            # Read input test file
            wav_path = os.path.join(test_folder, wav_file)
            dump_path = wav_path[:-4] + '_pred.txt'

            # Read only wav
            if wav_file == '.DS_Store' or wav_file.split(
                    '.')[-1] != 'wav':  # or os.path.exists(dump_path):
                continue

            (rate, sig) = wav.read(wav_path)
            assert rate == 16000
            # sig ranges from -32768 to +32768 AND NOT -1 to +1
            feat, energy = fbank(sig,
                                 samplerate=rate,
                                 nfilt=self.config_file['feat_dim'],
                                 winfunc=np.hamming)
            tsteps, hidden_dim = feat.shape
            # calculate log mel filterbank energies for complete file
            feat_log_full = np.reshape(np.log(feat), (1, tsteps, hidden_dim))
            lens = np.array([tsteps])
            inputs, lens = torch.from_numpy(
                np.array(feat_log_full)).float(), torch.from_numpy(
                    np.array(lens)).long()
            id_to_phone = {v[0]: k for k, v in self.model.phone_to_id.items()}

            self.model.eval()

            with torch.no_grad():

                if self.cuda:
                    inputs = inputs.cuda()
                    lens = lens.cuda()

                # Pass through model
                outputs = self.model(inputs, lens).cpu().numpy()
                # Since only one example per batch and ignore blank token
                outputs = outputs[0, :, :-1]
                softmax = np.exp(outputs) / np.sum(np.exp(outputs),
                                                   axis=1)[:, None]
                softmax_probs = np.max(softmax, axis=1)
                # print(softmax)
                # Take argmax to generate the final string
                argmaxed = np.argmax(outputs, axis=1)
                final_str = [id_to_phone[a] for a in argmaxed]
                # Generate dumpable format of phone, start time and end time
                ans = compress_seq(final_str)
                print("Predicted:", ans)

            phone_path = wav_path[:-3] + 'PHN'

            # If .PHN file exists, report accuracy
            if os.path.exists(phone_path):
                ground_truth = read_phones(phone_path, self.replacement)
                print("Ground truth:", ground_truth)

                unrolled_truth = []
                for elem in ground_truth:
                    unrolled_truth += [elem[0]] * (elem[2] - elem[1] + 1)

                truth_softmax = []
                top_n_softmax = [[] for x in range(top_n)]
                # Check for top-n
                correct, total = 0, 0
                for i in range(min(len(unrolled_truth), len(final_str))):

                    truth_softmax.append(softmax[i][self.model.phone_to_id[
                        unrolled_truth[i]][0]])

                    # Rank all phone classes for this frame by score (descending)
                    desc = sorted(enumerate(outputs[i]),
                                  key=lambda x: x[1],
                                  reverse=True)
                    cur_frame_res = [id_to_phone[x[0]] for x in desc][:top_n]

                    for k in range(top_n):
                        top_n_softmax[k].append(softmax[i][
                            self.model.phone_to_id[cur_frame_res[k]][0]])

                    if unrolled_truth[i] in cur_frame_res:
                        # print truth softmax
                        # if unrolled_truth[i] != cur_frame_res[0]:
                        # print(i, truth_softmax[-1])
                        correct += 1

                    total += 1

                accs.append(correct / total)

                if show_graphs:
                    # Plot actual softmax and predicted softmax
                    for i in range(top_n):
                        plt.plot(top_n_softmax[i], label=str(i + 1) + ' prob.')
                    print(top_n_softmax)
                    plt.plot(truth_softmax,
                             label='Ground Truth prob',
                             alpha=0.6)
                    plt.xlabel("Frame number")
                    plt.ylabel("Prob")
                    plt.legend()
                    plt.show()

                with open(dump_path, 'w') as f:
                    f.write('Predicted:\n')
                    for t in ans:
                        f.write(' '.join(str(s) for s in t) + '\n')
                    f.write('\nGround Truth:\n')
                    for t in ground_truth:
                        f.write(' '.join(str(s) for s in t) + '\n')
                    f.write('\nTop-' + str(top_n) + ' accuracy is ' +
                            str(correct / total))
            else:
                with open(dump_path, 'w') as f:
                    f.write('Predicted:\n')
                    for t in ans:
                        f.write(' '.join(str(s) for s in t) + '\n')
        print(accs)
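
test_folder relies on a compress_seq helper that is not shown in this snippet. Judging from how its result is printed and dumped above, it collapses consecutive repeated phones into (phone, start_frame, end_frame) runs; the sketch below is an assumption of that behaviour, not the project's actual implementation.

def compress_seq(frame_phones):
    # Collapse consecutive repeated phones into (phone, start_frame, end_frame) runs
    runs, start = [], 0
    for i in range(1, len(frame_phones) + 1):
        if i == len(frame_phones) or frame_phones[i] != frame_phones[start]:
            runs.append((frame_phones[start], start, i - 1))
            start = i
    return runs
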

    # Plot train/test loss and test accuracy over time
    def plot_loss_acc(self, epoch):

        plt.clf()
        plt.plot([x[1] for x in self.train_losses],
                 [x[0] for x in self.train_losses],
                 c='r',
                 label='Train')
        plt.plot([x[1] for x in self.test_losses],
                 [x[0] for x in self.test_losses],
                 c='b',
                 label='Test')
        plt.title("Train/Test loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.grid(True)

        filename = self.plots_dir + 'loss' + '_' + str(epoch) + '.png'
        plt.savefig(filename)

        plt.clf()
        plt.plot([x[1] for x in self.test_acc],
                 [100 * x[0] for x in self.test_acc],
                 c='r')
        plt.title("Test accuracy")
        plt.xlabel("Epochs")
        plt.ylabel("Accuracy in %%")
        plt.grid(True)

        filename = self.plots_dir + 'test_acc' + '_' + str(epoch) + '.png'
        plt.savefig(filename)

        print("Saved plots")
Ejemplo n.º 18
0
        # forward + backward + optimize
        predict_labels = model(
            input_features, torch.LongTensor(sorted(
                lengths)[::-1]))  # size = batch_size x video_length (varies) x 11
        loss = loss_function(predict_labels, input_vals)  # sizes: 64x11 vs 64
        loss.backward()
        optimizer.step()
        total_loss += loss.cpu().data.numpy()
        total_batchnum = batch_idx + 1
    print("avg training loss:", total_loss / total_batchnum)
    train_loss.append(total_loss / total_batchnum)

    # validation
    accuracy_val = 0
    with torch.no_grad():
        model.eval()
        for batch_idx, batch_val in enumerate(
                range(0, datalen_valid, BATCH_SIZE)):
            # get the batch items
            if batch_val + BATCH_SIZE > datalen_valid:
                valid_features_batch = valid_features[batch_val:]
                valid_vals_batch = valid_vals[batch_val:]
            else:
                valid_features_batch = valid_features[batch_val:batch_val +
                                                      BATCH_SIZE]
                valid_vals_batch = valid_vals[batch_val:batch_val + BATCH_SIZE]
            # sort the batch items by video length (number of 2048-d frame features per video)
            lengths = np.array([len(x) for x in valid_features_batch])
            sorted_indexes = np.argsort(lengths)[::-1]  # decreasing
            valid_features_batch = [
                valid_features_batch[i] for i in sorted_indexes
Ejemplo n.º 19
0
def train(batch_size=64, window_size=3, epochs=100):

    train_windows_dataset = Andersson_windows_dataset(mode='train', window_size=window_size)
    train_windows_loader = DataLoader(train_windows_dataset, batch_size=1, shuffle=True)

    val_windows_dataset = Andersson_windows_dataset(mode='val', window_size=window_size)
    val_windows_loader = DataLoader(val_windows_dataset, batch_size=1, shuffle=True)

    base_lr_rate = 1e-2
    weight_decay = 0.000016

    model = LSTM(input_size=40, hidden_size=512, num_classes=170, n_layers=16).to(device=torch.device('cuda:0'))   

    #criterion = nn.BCEWithLogitsLoss()
    criterion = nn.CrossEntropyLoss()
    #criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=base_lr_rate, weight_decay=weight_decay, amsgrad=True)

    for current_epoch in range(epochs):

        current_train_iter                      = 0
        current_val_iter                        = 0
        
        running_train_loss                      = 0.0
        current_average_train_loss              = 0.0
        running_val_loss                        = 0.0
        current_average_val_loss                = 0.0

        num_train_data                          = 0
        num_val_data                            = 0

        running_train_correct_preds             = 0
        running_train_correct_classwise_preds   = [0] * 170

        running_val_correct_preds               = 0
        running_val_correct_classwise_preds     = [0] * 170

        for phase in ['train', 'val']:

            # Train loop
            if phase == 'train':
                train_epoch_since = time.time()

                model.train()

                for train_windows, train_track_id in train_windows_loader:

                    train_iterating_dataset = Andersson_iterating_dataset(windows=train_windows, track_id=train_track_id)
                    train_iterating_loader = DataLoader(train_iterating_dataset, batch_size=batch_size, shuffle=True)
                    #train_iterator = iter(train_iterating_loader)

                    for train_batch_window, train_batch_label in train_iterating_loader:
                        current_train_iter += 1

                        outs = model(train_batch_window)        
            
                        #scheduler = poly_lr_scheduler(optimizer = optimizer, init_lr = base_lr_rate, iter = current_iter, lr_decay_iter = 1, 
                        #                          max_iter = max_iter, power = power)                                                          # max_iter = len(train_loader)
                
                        optimizer.zero_grad()
                
                        #loss = criterion(outs, train_batch_label)
                        gt_confidence, gt_index = torch.max(train_batch_label, dim=1)
                        loss = criterion(outs, gt_index)

                        running_train_loss += loss.item()
                        current_average_train_loss = running_train_loss / current_train_iter
                    
                        loss.backward(retain_graph=False)
                
                        optimizer.step()

                        pred_confidence, pred_index = torch.max(outs, dim=1)
                        #gt_confidence, gt_index = torch.max(train_batch_label, dim=1)
                        batch_correct_preds = torch.eq(pred_index, gt_index).long().sum().item()
                        batch_accuracy = (batch_correct_preds / train_batch_window.shape[0]) * 100

                        num_train_data += train_batch_window.shape[0]
                        running_train_correct_preds += batch_correct_preds

                        if current_train_iter % 10 == 0:
                            print(f"\nITER#{current_train_iter} BATCH TRAIN ACCURACY: {batch_accuracy}, RUNNING TRAIN LOSS: {loss.item()}")
                            print(f"Predicted / GT index:\n{pred_index}\n{gt_index}\n")

                last_epoch_average_train_loss = current_average_train_loss
                epoch_accuracy = (running_train_correct_preds / num_train_data) * 100

                print(f"EPOCH#{current_epoch+1} EPOCH TRAIN ACCURACY: {epoch_accuracy}, AVERAGE TRAIN LOSS: {last_epoch_average_train_loss}")

                train_time_elapsed = time.time() - train_epoch_since
            
            # Validation loop
            elif phase == 'val':
                val_epoch_since = time.time()   
               
                model.eval()
                
                with torch.no_grad():
                    for val_windows, val_track_id in val_windows_loader:

                        val_iterating_dataset = Andersson_iterating_dataset(windows=val_windows, track_id=val_track_id)
                        val_iterating_loader = DataLoader(val_iterating_dataset, batch_size=batch_size, shuffle=True)
                        #val_iterator = iter(val_iterating_loader)
                        
                        for val_batch_window, val_batch_label in val_iterating_loader:
                            current_val_iter += 1               
                            
                            outs = model(val_batch_window)
                            
                            gt_confidence, gt_index = torch.max(val_batch_label, dim=1)
                            #val_loss = criterion(outs, val_batch_label)
                            val_loss = criterion(outs, gt_index)
                        
                            running_val_loss += val_loss.item()
                            current_average_val_loss = running_val_loss / current_val_iter

                            pred_confidence, pred_index = torch.max(outs, dim=1)
                            #gt_confidence, gt_index = torch.max(val_batch_label, dim=1)
                            batch_correct_preds = torch.eq(pred_index, gt_index).long().sum().item()
                            batch_accuracy = (batch_correct_preds / val_batch_window.shape[0]) * 100

                            num_val_data += val_batch_window.shape[0]
                            running_val_correct_preds += batch_correct_preds

                            if current_val_iter % 10 == 0:
                                print(f"ITER#{current_val_iter} BATCH VALIDATION ACCURACY: {batch_accuracy}, RUNNING VALIDATION LOSS: {val_loss.item()}")
                                print(f"Predicted / GT index: {pred_index} / {gt_index}\n")

                    last_epoch_average_val_loss = current_average_val_loss
                    epoch_accuracy = (running_val_correct_preds / num_val_data) * 100
                    print(f"EPOCH#{current_epoch+1} EPOCH VALIDATION ACCURACY: {epoch_accuracy}, AVERAGE VALIDATION LOSS: {last_epoch_average_val_loss}")

                    val_time_elapsed = time.time() - val_epoch_since
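
The training loop above keeps a commented-out call to poly_lr_scheduler. The project's own helper is not shown; the sketch below only assumes the standard polynomial decay lr = init_lr * (1 - iter / max_iter) ** power, using the same argument names as the commented-out call.

def poly_lr_scheduler(optimizer, init_lr, iter, lr_decay_iter=1, max_iter=100, power=0.9):
    # Polynomial learning-rate decay; a sketch, not the repository's implementation
    if iter % lr_decay_iter != 0 or iter > max_iter:
        return optimizer
    lr = init_lr * (1 - iter / max_iter) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer
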
Ejemplo n.º 20
0
        loss = criterion(output, target)

        # backpropagation, compute gradients
        loss.backward()

        # apply gradients
        optimizer.step()

        train_loss += loss.data.item()
        y_pred = output.argmax(dim=1, keepdim=True)
        train_correct += y_pred.eq(target.view_as(y_pred)).sum().item()

    train_loss /= len(train_iter)
    train_accuracy = 100 * train_correct / len(train_iter.dataset)

    net.eval()
    val_loss, val_correct = 0, 0
    for batch in val_iter:
        text, target = batch.text, batch.label
        output = net(text)
        loss = criterion(output, target)
        val_loss += loss.data.item()
        y_pred = output.argmax(dim=1, keepdim=True)
        val_correct += y_pred.eq(target.view_as(y_pred)).sum().item()

    val_loss /= len(val_iter)
    val_accuracy = 100 * val_correct / len(val_iter.dataset)

    print(f"Epoch {epoch + 1} :: Train/Loss {round(train_loss, 3)} :: "
          "Train/Accuracy {round(train_accuracy, 3)}")
    print(f"Epoch {epoch + 1} :: Val/Loss {round(val_loss, 3)} :: "
Ejemplo n.º 21
0
def main(train_type=None):
    model_path = './model.pth'
    # dir_path = Path('/home/g19tka13/Downloads/data/3C')
    # data_path = dir_path / 'taskA/train.csv'
    train_data, weighted = strtolist()
    test_data = loadtestdata()
    preudo_list = []
    used_unlabeled_data = None
    unlabeled_data = None
    vocab = None
    if train_type == 'self_train':
        unlabeled_data = pd.read_csv('/home/g19tka13/taskA/aclgenerate.csv',
                                     sep=',')
        unlabeled_data = unlabeled_data.head(3000)
        vocab = load_word_vector(train_data, test_data, 'self_train',
                                 unlabeled_data)
    #     prelabeled_data = None
    #     vocab = load_word_vector(train_data, test_data, 'self_train', used_unlabeled_data)
    #
    #     if len(preudo_list) == 0:  # check whether this is the first round of training
    #         train_iter, val_iter, label_word_id = assemble(train_data, vocab, 1)
    #     else:
    #         train_iter, val_iter, label_word_id = assemble(train_data, vocab, 1, prelabeled_data)  # add the pseudo-labeled data
    else:
        vocab = load_word_vector(train_data, test_data)
    #     train_iter, val_iter, label_word_id = assemble(train_data, vocab, 1)
    # test_iter, unlabel_iter = assemble(test_data, vocab, 0)
    # return train_iter, val_iter, test_iter, vocab, weighted, label_word_id
    best_val_f1 = 0
    if train_type == 'self_train':
        prelabel_data = None
        vocab_size = vocab.vectors.size()
        print('Total num. of words: {}, word vector dimension: {}'.format(
            vocab_size[0], vocab_size[1]))
        model = LSTM(vocab_size[0],
                     vocab_size[1],
                     hidden_size=100,
                     num_layers=2,
                     batch=10)
        model.embedding.weight.data = vocab.vectors
        model.embedding.weight.requires_grad = False
        print(model)
        while len(preudo_list) < 2700:
            class_id = []
            delete_id = []
            if len(preudo_list) == 0:  # check whether this is the first round of training
                train_iter, val_iter, label_word_id = assemble(
                    train_data, vocab, 1)
            else:
                train_iter, val_iter, label_word_id = assemble(
                    train_data, vocab, 1,
                    prelabeled_data=prelabel_data)  # add the pseudo-labeled data

            test_iter, unlabel_iter = assemble(test_data,
                                               vocab,
                                               0,
                                               unlabeled_data=unlabeled_data)
            weight = torch.tensor(weighted)
            train_iter = Data.DataLoader(train_iter,
                                         batch_size=10,
                                         shuffle=True)
            val_iter = Data.DataLoader(val_iter, batch_size=10, shuffle=True)
            test_iter = Data.DataLoader(test_iter,
                                        batch_size=10,
                                        shuffle=False)
            unlabel_iter = Data.DataLoader(unlabel_iter,
                                           batch_size=10,
                                           shuffle=False)
            # vocab_size = vocab.vectors.size()
            # print('Total num. of words: {}, word vector dimension: {}'.format(
            #     vocab_size[0],
            #     vocab_size[1]))
            # model = LSTM(vocab_size[0], vocab_size[1], hidden_size=100, num_layers=2, batch=10)
            # model.embedding.weight.data = vocab.vectors
            # model.embedding.weight.requires_grad = False  # use the pretrained word vectors as-is, i.e. keep them fixed by setting requires_grad to False
            # print(model)
            # print(model.parameters())
            # for parameter in model.parameters():
            #     print(parameter)
            optimizer = optim.Adam(model.parameters(), lr=0.0005)
            n_epoch = 10
            # with nn.CrossEntropyLoss, class weights are passed only once, when creating the module
            # loss_cs = nn.CrossEntropyLoss(weight=weight)
            # loss_fnc = nn.CosineEmbeddingLoss()
            # loss_mes = nn.MSELoss()
            y = torch.ones(1).long()
            for epoch in range(n_epoch):
                # on where to place model.train(), see https://blog.csdn.net/andyL_05/article/details/107004401
                model.train()
                for item_idx, item in enumerate(train_iter, 0):
                    label = item[2]
                    unique_num, count = torch.unique(
                        label, return_counts=True)  # default sorted=True
                    unique_num = unique_num.tolist()
                    # print(unique_num, count)
                    real_weight = torch.ones(6, dtype=torch.float)
                    for i in range(6):
                        if i in unique_num:
                            idx = unique_num.index(i)
                            real_weight[i] = 1 / np.log(1.02 + count[idx] / 10)
                        else:
                            real_weight[i] = 1 / np.log(2.02)
                    optimizer.zero_grad()
                    out = model(item)
                    # label_pred = KMeans(n_clusters=6, init=label_out).fit_predict(out)
                    # fixed weight result=0.1716
                    # loss = F.cross_entropy(out, label.long(), weight=weight)
                    # real time weight calculation
                    loss = F.cross_entropy(out,
                                           label.long(),
                                           weight=real_weight)
                    # nn.CosineEmbeddingLoss() expects 2-D matrices as inputs, not 1-D tensors
                    # loss = loss_fnc(torch.unsqueeze(label_pred, dim=0), torch.unsqueeze(label.long(), dim=0), y)
                    # loss = Variable(loss, requires_grad=True)
                    # loss_MES = loss_mes(out,  label_vector)
                    # loss = loss_fnc(out, torch.Tensor(one_hot), y)
                    loss.backward()
                    # print(model.lstm.all_weights.shape)
                    # print(model.lstm.)
                    optimizer.step()
                    if (item_idx + 1) % 5 == 0:
                        train_value, train_y_pre = torch.max(
                            out, 1
                        )  # torch.max returns two tensors (out is 2-D here): the max values and their indices
                        # print('train_value', train_value)
                        # acc = torch.mean((torch.tensor(train_y_pre == label.long(), dtype=torch.float)))
                        # print(train_y_pre, label.long())
                        f1 = f1_score(label.long(),
                                      train_y_pre,
                                      average='macro')
                        # print(train_y_pre, label)
                        print(
                            'epoch: %d \t item_idx: %d \t loss: %.4f \t f1: %.4f'
                            % (epoch, item_idx, loss, f1))

                model.eval()  # evaluate the model after each epoch
                val_pre_label = []
                val_y_label = []
                # if (epoch+1) % 5 == 0:
                with torch.no_grad():
                    # print(unlabel_iter)
                    # for item in unlabel_iter:  # prelabel
                    #     index = item[2]
                    #     out = model(item)
                    #     out = F.softmax(out, dim=1)
                    #     predict_value, predict_class = torch.max(out, 1)
                    #     print('predict_value', predict_value)
                    #     for i in range(len(predict_value)):
                    #         if predict_value[i] > 0.9:
                    #             delete_id.append(index[i])  # record the data index so it can be removed from the original data
                    #             class_id.append(predict_class[i])
                    for item in val_iter:
                        label = item[2]
                        out = model(item)
                        _, val_y_pre = torch.max(out, 1)
                        val_pre_label.extend(val_y_pre)
                        val_y_label.extend(label)
                #         f1 = f1_score(label.long(), val_y_pre, average='macro')
                #         val_f1.append(f1)
                # f1 = np.array(f1).mean()
                f1 = f1_score(torch.Tensor(val_y_label).long(),
                              torch.Tensor(val_pre_label),
                              average='macro')
                print(f1)
                if f1 > best_val_f1:
                    print('val f1: %.4f > %.4f, saving model (pseudo-labeled: %d)' %
                          (f1, best_val_f1, len(preudo_list)))
                    torch.save(model.state_dict(), model_path)
                    best_val_f1 = f1
            model.eval()  # after a round of training, create pseudo-labels
            with torch.no_grad():
                for item in unlabel_iter:  # prelabel
                    index = item[2]
                    out = model(item)
                    out = F.softmax(out, dim=1)
                    predict_value, predict_class = torch.max(out, 1)
                    # print('predict_value', predict_value)
                    # print('predict_class', predict_class)
                    for i in range(len(predict_value)):
                        if predict_value[i] > 0.9:
                            delete_id.append(
                                index[i].item())  # record the data index so it can be removed from the original data
                            class_id.append(predict_class[i].item())
            preudo_list.extend(delete_id)
            if len(preudo_list) != 0:
                unlabeled_data, prelabel_data = split_unlabeled_data(
                    unlabeled_data, delete_id, class_id, prelabel_data)
    else:
        train_iter, val_iter, label_word_id, label_to_id = assemble(
            train_data, vocab, 1)
        test_iter, unlabel_iter = assemble(test_data, vocab, 0)
        # train_iter, val_iter, test_iter, vocab, weight, label_word_id = load_data()
        weight = torch.tensor(weighted)
        train_iter = Data.DataLoader(train_iter,
                                     batch_size=batch_size,
                                     shuffle=True)
        val_iter = Data.DataLoader(val_iter,
                                   batch_size=batch_size,
                                   shuffle=True)
        test_iter = Data.DataLoader(test_iter,
                                    batch_size=batch_size,
                                    shuffle=False)
        vocab_size = vocab.vectors.size()
        print('Total num. of words: {}, word vector dimension: {}'.format(
            vocab_size[0], vocab_size[1]))
        model = LSTM(vocab_size[0],
                     vocab_size[1],
                     hidden_size=100,
                     num_layers=2,
                     batch=batch_size)
        model.embedding.weight.data = vocab.vectors
        model.embedding.weight.requires_grad = False
        print(model)
        # print(model.parameters())
        # for parameter in model.parameters():
        #     print(parameter)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        n_epoch = 50
        best_val_f1 = 0
        # with nn.CrossEntropyLoss, class weights are passed only once, when creating the module
        # loss_cs = nn.CrossEntropyLoss(weight=weight)
        # size_average/reduce are deprecated aliases of reduction='mean'
        loss_fnc = nn.CosineEmbeddingLoss(reduction='mean')
        # loss_mes = nn.MSELoss()
        one_list = torch.ones((batch_size, 1), dtype=torch.float)
        zero_list = torch.zeros((batch_size, 1), dtype=torch.float)
        for epoch in range(n_epoch):
            # on where to place model.train(), see https://blog.csdn.net/andyL_05/article/details/107004401
            model.train()
            batch_loss = 0
            for item_idx, item in enumerate(train_iter, 0):
                label = item[2]
                unique_num, count = torch.unique(
                    label, return_counts=True)  # default sorted=True
                unique_num = unique_num.tolist()
                # print(unique_num, count)
                real_weight = torch.ones(6, dtype=torch.float)
                for i in range(6):
                    if i in unique_num:
                        idx = unique_num.index(i)
                        real_weight[i] = 1 / np.log(1.02 +
                                                    count[idx] / batch_size)
                    else:
                        real_weight[i] = 1 / np.log(2.02)
                optimizer.zero_grad()
                # out, p_rep, n_rep = model(item, label_to_id)
                out, out_o, label_matrix, out_len, label_id = model(
                    item, label_to_id)
                # label_pred = KMeans(n_clusters=6, init=label_out).fit_predict(out)
                # fixed weight result=0.1716
                # loss = F.cross_entropy(out, label.long(), weight=weight)
                # real time weight calculation
                p_rep, n_rep = confusion(out_o, label_matrix, out_len,
                                         label_id)
                loss1 = F.cross_entropy(out, label.long(), weight=real_weight)
                loss2 = loss_fnc(out, p_rep, one_list)
                loss3 = loss_fnc(out, n_rep, zero_list)
                loss = loss1 + loss2 + loss3
                # batch_loss = batch_loss + +loss2 + loss
                # nn.CosineEmbeddingLoss() expects 2-D matrices as inputs, not 1-D tensors
                # loss = loss_fnc(torch.unsqueeze(label_pred, dim=0), torch.unsqueeze(label.long(), dim=0), y)
                # loss = Variable(loss, requires_grad=True)
                # loss_MES = loss_mes(out,  label_vector)
                # loss = loss_fnc(out, torch.Tensor(one_hot), y)
                loss.backward()
                # print(model.lstm.all_weights.shape)
                # print(model.lstm.)
                optimizer.step()
                if (item_idx + 1) % 5 == 0:
                    _, train_y_pre = torch.max(
                        out,
                        1)  # torch.max returns two tensors (out is 2-D here): the max values and their indices

                    # acc = torch.mean((torch.tensor(train_y_pre == label.long(), dtype=torch.float)))
                    # print(train_y_pre, label.long())
                    f1 = f1_score(label.long(), train_y_pre, average='macro')
                    # print(train_y_pre, label)
                    print(
                        'epoch: %d \t item_idx: %d \t loss: %.4f \t f1: %.4f' %
                        (epoch, item_idx, loss, f1))
                    # batch_loss = 0
            # validate once at the end of each epoch
            val_pre_label = []
            val_y_label = []
            # if (epoch + 1) % 5 == 0:
            model.eval()
            with torch.no_grad():
                for item in val_iter:
                    label = item[2]
                    out = model(item)
                    _, val_y_pre = torch.max(out, 1)
                    val_pre_label.extend(val_y_pre)
                    val_y_label.extend(label)
                    # acc = torch.mean((torch.tensor(val_y_pre == label, dtype=torch.float)))
            #         f1 = f1_score(label.long(), val_y_pre, average='macro')
            #         val_f1.append(f1)
            # f1 = np.array(f1).mean()
            f1 = f1_score(torch.Tensor(val_y_label).long(),
                          torch.Tensor(val_pre_label),
                          average='macro')
            print(f1)
            if f1 > best_val_f1:
                print('val f1: %.4f > %.4f, saving model' % (f1, best_val_f1))
                torch.save(model.state_dict(), model_path)
                best_val_f1 = f1
    test_f1 = []
    test_pre_label = []
    test_y_label = []
    model_state = torch.load(model_path)
    model.load_state_dict(model_state)
    model.eval()
    with torch.no_grad():
        for item_idx, item in enumerate(test_iter, 0):
            label = item[2]
            out = model(item)
            _, test_pre = torch.max(out, 1)
            test_pre_label.extend(test_pre)
            test_y_label.extend(label)
            # print('test_true_label={} test_pre_label={}'.format(label, test_y_pre))
            # f1 = f1_score(label.long(), test_y_pre, average='macro')
            # test_f1.append(f1)
    final_f1 = f1_score(torch.Tensor(test_y_label).long(),
                        torch.Tensor(test_pre_label),
                        average='macro')
    # final_f1 = np.array(test_f1).mean()
    print('test_pre_label',
          collections.Counter(torch.Tensor(test_pre_label).tolist()))
    print('test_y_label',
          collections.Counter(torch.Tensor(test_y_label).tolist()))
    print('test f1 : %.4f' % final_f1)
    generate_submission(torch.Tensor(test_pre_label).tolist())
    count = {}
    test_pre = torch.Tensor(test_pre_label).tolist()
    test_true = torch.Tensor(test_y_label).tolist()
    c_matrix = confusion_matrix(test_true, test_pre, labels=[0, 1, 2, 3, 4, 5])
    print(c_matrix)
    for i in range(len(test_true)):
        if test_true[i] == test_pre[i]:
            if test_true[i] not in count.keys():
                count[test_true[i]] = 1
            else:
                count[test_true[i]] = count[test_true[i]] + 1
    print(count)
    pre_true = pd.DataFrame(columns=['true_id', 'pre_id'])
    test_true_ser = pd.Series(test_true)
    test_pre_ser = pd.Series(test_pre)
    pre_true['true_id'] = test_true_ser
    pre_true['pre_id'] = test_pre_ser
    pre_true.to_csv('/home/g19tka13/taskA/true_predict.csv',
                    sep=',',
                    index=False)
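
Both the self-training branch and the plain branch above recompute inverse-log-frequency class weights for every batch. A small helper capturing that computation is sketched below; the function name and defaults are illustrative, not part of the original code.

import numpy as np
import torch


def batch_class_weights(labels, num_classes=6, batch_size=10):
    # w_c = 1 / log(1.02 + count_c / batch_size); absent classes get 1 / log(2.02)
    unique_num, count = torch.unique(labels, return_counts=True)
    unique_num = unique_num.tolist()
    weights = torch.ones(num_classes, dtype=torch.float)
    for c in range(num_classes):
        if c in unique_num:
            idx = unique_num.index(c)
            weights[c] = 1 / np.log(1.02 + count[idx].item() / batch_size)
        else:
            weights[c] = 1 / np.log(2.02)
    return weights
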