Example 1
import os
import torch
import torch.optim as optim
import wandb
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
# FeatureExtractor, GenerativeDataset, normalize, filesystem, SoftDTW,
# train_step and eval_step come from the surrounding project (not shown).

def train_autoencoder(device, args):
    # model definition
    model = FeatureExtractor()
    model.to(device)
    # data definition
    all_chunks = []
    # Concatenate all chunk files. The class of each chunk is irrelevant
    # here, since we are building a generative dataset.
    for label in filesystem.listdir_complete(filesystem.train_audio_chunks_dir):
        chunks = filesystem.listdir_complete(label)
        all_chunks.extend(chunks)
    train_chunks, eval_chunks = train_test_split(all_chunks, test_size=args.eval_size)
    # transforms and dataset
    trf = normalize

    train_dataset = GenerativeDataset(train_chunks, transforms=trf)
    eval_dataset = GenerativeDataset(eval_chunks, transforms=trf)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                  num_workers=4, collate_fn=None, pin_memory=True)
    # evaluation order does not matter, so no shuffling is needed
    eval_dataloader = DataLoader(eval_dataset, batch_size=1, shuffle=False,
                                 num_workers=4, collate_fn=None, pin_memory=True)

    # main loop
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    loss_criterion = SoftDTW(use_cuda=True, gamma=0.1)  # use_cuda=True assumes a CUDA device
    train_count = 0
    eval_count = 0
    for epoch in range(args.n_epochs):
        print('Epoch:', epoch, '/', args.n_epochs)
        train_count = train_step(model, train_dataloader, optimizer, loss_criterion, args.verbose_epochs, device, train_count)
        eval_count = eval_step(model, eval_dataloader, loss_criterion, args.verbose_epochs, device, eval_count)
        torch.save(model.state_dict(), os.path.join(wandb.run.dir, 'model_checkpoint.pt'))
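
train_step and eval_step are not shown in this excerpt. Below is a minimal
sketch of what the loop above appears to assume, reconstructed from the call
sites alone; the bodies are an assumption, not the original implementation.

import torch

def train_step(model, dataloader, optimizer, loss_criterion, verbose_epochs, device, count):
    # One pass over the training set; returns the updated batch counter.
    model.train()
    for batch in dataloader:
        batch = batch.to(device)
        optimizer.zero_grad()
        reconstruction = model(batch)
        # SoftDTW yields one loss per sequence, so reduce with mean()
        loss = loss_criterion(reconstruction, batch).mean()
        loss.backward()
        optimizer.step()
        if count % verbose_epochs == 0:
            print('train loss:', loss.item())
        count += 1
    return count

def eval_step(model, dataloader, loss_criterion, verbose_epochs, device, count):
    # Evaluation pass: gradients disabled, loss reported only.
    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            batch = batch.to(device)
            loss = loss_criterion(model(batch), batch).mean()
            if count % verbose_epochs == 0:
                print('eval loss:', loss.item())
            count += 1
    return count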
Example 2
from tqdm import tqdm
# get_path_to_loc, run_data_extraction, get_notes_mapping_dict,
# vectorize_notes_by_mapping, build_model, set_optimizer, get_batch and
# train_step come from the surrounding project (not shown).

def train(args):
    # First we extract the Data and vectorize it
    data_loc = get_path_to_loc(args.data_path)
    checkpoint_path = get_path_to_loc(args.checkpoint_prefix)

    list_test = run_data_extraction(data_loc)
    notes2idx, idx2note = get_notes_mapping_dict(list_test)
    notes_vec = vectorize_notes_by_mapping(list_test, notes2idx)

    # Now we set the model and the optimizer
    # The model takes the vocabulary size, not the corpus length
    model = build_model(len(notes2idx), args.embedding_dim, args.rnn_units,
                        args.batch_size)
    optimizer = set_optimizer(args.optimizer, args.learning_rate)

    # train the model
    history = []
    # plotter = PeriodicPlotter(sec=2, xlabel='Iterations', ylabel='Loss')  # TODO
    if hasattr(tqdm, '_instances'):
        tqdm._instances.clear()  # clear any stale progress bars

    for iteration in tqdm(range(args.training_iterations)):

        # Grab a batch and propagate it through the network
        x_batch, y_batch = get_batch(notes_vec, args.seq_length,
                                     args.batch_size)
        loss = train_step(x_batch, y_batch, model, optimizer)

        # Track the loss history
        history.append(loss.numpy().mean())
        # plotter.plot(history)  # TODO: put back

        # Periodically checkpoint the weights
        if iteration % 100 == 0:
            model.save_weights(checkpoint_path)

    # Save the final weights (to the same resolved path used in the loop)
    model.save_weights(checkpoint_path)
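
train_step is likewise not shown here. Since loss.numpy() is called on its
return value, it presumably runs eagerly under TensorFlow 2; the loss choice
and from_logits=True below are assumptions based on the integer note targets.

import tensorflow as tf

@tf.function
def train_step(x, y, model, optimizer):
    # Forward pass, sparse categorical cross-entropy against the integer
    # targets, then a single optimizer update.
    with tf.GradientTape() as tape:
        y_hat = model(x)
        loss = tf.keras.losses.sparse_categorical_crossentropy(
            y, y_hat, from_logits=True)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss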
Example 3
        },
        {
            'params': [par for par in to_be_trained if par in no_decay],
            'weight_decay': 0
        }
    ]


    # optimizer = Adam(grouped_params, lr=LR)
    optimizer = AdamW(grouped_params, lr=LR)

    best_valid_accuracy = 0
    train_list, valid_list = [], []
    for epoch in range(EPOCHS):
        # train step over all batches
        model.train_step(network, train_data_loader, loss_function, optimizer, device)

        train_targets, train_outputs = model.evaluate(network, train_data_loader, device)
        valid_targets, valid_outputs = model.evaluate(network, valid_data_loader, device)

        train_accuracy = accuracy_score(train_targets, [int(i > 0.5) for i in train_outputs])
        valid_accuracy = accuracy_score(valid_targets, [int(i > 0.5) for i in valid_outputs])

        train_list.append(train_accuracy)
        valid_list.append(valid_accuracy)

        print("Train Accuracy", train_accuracy)
        print("Valid Accuracy", valid_accuracy)

        if valid_accuracy > best_valid_accuracy:
            best_valid_accuracy = valid_accuracy
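
model.evaluate above is a project-local helper rather than the Keras method;
judging from the int(i > 0.5) thresholding at the call sites, it returns flat
lists of binary targets and sigmoid outputs. A standalone sketch under that
assumption (the batch keys are hypothetical):

import torch

def evaluate(network, data_loader, device):
    # Collect ground-truth labels and sigmoid outputs over a loader;
    # thresholding to hard labels happens at the call site.
    network.eval()
    targets, outputs = [], []
    with torch.no_grad():
        for batch in data_loader:
            inputs = batch['inputs'].to(device)   # hypothetical batch key
            labels = batch['targets']             # hypothetical batch key
            logits = network(inputs)
            targets.extend(labels.tolist())
            outputs.extend(torch.sigmoid(logits).cpu().flatten().tolist())
    return targets, outputs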
Example 4
import numpy as np
import pandas as pd
import tensorflow as tf
# SummarizationDataset, Seq2Seq, train_step and test_step come from the
# surrounding project (not shown).

if __name__ == "__main__":

    dataset = pd.read_json("data/blog.json", encoding="utf-8")
    sd = SummarizationDataset(dataset=dataset, word_train=True)
    train, test = sd.get_embedded_dataset()

    encoder_input = np.array(
        [x.reshape(32, 100) for x in train["TEXTCONTENT"]])
    decoder_input = np.array([x.reshape(10, 100) for x in train["TITLE"]])
    decoder_output = np.array([x.reshape(10) for x in train["TITLE_IDX"]])
    testset_input = np.array([x.reshape(32, 100) for x in test["TEXTCONTENT"]])

    vocab_length = len(sd.embedding.idx_word_dict)
    model = Seq2Seq(vocab_length=vocab_length)
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam()
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    for epoch in range(50):
        train_step(model, encoder_input, decoder_input, decoder_output,
                   loss_object, optimizer, train_loss, train_accuracy)
        template = 'Epoch {}, Loss: {}, Accuracy: {}'
        print(
            template.format(epoch + 1, train_loss.result(),
                            train_accuracy.result() * 100))

    prediction = test_step(model, testset_input[0:1]).numpy().tolist()[0]
    print("요약된 문장 : ", [sd.embedding._idx_to_word(x) for x in prediction])
Example 5
        model = Seq2seq(source_words_count=en_words_count,
                        target_words_count=ko_words_count,
                        sos=ko_tokenizer.word_index["<start>"],
                        eos=ko_tokenizer.word_index["<end>"])

        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        optimizer = tf.keras.optimizers.Adam()

        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            name='train_accuracy')

        idx = 0
        for epoch in range(config["epochs"]):
            for seqs, labels in train_ds:
                train_step(model, seqs, labels, loss_object, optimizer,
                           train_loss, train_accuracy)
                if idx % 100 == 0:
                    template = 'Epoch {}, Loss: {}, Accuracy: {}'
                    print(
                        template.format(epoch, train_loss.result(),
                                        train_accuracy.result() * 100))
                idx += 1

        test = en_tokenizer.texts_to_sequences(en[0:1])
        pred = test_step(model, np.array(test))

        print(en[0])
        print(pred)
        print(ko_tokenizer.sequences_to_texts(pred.numpy()))

        with open(config["save_model_path"] + "source_tokenizer.pkl",