def predict():
    """Build the article/title encoders and the classifier, then run prediction.

    Depending on ``constant.use_bert`` the encoders are either one shared
    fine-tuned BERT model or two separate LSTMs restored via ``load_model``.
    All three modules are moved to the GPU when ``constant.USE_CUDA`` is set.

    NOTE(review): the final call passes positional arguments to ``predict``,
    which in this file is this zero-argument function itself; presumably a
    helper of the same name was intended -- confirm.
    """
    # Test loader and vocabulary; the by-article path is switched off here.
    use_by_article = False
    if not use_by_article:
        loaded = prepare_data('./data_new/preprocessed_new_{}', constant.batch_size)
        _, _, data_loader_test, vocab = loaded
    else:
        _, data_loader_test, vocab = prepare_byarticle_data()

    if not constant.use_bert:
        # Plain LSTM encoders: same settings except for the hidden size.
        shared_kwargs = dict(vocab=vocab,
                             embedding_size=constant.emb_dim,
                             num_layers=constant.n_layers,
                             pretrain_emb=constant.pretrain_emb)
        article_model = models.LSTM(hidden_size=constant.hidden_dim, **shared_kwargs)
        title_model = models.LSTM(hidden_size=constant.hidden_dim_tit, **shared_kwargs)
        LR = models.LR(hidden_dim1=constant.hidden_dim,
                       hidden_dim2=constant.hidden_dim_tit)

        # Restore the trained parameters of each module.
        article_model = load_model(article_model, model_name="article_model")
        title_model = load_model(title_model, model_name="title_model")
        LR = load_model(LR, model_name="LR")
    else:
        from pytorch_pretrained_bert import BertTokenizer, BertModel
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased')
        bert_weights = torch.load("bert_model/pytorch_model.bin")
        bert_model.load_state_dict(bert_weights)
        # One BERT instance encodes both the article and the title.
        article_model = title_model = bert_model
        LR = models.Classifier(hidden_dim1=768, hidden_dim2=768)
        classifier_weights = torch.load("bert_model/classifier.bin")
        LR.load_state_dict(classifier_weights)

    if constant.USE_CUDA:
        for module in (article_model, title_model, LR):
            module.cuda()

    # Predict and save the result in the result folder.
    predict(article_model, title_model, LR, data_loader_test,
            name="bypublisher", print_pred=True)
Esempio n. 2
0
def load_model(path_model, path_config, vocab):
    """Rebuild the model described by a config file and load its saved weights.

    Args:
        path_model: path of the ``.npz`` snapshot passed to
            ``serializers.load_npz``.
        path_config: path of the configuration file; the keys ``model``,
            ``word_dim`` and ``state_dim`` are read from it.
        vocab: mapping from token to id; its length is the vocabulary size
            and it must contain the ``"<EOS>"`` token.

    Returns:
        The constructed model with the snapshot's parameters loaded.

    Exits the process with status -1 when the configured model name is not
    one of ``rnn``, ``lstm`` or ``gru``.
    """
    config = Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")

    # Config name -> class name in `models`; all three share one signature.
    class_names = {"rnn": "RNN", "lstm": "LSTM", "gru": "GRU"}
    if model_name not in class_names:
        # Parenthesised single-argument form prints identically on Python 2 and 3.
        print("[error] Unknown model name: %s" % model_name)
        sys.exit(-1)

    model_class = getattr(models, class_names[model_name])
    model = model_class(vocab_size=len(vocab),
                        word_dim=word_dim,
                        state_dim=state_dim,
                        initialW=None,
                        EOS_ID=vocab["<EOS>"])
    serializers.load_npz(path_model, model)
    return model
Esempio n. 3
0
def train(args, jsons):
    '''
    Trains a model to do character-based ASR using CTC loss on WSJ.

    Builds the train/dev datasets and loaders, an LSTM model, a CTC loss and
    an Adam optimizer, resets the module-level ``best_wer`` to infinity, then
    runs ``train_epoch`` once per epoch and returns the list of its results.
    '''
    global best_wer

    trainset, trainloader = make_dataset_dataloader(args, jsons, split='train')
    devset, devloader = make_dataset_dataloader(args, jsons, split='dev')

    is_bidirectional = not args.unidir
    model = models.LSTM(num_layers=args.n_layers,
                        hidden_dim=args.hidden_dim,
                        bidirectional=is_bidirectional)

    # Fall back to the CPU when asked to, or when no GPU is present.
    if args.cpu or not torch.cuda.is_available():
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:0')
    model.to(device)

    ctc_loss = torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=True)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.98))

    best_wer = np.inf

    stats = [
        train_epoch(epoch, args, trainset, trainloader, devset, devloader,
                    model, ctc_loss, optimizer)
        for epoch in range(args.n_epochs)
    ]
    return stats
Esempio n. 4
0
 def __init__(self, config, word_vocab):
     """Create the sub-modules of the GLSTM network from ``config``.

     Args:
         config: configuration object. This method reads ``hidden_size``,
             ``emb_size``, ``freeze_emb``, ``node_num``, ``encoder``,
             ``filter_size``, ``dropout``, ``feature_size``,
             ``graph_encoder``, ``relation_num``, ``label_size`` and
             ``num_layers`` from it.
         word_vocab: vocabulary object exposing ``emb`` (a NumPy array of
             pre-trained embeddings, or None) and ``voc_size``.

     Raises:
         AssertionError: if ``config.graph_encoder`` is not one of
             'lstm', 'gru', 'highway' or 'rgcn'.
     """
     super(GLSTM, self).__init__()
     self.config = config
     self.hidden_size = config.hidden_size
     self.emb_size = config.emb_size
     self.word_vocab = word_vocab
     # Use the pre-trained word vectors when the vocabulary ships them,
     # otherwise learn an embedding table from scratch.
     if word_vocab.emb is not None:
         self.word_emb = nn.Embedding.from_pretrained(torch.from_numpy(word_vocab.emb), freeze=config.freeze_emb)
     else:
         self.word_emb = nn.Embedding(word_vocab.voc_size, config.emb_size)
     self.node_emb = nn.Embedding(config.node_num, config.hidden_size)
     # NOTE(review): no text_encoder is created for any other config.encoder
     # value -- confirm that only 'cnn' and 'rnn' are ever configured.
     if config.encoder == 'cnn':
         self.text_encoder = models.CNN_Encoder(config.filter_size, config.hidden_size)
     elif config.encoder == 'rnn':
         self.text_encoder = models.RNN_Encoder(config.hidden_size, config.emb_size, config.dropout)
     self.feature_weight = nn.Linear(config.feature_size, config.hidden_size)
     self.feature_lstm = models.LSTM(config.hidden_size, config.hidden_size, config.dropout, bidirec=False)
     self.feature_combine = nn.Linear(config.hidden_size * 2, config.hidden_size)
     self.attn_pooling = Attentive_Pooling(config.hidden_size)
     # Exactly one graph encoder variant is instantiated.
     assert config.graph_encoder in {'lstm', 'gru', 'highway', 'rgcn'}
     if config.graph_encoder == 'lstm':
         self.s_cell = SLSTMCell(config.hidden_size, config.hidden_size, config.relation_num)
         self.g_cell = GLSTMCell(config.hidden_size, self.attn_pooling)
     elif config.graph_encoder == 'gru':
         self.s_cell = SGRUCell(config.hidden_size, config.hidden_size, config.relation_num)
         self.g_cell = GGRUCell(config.hidden_size, self.attn_pooling)
     elif config.graph_encoder == 'highway':
         self.highway = highway_RGCN(config.hidden_size, config.hidden_size, config.relation_num)
     elif config.graph_encoder == 'rgcn':
         self.rgcn = RGCN(config.hidden_size, config.hidden_size, config.relation_num)
     # Output projection from the hidden size to the label space.
     self.w_out = nn.Linear(config.hidden_size, config.label_size)
     self.num_layers = config.num_layers
     self.dropout = torch.nn.Dropout(config.dropout)
Esempio n. 5
0
def make_preds_labels(model_dir, save_file, utt_idx=0):
    '''
    Logs the difference between predicted and label word sequence of a single
    utterance in the dev set for every model saved in an experiment.

    Args:
        model_dir: experiment directory holding ``args.json`` and the
            ``*.pt`` checkpoints; the report is written into it as well.
        save_file: file name of the report, relative to ``model_dir``.
        utt_idx: index of the dev-set utterance to decode.

    Inference always runs on the CPU.
    '''
    with open(os.path.join(model_dir, 'args.json'), 'r') as f:
        args_dict = json.load(f)

    model = models.LSTM(num_layers=args_dict['n_layers'],
                        hidden_dim=args_dict['hidden_dim'],
                        bidirectional=(not args_dict['unidir']))
    device = torch.device('cpu')
    model.to(device)

    # Sorted so the report lists checkpoints in a reproducible order
    # (glob returns files in arbitrary order).
    wts = sorted(glob.glob(os.path.join(model_dir, '*.pt')))

    dataset = datasets.ESPnetBucketDataset(
        os.path.join(args_dict['data_root'],
                     'dump/test_dev93/deltafalse/data.json'),
        os.path.join(args_dict['data_root'],
                     'lang_1char/train_si284_units.txt'),
        load_dir=args_dict['bucket_load_dir'],
        n_buckets=args_dict['n_buckets'])

    lines = {}
    for wt in wts:
        # map_location lets checkpoints saved on a GPU load on a CPU-only host.
        model.load_state_dict(torch.load(wt, map_location=device))

        data = dataset[utt_idx]
        # Copy the features and prepend a batch axis of size one.
        feat = data['feat'].copy()[None, ...]

        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            log_probs, _ = model(torch.tensor(feat))
        log_probs = log_probs.detach().numpy()
        labels = np.array(data['label'])

        preds, to_remove = decoder.batch_greedy_ctc_decode(log_probs,
                                                           zero_infinity=True)
        # Drop the filler value reported by the decoder.
        preds = preds[preds != to_remove]

        pred_words = decoder.compute_words(preds, dataset.idx2tok)
        label_words = decoder.compute_words(labels, dataset.idx2tok)

        lines[wt] = [
            f'Predicted:\n{" ".join(pred_words)}',
            f'Label:\n{" ".join(label_words)}',
        ]

    with open(os.path.join(model_dir, save_file), 'w') as f:
        for wt, wt_lines in lines.items():
            f.write(f'{wt}\n')
            for line in wt_lines:
                f.write(line + '\n')
            f.write('\n')
Esempio n. 6
0
def nn_predict(data):
    """Run the saved LSTM on a bracketed, comma-separated string of numbers.

    ``data`` is a string: its first and last characters are dropped and the
    remainder is split on ``', '`` and converted to floats. The network
    weights come from the ``model_state_dict`` entry of ``./models/nn.hdf5``.
    Returns whatever the model's ``predict`` method returns.
    """
    net = models.LSTM(output_size=64).to(device)
    saved = torch.load('./models/nn.hdf5', map_location=torch.device('cpu'))
    net.load_state_dict(saved['model_state_dict'])
    net.eval()

    # Strip the surrounding brackets, then parse the fields as floats.
    fields = data[1:-1].split(', ')
    values = np.array(fields).astype(float)
    features = torch.tensor(values).float()

    return net.predict(features)
Esempio n. 7
0
def evaluate(args, jsons, spk2genders):
    """Load the best checkpoint and evaluate it on the train, dev and test splits.

    Args:
        args: run configuration; ``n_layers``, ``hidden_dim``, ``bidir``,
            ``temp_root`` and ``model_dir`` are read here, and the object is
            forwarded to ``evaluate_split``.
        jsons: forwarded unchanged to ``evaluate_split``.
        spk2genders: forwarded unchanged to ``evaluate_split``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = models.LSTM(num_layers=args.n_layers,
                        hidden_dim=args.hidden_dim,
                        bidirectional=args.bidir)

    # map_location makes the CPU fallback work for checkpoints saved on a GPU;
    # without it torch.load tries to restore tensors onto the missing device.
    checkpoint_path = os.path.join(args.temp_root, args.model_dir, 'best.pt')
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)

    for split in ('train', 'dev', 'test'):
        evaluate_split(args, jsons, spk2genders, model, split=split)
Esempio n. 8
0
def evaluate(args, jsons, spk2genders):
    """Load the best checkpoint and evaluate it on the requested splits.

    The gender-adversarial architecture is used when ``args.adversarial`` is
    set, the plain LSTM otherwise. With ``args.test`` only the test split is
    evaluated; without it, the train and dev splits are.

    Args:
        args: run configuration; ``adversarial``, ``n_layers``,
            ``hidden_dim``, ``bidir``, ``temp_root``, ``model_dir`` and
            ``test`` are read here, and the object is forwarded to
            ``evaluate_split``.
        jsons: forwarded unchanged to ``evaluate_split``.
        spk2genders: forwarded unchanged to ``evaluate_split``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    if args.adversarial:
        model = models_gender.LSTM_gender(num_layers=args.n_layers,
                                          hidden_dim=args.hidden_dim,
                                          bidirectional=args.bidir)
    else:
        model = models.LSTM(num_layers=args.n_layers,
                            hidden_dim=args.hidden_dim,
                            bidirectional=args.bidir)

    # map_location makes the CPU fallback work for checkpoints saved on a GPU;
    # without it torch.load tries to restore tensors onto the missing device.
    checkpoint_path = os.path.join(args.temp_root, args.model_dir, 'best.pt')
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)

    splits = ('test',) if args.test else ('train', 'dev')
    for split in splits:
        evaluate_split(args, jsons, spk2genders, model, split=split)
Esempio n. 9
0
def evaluate(args, jsons):
    '''
    Evaluates a model trained for character-based ASR with CTC loss on WSJ.

    Loads ``best.pt`` from ``args.model_dir``, evaluates it on the train and
    dev splits (plus test when ``args.test`` is set) and logs the statistics
    of each split through ``log_stats``.
    '''
    use_gpu = not args.cpu and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_gpu else 'cpu')

    model = models.LSTM(num_layers=args.n_layers,
                        hidden_dim=args.hidden_dim,
                        bidirectional=(not args.unidir))

    # map_location makes CPU evaluation work for checkpoints saved on a GPU;
    # without it torch.load tries to restore tensors onto the saving device.
    checkpoint_path = os.path.join(args.model_dir, 'best.pt')
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)

    ctc_loss = torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=True)

    splits = ['train', 'dev']
    if args.test:
        splits.append('test')

    for split in splits:
        dataset, dataloader = make_dataset_dataloader(args, jsons, split=split)
        stats = evaluate_epoch(dataset, dataloader, model, ctc_loss)
        log_stats(f'Final results on {split}', stats)
Esempio n. 10
0
                                        1), np.expand_dims(validate_y, 1)
# Insert a new axis at position 1 of the test inputs and targets.
test_x, test_y = np.expand_dims(test_x, 1), np.expand_dims(test_y, 1)

# Seed PyTorch's random number generator from FLAGS.seed.
seed = FLAGS.seed
torch.manual_seed(seed)

# Start out empty; presumably filled with per-run RMSE / MAE values later
# in the script (not visible here).
rmse_list = []
mae_list = []

if FLAGS.algorithm == 'RNN':
    model = models.RNN(input_size=FLAGS.input_size,
                       hidden_size=FLAGS.hidden_size,
                       output_size=FLAGS.output_size)
elif FLAGS.algorithm == 'LSTM':
    model = models.LSTM(input_size=FLAGS.input_size,
                        hidden_size=FLAGS.hidden_size,
                        output_size=FLAGS.output_size)
elif FLAGS.algorithm == 'mRNN_fixD':
    model = models.MRNNFixD(input_size=FLAGS.input_size,
                            hidden_size=FLAGS.hidden_size,
                            output_size=FLAGS.output_size,
                            k=FLAGS.K)
elif FLAGS.algorithm == 'mRNN':
    model = models.MRNN(input_size=FLAGS.input_size,
                        hidden_size=FLAGS.hidden_size,
                        output_size=FLAGS.output_size,
                        k=FLAGS.K)
elif FLAGS.algorithm == 'mLSTM_fixD':
    model = models.MLSTMFixD(input_size=FLAGS.input_size,
                             hidden_size=FLAGS.hidden_size,
                             output_size=FLAGS.output_size,
Esempio n. 11
0
def LSTMTrainer(df):
    """Train the vanilla LSTM on ``df`` with the settings in ``constants.lstm_param``.

    The model is built from the ``input_dim``, ``hidden_dim``, ``layer_dim``
    and ``output_dim`` entries (in that positional order) and handed to
    ``generic_train`` together with the paths "log/lstm.txt" and
    "vanilla_lstm.pth".
    """
    param = constants.lstm_param
    dim_keys = ("input_dim", "hidden_dim", "layer_dim", "output_dim")
    dims = [param[key] for key in dim_keys]
    model = models.LSTM(*dims)
    generic_train(model, df, param, "log/lstm.txt", "vanilla_lstm.pth")
Esempio n. 12
0
import torch
import datasets
import models

# Data loader built by the project's `datasets.uiuc_video` with batches of 8.
dataset_loader = datasets.uiuc_video(1, batch_size=8)
epoch_size = 200
# NOTE(review): the device string is hard-coded and the model is not moved
# to it in the lines visible here -- confirm this happens later.
device = 'cuda:0'
model = models.LSTM()
criterion = torch.nn.CrossEntropyLoss()
# SGD with momentum and L2 weight decay over all model parameters.
optimizer = torch.optim.SGD(model.parameters(),
                            lr=1e-4,
                            momentum=0.8,
                            weight_decay=1e-2)
Esempio n. 13
0
def main(gpu, path_corpus, path_config, path_word2vec):
    """Train a recurrent model on a corpus, validating and snapshotting periodically.

    The model type (rnn / lstm / gru / bd_lstm) and the hyper-parameters are
    read from the config file. Every EVAL iterations the model is evaluated
    on the validation sentences, and the model parameters plus the extracted
    word vectors are saved under the configured snapshot path.

    Args:
        gpu: device id handed to ``cuda.get_device`` and ``model.to_gpu``.
        path_corpus: path of the corpus loaded by ``utils.load_corpus``.
        path_config: path of the configuration file.
        path_word2vec: path of pre-trained word vectors used to initialise
            the embeddings, or None to pass ``initialW=None`` to the model.

    Exits the process with status -1 on an unknown model name.
    """
    MAX_EPOCH = 50
    EVAL = 200  # validate and save a snapshot every EVAL iterations
    MAX_LENGTH = 70  # forwarded to utils.load_corpus as max_length

    config = utils.Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")

    print "[info] CORPUS: %s" % path_corpus
    print "[info] CONFIG: %s" % path_config
    print "[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec
    print "[info] MODEL: %s" % model_name
    print "[info] WORD DIM: %d" % word_dim
    print "[info] STATE DIM: %d" % state_dim
    print "[info] GRADIENT CLIPPING: %f" % grad_clip
    print "[info] WEIGHT DECAY: %f" % weight_decay
    print "[info] BATCH SIZE: %d" % batch_size

    # Snapshot prefix: <snapshot dir>/rnnlm.<corpus file name>.<config name without extension>
    path_save_head = os.path.join(
        config.getpath("snapshot"),
        "rnnlm.%s.%s" % (os.path.basename(path_corpus),
                         os.path.splitext(os.path.basename(path_config))[0]))
    print "[info] SNAPSHOT: %s" % path_save_head

    sents_train, sents_val, vocab, ivocab = \
            utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH)

    # Optional initialisation of the embedding matrix from pre-trained vectors.
    if path_word2vec is not None:
        word2vec = utils.load_word2vec(path_word2vec, word_dim)
        initialW = utils.create_word_embeddings(vocab,
                                                word2vec,
                                                dim=word_dim,
                                                scale=0.001)
    else:
        initialW = None

    cuda.get_device(gpu).use()
    # All model classes take the same constructor arguments.
    if model_name == "rnn":
        model = models.RNN(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "lstm":
        model = models.LSTM(vocab_size=len(vocab),
                            word_dim=word_dim,
                            state_dim=state_dim,
                            initialW=initialW,
                            EOS_ID=vocab["<EOS>"])
    elif model_name == "gru":
        model = models.GRU(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "bd_lstm":
        model = models.BD_LSTM(vocab_size=len(vocab),
                               word_dim=word_dim,
                               state_dim=state_dim,
                               initialW=initialW,
                               EOS_ID=vocab["<EOS>"])
    else:
        print "[error] Unknown model name: %s" % model_name
        sys.exit(-1)
    model.to_gpu(gpu)

    # SMORMS3 optimizer with gradient clipping and weight decay hooks.
    opt = optimizers.SMORMS3()
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
    opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    # Baseline validation score before any update.
    print "[info] Evaluating on the validation sentences ..."
    loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab)
    perp = math.exp(loss_data)
    print "[validation] iter=0, epoch=0, perplexity=%f, accuracy=%.2f%%" \
        % (perp, acc_data*100)

    it = 0
    n_train = len(sents_train)
    vocab_size = model.vocab_size
    for epoch in xrange(1, MAX_EPOCH + 1):
        # Fresh random order of the training sentences for every epoch.
        perm = np.random.permutation(n_train)
        for data_i in xrange(0, n_train, batch_size):
            # Skip the final partial batch.
            if data_i + batch_size > n_train:
                break
            words = sents_train[perm[data_i:data_i + batch_size]]

            # The bidirectional model additionally takes a mask batch.
            if model_name == "bd_lstm":
                xs, ms = utils.make_batch(words,
                                          train=True,
                                          tail=False,
                                          mask=True)
                ys = model.forward(xs=xs, ms=ms, train=True)
            else:
                xs = utils.make_batch(words, train=True, tail=False)
                ys = model.forward(ts=xs, train=True)

            # The targets are the input tokens themselves; outputs and
            # targets are flattened to one row per token.
            ys = F.concat(ys, axis=0)
            ts = F.concat(xs, axis=0)
            ys = F.reshape(ys, (-1, vocab_size))  # (TN, |V|)
            ts = F.reshape(ts, (-1, ))  # (TN,)

            loss = F.softmax_cross_entropy(ys, ts)
            acc = F.accuracy(ys, ts, ignore_label=-1)

            model.zerograds()
            loss.backward()
            loss.unchain_backward()
            opt.update()
            it += 1

            loss_data = float(cuda.to_cpu(loss.data))
            perp = math.exp(loss_data)
            acc_data = float(cuda.to_cpu(acc.data))
            print "[training] iter=%d, epoch=%d (%d/%d=%.03f%%), perplexity=%f, accuracy=%.2f%%" \
                    % (it, epoch, data_i+batch_size, n_train,
                        float(data_i+batch_size)/n_train*100,
                        perp, acc_data*100)

            # Periodic validation followed by a model + word-vector snapshot.
            if it % EVAL == 0:
                print "[info] Evaluating on the validation sentences ..."
                loss_data, acc_data = evaluate(model, model_name, sents_val,
                                               ivocab)
                perp = math.exp(loss_data)
                print "[validation] iter=%d, epoch=%d, perplexity=%f, accuracy=%.2f%%" \
                        % (it, epoch, perp, acc_data*100)

                serializers.save_npz(
                    path_save_head + ".iter_%d.epoch_%d.model" % (it, epoch),
                    model)
                utils.save_word2vec(
                    path_save_head + ".iter_%d.epoch_%d.vectors.txt" %
                    (it, epoch), utils.extract_word2vec(model, vocab))
                print "[info] Saved."

    print "[info] Done."
Esempio n. 14
0
        model_path = os.path.join(MODEL_DIR, 'majority.pkl')
        if os.path.exists(model_path):
            with open(model_path, 'rb') as f:
                model = pickle.load(f)
        else:
            model = models.MajorityBaseline(proto_instances, PROPERTIES)
            with open(model_path, 'wb') as f:
                pickle.dump(model, f)
    elif args.model_type == 'lstm':
        w2i, i2w = data['dicts']
        emb_np = data['emb_np']
        X, y = data['lstm_data']

        model = models.LSTM(vocab_size=len(w2i),
                            emb_size=int(args.glove_d),
                            h_size=args.h_size,
                            padding_idx=w2i[PAD_TOKEN],
                            emb_np=emb_np,
                            properties=PROPERTIES)

        print('Finished building lstm model!')

    elif args.model_type == 'logreg':
        X = {}
        y = {}
        for split in SPLITS:
            X_split, y_split = data_utils.get_ins_outs(args,
                                                       proto_instances[split],
                                                       properties=PROPERTIES,
                                                       sents=sents,
                                                       w2e=w2e)
            X[split] = X_split
Esempio n. 15
0
parser.add_argument('--save', type=str, default='drum-nn.pth',
                    help='path to save the model')
args = parser.parse_args()

# Seed both NumPy and PyTorch from the command-line seed.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

midi_encoder = models.MidiEncoder(q=args.quantization)
data = models.DrumData(args.data, midi_encoder)
print(f'Corpus has a drumset of {len(midi_encoder.index)} items.')
# Input size and number of classes both equal the size of the encoder index.
rnn = models.LSTM(
    input_size=len(midi_encoder.index),
    hidden_size=args.hid_dim,
    emb_dim=args.emb_dim,
    num_classes=len(midi_encoder.index),
    n_layers=args.n_layers,
    dropout=args.dropout
).to(device)


def current_lr(optimizer):
    """Return the first non-None ``'lr'`` among the optimizer's param groups.

    Returns None when no parameter group carries a learning rate.
    """
    rates = (group.get('lr') for group in optimizer.param_groups)
    return next((rate for rate in rates if rate is not None), None)

lr = args.lr
# Adam over all parameters of the network, with L2 weight decay.
optimizer = torch.optim.Adam(rnn.parameters(), lr=lr, weight_decay=args.weight_decay)
# Scheduler in 'min' mode with a patience of 10 and verbose reporting.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 'min', verbose=True, patience=10, threshold=1e-4)
def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, vm, subtask, _run):
    """Train, validate and test one model, writing its metrics to CSV files.

    The data pipeline and model class are chosen from ``model_name``: names
    containing "BERT" use ``get_data_bert``, all others use ``get_data`` with
    the given embedding file. After training, the best checkpoint is loaded
    and evaluated on the test set.

    Args:
        output_dim: number of output classes.
        train_bs, val_bs, test_bs: batch sizes (converted with ``int``).
        num_epochs: number of training epochs.
        max_seq_length: maximum sequence length (converted with ``int``).
        learning_rate: learning rate of the Adam optimizer.
        warmup_proportion: accepted but not used in this function.
        early_stopping_criteria: forwarded to ``train_and_evaluate``.
        num_layers, hidden_dim, bidirectional, dropout, filter_sizes:
            architecture hyper-parameters forwarded to the model classes.
        embedding_file: embedding file used by the non-BERT pipeline.
        model_name: one of the model names handled below.
        use_mongo: when truthy, test metrics are also logged via
            ``log_scalars``.
        vm: "google" or "aws" selects an alternative results directory.
        subtask: forwarded to the data loading functions.
        _run: run object; its ``_id`` names the output directories.

    Returns:
        dict with the run id, mean validation loss, test accuracy / recall /
        precision / f1, the learning rate, hidden_dim and a status of 'ok'.

    Raises:
        ValueError: if ``model_name`` is not a known model.
    """
    # Logger
    directory_checkpoints = f"results/checkpoints/{_run._id}/"
    directory = f"results/{_run._id}/"

    # Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)

    # Any model whose name contains "BERT" uses the BERT data pipeline.
    use_bert = "BERT" in model_name

    if vm == "google":
        directory = f"results-bert-google/{_run._id}/"
    elif vm == "aws":
        directory = f"results-bert-aws/{_run._id}/"

    # Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes, subtask)
    else:
        (embedding_dim, vocab_size, embedding_matrix, train_dataloader,
         val_dataloader, test_dataloader) = get_data(
             int(max_seq_length),
             embedding_file=embedding_file,
             batch_size=batch_size,
             subtask=subtask)

    # Model
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    elif model_name == "MLP_Features":
        model = models.MLP_Features(embedding_matrix, embedding_dim,
                                    vocab_size, int(hidden_dim), 14, dropout,
                                    output_dim)
        print(model)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size,
                           dropout, filter_sizes, output_dim)
        print(model)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
        print(model)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim,
                                     vocab_size, int(hidden_dim), dropout,
                                     int(num_layers), bidirectional,
                                     output_dim)
        print(model)
    elif model_name == "BERTFreeze":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        # Freeze the BERT encoder so only the classification head trains.
        for param in model.bert.parameters():
            param.requires_grad = False
            print(param)
            print(param.requires_grad)
        print(model)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        print(model)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreeze":
        model = models.BertLinearFreeze(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreezeEmbeddings":
        model = models.BertLinearFreezeEmbeddings(hidden_dim, dropout,
                                                  output_dim)
        print(model)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, bidirectional, output_dim)
        print(model)
    elif model_name == "BERTNonLinear":
        model = models.BertNonLinear(dropout, output_dim)
        print(model)
    elif model_name == "BERTNorm":
        model = models.BertNorm(dropout, output_dim)
        print(model)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(f"Unknown model name: {model_name!r}")

    model = model.to(device)

    # Loss and optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy

    # Training and evaluation
    print('Training and evaluation for {} epochs...'.format(num_epochs))
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader,
        val_dataloader, early_stopping_criteria, directory_checkpoints,
        use_bert, use_mongo)
    train_metrics.to_csv(directory + "train_metrics.csv")
    val_metrics.to_csv(directory + "val_metrics.csv")

    # Test: restore the best checkpoint before scoring the test set.
    print('Testing...')
    load_checkpoint(directory_checkpoints + "best_model.pth.tar", model)

    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo:
        log_scalars(test_metrics, "Test")

    test_metrics_df = pd.DataFrame(test_metrics)
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    id_nummer = f'{_run._id}'

    results = {
        'id': id_nummer,
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }

    return results
Esempio n. 17
0
# Training targets grouped into batches of BATCH_SIZE sequences, each
# int(MAX_LEN / 2) steps of N_FEATURES values.
T_y_train = torch.reshape(
    T_y_train, (-1, BATCH_SIZE, int(MAX_LEN / 2), N_FEATURES)).to(device)
print("T_X_train.shape", T_X_train.shape)

# Test inputs and targets as float tensors, reshaped without a batch grouping.
T_X_test = torch.FloatTensor(X_test)
T_y_test = torch.FloatTensor(Y_test)
T_X_test = torch.reshape(T_X_test,
                         (-1, int(MAX_LEN / 2), N_FEATURES)).to(device)
T_y_test = torch.reshape(T_y_test,
                         (-1, int(MAX_LEN / 2), N_FEATURES)).to(device)
# The label now names the tensor actually printed (it used to say T_X_train).
print("T_X_test.shape", T_X_test.shape)

print("model declaration")
#-------------- model declaration
model = models.LSTM(hidden_size=HIDDEN_SIZE,
                    nfeatures=N_FEATURES,
                    num_layers=NUM_LAYERS,
                    dropout=DROPOUT).to(device)
loss_function = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
losses = []
test_losses = []
# Zero-filled hidden and cell state tensors of shape
# (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE).
h = torch.zeros(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE).to(device)
c = torch.zeros(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE).to(device)

print("training model")
#-------------- model training
for i in range(EPOCHS):
    model.train()
    loss = 0
    predictions, (_, _) = model(T_X_test)
    single_loss = loss_function(predictions, T_y_test)
Esempio n. 18
0
def _first_utt_per_gender(gender_dataset, start=200):
    '''
    Returns [female_idx, male_idx]: the index of the first utterance of each
    gender at or after `start`. A slot stays 0 when no such utterance exists.
    '''
    slot_of = {'f': 0, 'm': 1}
    idxs = [0, 0]
    found = [False, False]
    for i in range(start, len(gender_dataset)):
        if all(found):
            break
        data = gender_dataset[i]
        slot = slot_of.get(gender_dataset.utt2gender[data['utt_id']])
        if slot is not None and not found[slot]:
            idxs[slot] = i
            found[slot] = True
    return idxs


def make_preds_labels(datadir, expdir, save_file):
    '''
    Logs the predicted and label word sequences of one female and one male
    dev-set utterance for every checkpoint saved in an experiment.

    Args:
        datadir: data root holding the dump, unit list and spk2gender files.
        expdir: experiment directory searched for ``*.pt`` checkpoints.
        save_file: path of the report file to write.

    Inference always runs on the CPU.
    '''
    model = models.LSTM(num_layers=3, hidden_dim=512, bidirectional=True)
    device = torch.device('cpu')
    model.to(device)

    wts = glob.glob(os.path.join(expdir, '*.pt'))

    gender_dataset = gender_subset.ESPnetGenderBucketDataset(
        os.path.join(datadir, 'dump/test_dev93/deltafalse/data.json'),
        os.path.join(datadir, 'lang_1char/train_si284_units.txt'),
        os.path.join(datadir, 'test_dev93/spk2gender'),
        num_buckets=10)

    # The chosen utterances do not depend on the checkpoint: search once.
    idxs = _first_utt_per_gender(gender_dataset)

    lines = {}
    for wt in wts:
        # map_location lets checkpoints saved on a GPU load on a CPU-only host.
        model.load_state_dict(torch.load(wt, map_location=device))

        lines[wt] = {'f': [], 'm': []}
        for gndr, idx in zip(('f', 'm'), idxs):
            data = gender_dataset[idx]
            # Copy the features and prepend a batch axis of size one.
            feat = data['feat'].copy()[None, ...]

            # Pure inference: no autograd graph is needed.
            with torch.no_grad():
                log_probs, _ = model(torch.tensor(feat))
            log_probs = log_probs.detach().numpy()
            labels = np.array(data['label'])

            preds, _ = decoder.batch_greedy_ctc_decode(
                log_probs, zero_infinity=True)
            # NOTE(review): entries equal to -1 are dropped; presumably this
            # matches the filler value the decoder returns -- confirm.
            preds = preds[preds != -1]

            pred_words = decoder.compute_words(preds, gender_dataset.idx2tok)
            label_words = decoder.compute_words(labels, gender_dataset.idx2tok)

            lines[wt][gndr].append(f'{gndr}:')
            lines[wt][gndr].append(f'Predicted:\n{" ".join(pred_words)}')
            lines[wt][gndr].append(f'Label:\n{" ".join(label_words)}')

    with open(save_file, 'w') as f:
        for wt, per_gender in lines.items():
            f.write(f'{wt}\n')
            for gender_lines in per_gender.values():
                for line in gender_lines:
                    f.write(line + '\n')
                f.write('\n')
            f.write('\n')
Esempio n. 19
0
                cnt = 0
            else:
                cnt += 1
            if (cnt == 3): break
            if (avg_best == 1.0): break

    return avg_best, test_best

# Train / validation / test loaders and the vocabulary.
# NOTE(review): the path is an absolute, user-specific location with a `{}`
# placeholder; presumably prepare_data fills in the split name -- confirm.
data_loader_tr, data_loader_val, data_loader_test, vocab = prepare_data(
    '/home/nayeon/fakenews/data_new/preprocessed_new_{}_wtitle.pickle',
    constant.batch_size)

if constant.use_bert:
    article_model = models.LSTM(vocab=vocab,
                                embedding_size=constant.emb_dim,
                                hidden_size=constant.hidden_dim,
                                num_layers=constant.n_layers,
                                pretrain_emb=constant.pretrain_emb)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    title_model = BertModel.from_pretrained('bert-base-uncased')
    LR = models.LR(hidden_dim1=constant.hidden_dim, hidden_dim2=768)
elif constant.use_utransformer:
    article_model = models.UTransformer(
        vocab=vocab,
        embedding_size=constant.emb_dim,
        hidden_size=constant.hidden_dim,
        num_layers=constant.max_hops_article,
        num_heads=constant.num_heads,
        total_key_depth=constant.key_value_depth,
        total_value_depth=constant.key_value_depth,
        filter_size=constant.filter_size_article,
Esempio n. 20
0
# Hyper-parameter grids; each list holds the values to sweep over.
RESULTS = []
MARGINS = [0.2]
MAX_EPOCHS = 50
BATCH_SIZE = 32
FILTER_WIDTHS = [3]
POOL_METHOD = "average"
FEATURE_DIMS = [600]
DROPOUT_PS = [0.3]
NUM_HIDDEN_UNITS = [240]
LEARNING_RATES = [1E-3]
MODELS = []
##############################################################################
# LSTM sweep: train one model per (margin, hidden units, learning rate)
# combination, save its weights and record its MRR.
LSTM_HYPERPARAMETERS = itertools.product(MARGINS, NUM_HIDDEN_UNITS,
                                         LEARNING_RATES)
for margin, num_hidden_units, learning_rate in LSTM_HYPERPARAMETERS:
    model = models.LSTM(EMBEDDINGS, num_hidden_units, POOL_METHOD, CUDA)
    criterion = helpers.MaxMarginLoss(margin)
    # Only parameters that require gradients are handed to the optimizer.
    parameters = (p for p in model.parameters() if p.requires_grad)
    optimizer = torch.optim.Adam(parameters, lr=learning_rate)
    model, mrr = train_utils.train_model(model, optimizer, criterion, DATA,
                                         MAX_EPOCHS, BATCH_SIZE, CUDA)
    torch.save(model.state_dict(),
               "./lstm_{}_{}_{}".format(margin, num_hidden_units,
                                        learning_rate))
    MODELS.append((mrr, margin, num_hidden_units, learning_rate))
##############################################################################
CNN_HYPERPARAMETERS = itertools.product(MARGINS, FILTER_WIDTHS, FEATURE_DIMS,
                                        DROPOUT_PS, LEARNING_RATES)
for margin, filter_width, feature_dim, dropout_p, learning_rate in CNN_HYPERPARAMETERS:
    model = models.CNN(EMBEDDINGS, filter_width, POOL_METHOD, feature_dim,
Esempio n. 21
0
def _cv_collect(ids, data, labels, indices):
    """Return an ``(ids, data, labels)`` triple restricted to ``indices``."""
    picked = [ids[index] for index in indices]
    return (picked,
            {id_: data[id_] for id_ in picked},
            {id_: labels[id_] for id_ in picked})


def _cv_split_train_val(ids, data, labels, train_index, per_class=25):
    """Split the training indices of one fold into train and validation sets.

    The first ``per_class`` samples labelled ``"true"`` and the first
    ``per_class`` samples with any other label go to validation; everything
    else stays in training.

    Returns:
        ``(train, val)`` where each element is an ``(ids, data, labels)`` triple.
    """
    train_part, val_part = [], []
    held_out = {True: 0, False: 0}
    for index in train_index:
        is_true = bool(labels[ids[index]] == "true")
        if held_out[is_true] < per_class:
            held_out[is_true] += 1
            val_part.append(index)
        else:
            train_part.append(index)
    return (_cv_collect(ids, data, labels, train_part),
            _cv_collect(ids, data, labels, val_part))


def cross_validation(kfold=10):
    """Run k-fold cross validation of the final classifier on by-article data.

    The article/title encoders (LSTM or BERT, optionally BERT followed by
    LSTM heads) are loaded once; only a freshly initialised classifier is
    optimised in each fold. Whenever validation accuracy improves within a
    fold, the model is evaluated on that fold's test split and that test
    accuracy becomes the fold's result. The mean over folds is printed.

    Args:
        kfold: number of folds passed to ``KFold``.

    Raises:
        ValueError: if ``constant.optimizer`` is not 'adam', 'adagrad' or 'sgd'.
    """
    # NOTE: pickle.load can execute arbitrary code; only use trusted files here.
    with open("data_new/by_article_ids.pickle", "rb") as ids_file:
        ids = pickle.load(ids_file)
    with open("data_new/preprocessed_byarticle_data.pickle", "rb") as data_file:
        data = pickle.load(data_file)
    with open("data_new/by_article_labels.pickle", "rb") as labels_file:
        labels = pickle.load(labels_file)
    # NOTE(review): absolute, machine-specific path kept as in the original.
    with open("/home/nayeon/fakenews/data_new/vocab_trim4.pickle", 'rb') as vocab_file:
        vocab = pickle.load(vocab_file)

    # The LSTM heads only exist on top of BERT, so every use must be guarded
    # by both flags (guarding on use_bert_plus_lstm alone raised NameError
    # when BERT was disabled).
    use_lstm_head = constant.use_bert and constant.use_bert_plus_lstm
    tokenizer = None
    lstm_article = lstm_title = None

    if not constant.use_bert:
        # basic LSTM encoders with previously saved weights
        article_model = models.LSTM(vocab=vocab,
                                    embedding_size=constant.emb_dim,
                                    hidden_size=constant.hidden_dim,
                                    num_layers=constant.n_layers,
                                    pretrain_emb=constant.pretrain_emb)
        title_model = models.LSTM(vocab=vocab,
                                  embedding_size=constant.emb_dim,
                                  hidden_size=constant.hidden_dim_tit,
                                  num_layers=constant.n_layers,
                                  pretrain_emb=constant.pretrain_emb)
        article_model = load_model(article_model, model_name="article_model")
        title_model = load_model(title_model, model_name="title_model")
    else:
        from pytorch_pretrained_bert import BertTokenizer, BertModel
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased')
        if not constant.bert_from_scratch:
            state = torch.load("bert_model/pytorch_model.bin")
            bert_model.load_state_dict(state)
        # the same BERT instance encodes both articles and titles
        article_model = bert_model
        title_model = bert_model
        if use_lstm_head:
            lstm_article = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim,
                                   num_layers=constant.n_layers, bidirectional=False, batch_first=True)
            lstm_title = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim_tit,
                                 num_layers=constant.n_layers, bidirectional=False, batch_first=True)
            lstm_article.load_state_dict(torch.load("bert_model/lstm_article2.bin"))
            lstm_title.load_state_dict(torch.load("bert_model/lstm_title2.bin"))

    if constant.USE_CUDA:
        article_model.cuda()
        title_model.cuda()
        if use_lstm_head:
            lstm_article.cuda()
            lstm_title.cuda()

    use_add_feature_flag = constant.use_emo2vec_feat or constant.use_url
    total_test_acc = 0
    kf = KFold(n_splits=kfold)
    for k, (train_index, test_index) in enumerate(kf.split(ids), start=1):
        print("k:", k)
        # 25 "true" + 25 other samples of the training part form the validation set
        train, val = _cv_split_train_val(ids, data, labels, train_index)
        test = _cv_collect(ids, data, labels, test_index)

        # prepare by article cross validation data
        ids_val_dict = ids_test_dict = None
        if constant.aug_count != '':
            (data_loader_train, data_loader_val, data_loader_test,
             ids_val_dict, ids_test_dict) = prepare_byarticle_cross_validation(
                 train, val, test, constant.batch_size, constant.aug_count)
        else:
            data_loader_train, data_loader_val, data_loader_test = prepare_byarticle_cross_validation(
                train, val, test, constant.batch_size, constant.aug_count)

        # the final classifier is re-initialised for each fold
        if constant.use_bert and not use_lstm_head:
            classifier = models.Classifier(hidden_dim1=768, hidden_dim2=768)
        else:
            classifier = models.Classifier(hidden_dim1=constant.hidden_dim,
                                           hidden_dim2=constant.hidden_dim_tit)
        if constant.USE_CUDA:
            classifier.cuda()

        criterion = nn.BCELoss()

        # only the classifier's parameters are optimised
        if constant.optimizer == 'adam':
            opt = torch.optim.Adam(classifier.parameters(), lr=constant.lr_classi,
                                   weight_decay=constant.weight_decay)
        elif constant.optimizer == 'adagrad':
            opt = torch.optim.Adagrad(classifier.parameters(), lr=constant.lr_classi)
        elif constant.optimizer == 'sgd':
            opt = torch.optim.SGD(classifier.parameters(), lr=constant.lr_classi, momentum=0.9)
        else:
            raise ValueError("Unsupported optimizer: {!r}".format(constant.optimizer))

        # set tensorboard folder name
        experiment_name = "{0}_FineTune_aug{1}_LRlr{2}_k{3}".format(
            "BERT" if constant.use_bert else "LSTM", constant.aug_count, constant.lr_classi, k)
        logdir = "tensorboard/" + experiment_name + "/"
        writer = SummaryWriter(logdir)

        def evaluate(loader, ids_dict, epoch, is_test):
            """Evaluate the current models on ``loader``; returns (accuracy, pred, ids)."""
            if not constant.use_bert:
                return eval_tit_lstm(article_model, title_model, classifier, loader,
                                     use_add_feature_flag, writer, epoch, is_test)
            if constant.aug_count != '':
                return eval_bert_with_chunked_data(article_model, title_model, classifier, loader,
                                                   tokenizer, ids_dict, None, writer, epoch, is_test)
            # lstm_article / lstm_title are None when no LSTM head is used
            return eval_bert(article_model, title_model, classifier, loader, tokenizer,
                             lstm_article, lstm_title, use_add_feature_flag, writer, epoch, is_test)

        global_steps = 0
        best_val_acc = 0
        # Reset per fold so a fold never reuses the previous fold's result and
        # the accumulation below cannot hit an unbound name.
        test_acc = 0
        try:
            for e in range(constant.max_epochs):
                article_model.train()
                title_model.train()
                classifier.train()
                if use_lstm_head:
                    lstm_article.train()
                    lstm_title.train()
                loss_log = []
                f1_log = 0
                acc_log = 0
                # training
                pbar = tqdm(enumerate(data_loader_train), total=len(data_loader_train))
                for i, (X, x_len, tit, tit_len, y, ind) in pbar:
                    opt.zero_grad()
                    if constant.use_bert:
                        X = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(item)) for item in X]
                        tit = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(item)) for item in tit]
                        X, segments_ids_article, tit, segments_ids_tit = padding_for_bert(X, tit)
                        if constant.USE_CUDA:
                            X, segments_ids_article = X.cuda(), segments_ids_article.cuda()
                            tit, segments_ids_tit = tit.cuda(), segments_ids_tit.cuda()
                            y = y.cuda()
                        encoded_article_layers, _ = article_model(X, segments_ids_article)
                        encoded_tit_layers, _ = title_model(tit, segments_ids_tit)
                        if use_lstm_head:
                            # feed the last BERT layer through the LSTM heads
                            _, article_hidden = lstm_article(encoded_article_layers[-1])
                            _, title_hidden = lstm_title(encoded_tit_layers[-1])
                            article_feat = article_hidden[-1][-1]
                            title_feat = title_hidden[-1][-1]
                        else:
                            # sum the last BERT layer over dim 1
                            article_feat = torch.sum(encoded_article_layers[-1], dim=1)
                            title_feat = torch.sum(encoded_tit_layers[-1], dim=1)  # [batch_size, hidden_size]
                    else:
                        article_feat = article_model.feature(X, x_len)
                        title_feat = title_model.feature(tit, tit_len)
                    feature = torch.cat((article_feat, title_feat), dim=1)
                    pred_prob = classifier(feature)

                    loss = criterion(pred_prob, y)
                    loss.backward()
                    opt.step()

                    loss_log.append(loss.item())
                    accuracy, microPrecision, microRecall, microF1 = getMetrics(
                        pred_prob.detach().cpu().numpy(), y.cpu().numpy())
                    f1_log += microF1
                    acc_log += accuracy
                    pbar.set_description("(Epoch {}) TRAIN F1:{:.4f} TRAIN LOSS:{:.4f} ACCURACY:{:.4f}".format((e+1), f1_log/float(i+1), np.mean(loss_log), acc_log/float(i+1)))

                    writer.add_scalars('train', {'loss': np.mean(loss_log),
                                                 'acc': acc_log/float(i+1),
                                                 'f1': f1_log/float(i+1)}, global_steps)
                    global_steps += 1

                # Validate and test:
                # 1. The reported test accuracy comes from the model with the
                #    best validation accuracy.
                # 2. Each time validation accuracy improves, re-evaluate on the
                #    test set and update the fold's test accuracy.
                article_model.eval()
                title_model.eval()
                classifier.eval()
                if use_lstm_head:
                    lstm_article.eval()
                    lstm_title.eval()
                print("Evaluation on validation set")
                accuracy, pred, id_ = evaluate(data_loader_val, ids_val_dict, e, False)

                # find better accuracy in the validation set, need to test the model in the testset
                if accuracy > best_val_acc:
                    print("Find better model, test it on test set")
                    best_val_acc = accuracy
                    test_acc, pred, id_ = evaluate(data_loader_test, ids_test_dict, e, True)
                    # 1.53 is a hard-coded threshold on validation + test accuracy
                    if best_val_acc + test_acc > 1.53:
                        torch.save(classifier.state_dict(), "bert_model/classifier.bin")
                        print("Classifier has been saved in bert_model/classifier.bin")
        finally:
            # one writer is opened per fold; close it so event files are flushed
            writer.close()

        # finish one fold, accumulate its test accuracy (averaged after all folds)
        total_test_acc += test_acc

    # after k folds cross validation, get the final average test accuracy
    avg_test_acc = total_test_acc * 1.0 / kfold
    print("After {0} folds cross validation, the final accuracy of {1} is {2}".format(kfold, constant.manual_name, avg_test_acc))
Esempio n. 22
0
def train(aug_count=""):
    """Fine-tune the final classifier (and optional LSTM heads) and save the best.

    Encoders are either the saved article/title LSTMs or a shared BERT model.
    When BERT is used together with ``constant.train_cleaner_dataset``, two
    LSTM heads on top of BERT are trained along with the classifier;
    otherwise only the classifier is optimised. After every epoch the model
    is evaluated on the test loader; improved weights are saved. Training
    stops after 10 epochs without improvement or once test accuracy reaches
    1.0, at which point the latest predictions are pickled.

    Args:
        aug_count: forwarded to ``prepare_byarticle_data`` when the filtered
            dataset is not used.

    Raises:
        ValueError: if ``constant.optimizer`` is not 'adam', 'adagrad' or 'sgd'.
    """
    # prepare data_loader and vocab
    if constant.train_cleaner_dataset:
        data_loader_train, data_loader_test, vocab = prepare_filtered_data(batch_size=constant.batch_size)
    else:
        data_loader_train, data_loader_test, vocab = prepare_byarticle_data(aug_count=aug_count, batch_size=constant.batch_size)

    # The LSTM heads are only built on top of BERT, so every use must check
    # both flags (checking train_cleaner_dataset alone raised NameError when
    # BERT was disabled).
    use_lstm_head = constant.use_bert and constant.train_cleaner_dataset
    tokenizer = None
    lstm_article = lstm_title = None

    # load parameters, LR is the classifier being fine-tuned
    if constant.use_bert:
        from pytorch_pretrained_bert import BertTokenizer, BertModel
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased')
        if not constant.bert_from_scratch:
            state = torch.load("bert_model/pytorch_model.bin")
            bert_model.load_state_dict(state)
        # the same BERT instance encodes both articles and titles
        article_model = bert_model
        title_model = bert_model
        if use_lstm_head:
            lstm_article = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim,
                                   num_layers=constant.n_layers, bidirectional=False, batch_first=True)
            lstm_title = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim_tit,
                                 num_layers=constant.n_layers, bidirectional=False, batch_first=True)
            LR = models.Classifier(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)
        else:
            LR = models.Classifier(hidden_dim1=768, hidden_dim2=768)
    else:
        # basic LSTM encoders with previously saved weights
        article_model = models.LSTM(vocab=vocab,
                                    embedding_size=constant.emb_dim,
                                    hidden_size=constant.hidden_dim,
                                    num_layers=constant.n_layers,
                                    pretrain_emb=constant.pretrain_emb,
                                    )
        title_model = models.LSTM(vocab=vocab,
                                  embedding_size=constant.emb_dim,
                                  hidden_size=constant.hidden_dim_tit,
                                  num_layers=constant.n_layers,
                                  pretrain_emb=constant.pretrain_emb,
                                  )
        LR = models.Classifier(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)

        article_model = load_model(article_model, model_name="article_model")
        title_model = load_model(title_model, model_name="title_model")

    if constant.USE_CUDA:
        article_model.cuda()
        title_model.cuda()
        LR.cuda()
        if use_lstm_head:
            lstm_article.cuda()
            lstm_title.cuda()

    criterion = nn.BCELoss()

    # With LSTM heads each component gets its own learning rate; otherwise
    # only the classifier is optimised.
    if use_lstm_head:
        trainable = [
            {"params": lstm_article.parameters(), "lr": constant.lr_lstm},
            {"params": lstm_title.parameters(), "lr": constant.lr_title},
            {"params": LR.parameters(), "lr": constant.lr_classi},
        ]
    else:
        trainable = LR.parameters()

    if constant.optimizer == 'adam':
        opt = torch.optim.Adam(trainable, lr=constant.lr_classi, weight_decay=constant.weight_decay)
    elif constant.optimizer == 'adagrad':
        opt = torch.optim.Adagrad(trainable, lr=constant.lr_classi)
    elif constant.optimizer == 'sgd':
        opt = torch.optim.SGD(trainable, lr=constant.lr_classi, momentum=0.9)
    else:
        raise ValueError("Unsupported optimizer: {!r}".format(constant.optimizer))

    # set tensorboard folder name
    # NOTE(review): the name uses constant.aug_count, not the aug_count
    # argument used to load the data — confirm this is intended.
    if constant.use_bert:
        experiment_name = "BERT_FineTune_aug{0}_LRlr{1}".format(constant.aug_count, constant.lr_classi)
    else:
        experiment_name = "LSTM_FineTune_aug{0}_LRlr{1}".format(constant.aug_count, constant.lr_classi)

    logdir = "tensorboard/" + experiment_name + "/"
    writer = SummaryWriter(logdir)

    use_add_feature_flag = constant.use_emo2vec_feat or constant.use_url
    patience = 10  # epochs without improvement before stopping
    test_best = 0
    cnt = 0
    global_steps = 0
    try:
        for e in range(constant.max_epochs):
            article_model.train()
            title_model.train()
            LR.train()
            if use_lstm_head:
                lstm_article.train()
                lstm_title.train()
            loss_log = []
            f1_log = 0
            acc_log = 0

            # training
            pbar = tqdm(enumerate(data_loader_train), total=len(data_loader_train))
            for i, (X, x_len, tit, tit_len, y, ind) in pbar:
                opt.zero_grad()
                if constant.use_bert:
                    X = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(item)) for item in X]
                    tit = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(item)) for item in tit]
                    # padding
                    X, segments_ids_article, tit, segments_ids_tit = padding_for_bert(X, tit)
                    if constant.USE_CUDA:
                        X, segments_ids_article = X.cuda(), segments_ids_article.cuda()
                        tit, segments_ids_tit = tit.cuda(), segments_ids_tit.cuda()
                        y = y.cuda()
                    encoded_article_layers, _ = article_model(X, segments_ids_article)
                    encoded_tit_layers, _ = title_model(tit, segments_ids_tit)
                    if use_lstm_head:
                        # feed the last BERT layer through the LSTM heads
                        _, article_hidden = lstm_article(encoded_article_layers[-1])
                        _, title_hidden = lstm_title(encoded_tit_layers[-1])
                        article_feat = article_hidden[-1][-1]
                        title_feat = title_hidden[-1][-1]
                    else:
                        # sum the last BERT layer over dim 1
                        article_feat = torch.sum(encoded_article_layers[-1], dim=1)
                        title_feat = torch.sum(encoded_tit_layers[-1], dim=1)  # [batch_size, hidden_size]
                else:
                    article_feat = article_model.feature(X, x_len)
                    title_feat = title_model.feature(tit, tit_len)
                feature = torch.cat((article_feat, title_feat), dim=1)
                pred_prob = LR(feature)

                loss = criterion(pred_prob, y)
                loss.backward()
                opt.step()

                loss_log.append(loss.item())
                accuracy, microPrecision, microRecall, microF1 = getMetrics(
                    pred_prob.detach().cpu().numpy(), y.cpu().numpy())
                f1_log += microF1
                acc_log += accuracy
                pbar.set_description("(Epoch {}) TRAIN F1:{:.4f} TRAIN LOSS:{:.4f} ACCURACY:{:.4f}".format((e+1), f1_log/float(i+1), np.mean(loss_log), acc_log/float(i+1)))

                writer.add_scalars('train', {'loss': np.mean(loss_log),
                                             'acc': acc_log/float(i+1),
                                             'f1': f1_log/float(i+1)}, global_steps)
                global_steps += 1

            article_model.eval()
            title_model.eval()
            LR.eval()
            if use_lstm_head:
                lstm_article.eval()
                lstm_title.eval()

            # testing (every epoch)
            print("Evaluation on Test")
            if constant.use_bert:
                # lstm_article / lstm_title are None when no LSTM head is used
                accuracy, pred, id_ = eval_bert(article_model, title_model, LR, data_loader_test, tokenizer,
                                                lstm_article, lstm_title, use_add_feature_flag, writer, e, True)
            else:
                accuracy, pred, id_ = eval_tit_lstm(article_model, title_model, LR, data_loader_test,
                                                    use_add_feature_flag, writer, e, True)

            if accuracy > test_best:
                test_best = accuracy
                print("Find better model. Saving model ...")
                cnt = 0
                if use_lstm_head:
                    suffix = str(constant.hidden_dim)+"_"+str(constant.hidden_dim_tit)+"_"+str(test_best)+".bin"
                    torch.save(lstm_article.state_dict(), "bert_model/by_publisher/lstm_article_"+suffix)
                    torch.save(lstm_title.state_dict(), "bert_model/by_publisher/lstm_title_"+suffix)
                    torch.save(LR.state_dict(), "bert_model/by_publisher/classifier_bypublisher_"+suffix)
                    print("The lstm_article lstm_title classifier_bypublisher have been saved!")
                else:
                    torch.save(LR.state_dict(), "bert_model/finetune_classi_for_tunebert_"+str(accuracy)+".bin")
                    print("The fine tune classifier has been saved!")
            else:
                cnt += 1

            # Stop on exhausted patience or a perfect score; in both cases dump
            # the predictions and ids of the most recent evaluation.
            if cnt == patience or test_best == 1.0:
                with open('pred/{0}_pred.pickle'.format(experiment_name), 'wb') as handle:
                    pickle.dump({"preds": pred, "ids": id_}, handle, protocol=pickle.HIGHEST_PROTOCOL)
                break
    finally:
        # close the writer so pending events are flushed
        writer.close()
Esempio n. 23
0
def make_tsne(datadir,
              expdir,
              save_file,
              title,
              adversarial=False,
              test=False,
              mean=True):
    """Plot a 2-D t-SNE of per-utterance embeddings coloured by speaker gender.

    Loads the checkpoint ``best.pt`` from ``expdir``, computes one embedding
    per utterance of the chosen split, projects the embeddings with t-SNE
    (cosine metric) and saves a scatter plot to ``save_file``.

    Args:
        datadir: root directory holding the dump, unit and spk2gender files.
        expdir: experiment directory containing ``best.pt``.
        save_file: output path of the figure.
        title: text appended to the plot title.
        adversarial: if True, use ``models_gender.LSTM_gender`` instead of
            ``models.LSTM``.
        test: if True, use the 'eval92' split, otherwise 'dev93'.
        mean: if True, average the embedding over axis 0 of each utterance's
            output; otherwise take its last row.
    """
    if adversarial:
        model = models_gender.LSTM_gender(num_layers=3)
    else:
        model = models.LSTM(num_layers=3)

    model_file = os.path.join(expdir, 'best.pt')
    # Everything below runs on CPU, so map the checkpoint there as well.
    model.load_state_dict(torch.load(model_file, map_location='cpu'))
    # Inference only: switch off training-mode layers such as dropout.
    model.eval()

    split = 'eval92' if test else 'dev93'

    gender_dataset = gender_subset.ESPnetGenderBucketDataset(
        os.path.join(datadir, f'dump/test_{split}/deltafalse/data.json'),
        os.path.join(datadir, 'lang_1char/train_si284_units.txt'),
        os.path.join(datadir, f'test_{split}/spk2gender'),
        num_buckets=10)

    # Keeping every frame output in memory is too expensive, so each utterance
    # is reduced to a single 1024-dim vector (mean or last row).
    embeds = np.zeros((len(gender_dataset), 1024))
    # `np.int` was removed in NumPy 1.24; the builtin int is the same dtype.
    genders = np.zeros(len(gender_dataset), dtype=int)
    with torch.no_grad():  # no gradients needed for embedding extraction
        for i in range(len(gender_dataset)):
            data = gender_dataset[i]
            # add a leading batch dimension of size 1
            feat = data['feat'].copy()[None, ...]

            if adversarial:
                _, _, embed = model(torch.tensor(feat))
            else:
                _, embed = model(torch.tensor(feat))

            embed = embed.detach().numpy()[0]
            if mean:
                embeds[i, :] = np.mean(embed, axis=0)
            else:
                embeds[i, :] = embed[-1, :]

            utt = data['utt_id']
            # 0 = female ('f'), 1 = anything else
            genders[i] = 0 if gender_dataset.utt2gender[utt] == 'f' else 1

    tsne = TSNE(n_components=2, metric='cosine')
    tsne_embeds = tsne.fit_transform(embeds)

    f = tsne_embeds[genders == 0, :]
    m = tsne_embeds[genders == 1, :]
    plt.scatter(f[:, 0], f[:, 1], label='Female')
    plt.scatter(m[:, 0], m[:, 1], label='Male')

    plt.legend(loc='upper right')
    plt.title(f't-SNE of Female and Male embeddings for {title}')

    plt.axis('off')
    plt.tight_layout()
    plt.savefig(save_file)
    # clear the figure so later plots start from a clean state
    plt.clf()
Esempio n. 24
0
    "data/Panasonic 18650PF Data/0degC/Drive cycles/06-02-17_10.43 0degC_HWFET_Pan18650PF.mat",
    args.sequence_length, args.window_size)

# One shuffled loader per training dataset.
train_loaders = [
    DataLoader(dataset=d, batch_size=args.batch_size, shuffle=True)
    for d in train_datasets
]

# train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
# Validation data is not shuffled.
validation_loader = DataLoader(dataset=validation_dataset,
                               batch_size=args.batch_size,
                               shuffle=False)

# Model, loss, and optimizer
if args.model == 'lstm':
    model = models.LSTM(args.input_size, args.hidden_size, args.num_layers,
                        args.num_classes, args.noise_std).to(device)
elif args.model == 'gru':
    model = models.GRU(args.input_size, args.hidden_size, args.num_layers,
                       args.num_classes, args.noise_std).to(device)
elif args.model == 'rnn':
    model = models.RNN(args.input_size, args.hidden_size, args.num_layers,
                       args.num_classes, args.noise_std).to(device)
else:
    # Fail here with a clear message instead of a NameError on `model` below.
    raise ValueError(
        "Unknown model type {!r}; expected 'lstm', 'gru' or 'rnn'".format(args.model))

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

# Train the model
# total_step = len(train_loader)
# total_train_step = len(train_loader)
# total_val_step = len(validation_loader)
Esempio n. 25
0
def main(_):
    """Train, validate and test an RNN/LSTM sequence classifier driven by FLAGS.

    Downloads the input data when needed, builds embedded input sequences
    with labels, splits them into train/val/test batches, trains for
    ``FLAGS.epochs`` epochs and optionally reports the validation F1 to the
    optimizer service and saves/uploads the model.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``FLAGS.model`` is neither 'rnn' nor 'lstm'.
    """
    if not os.path.exists(FLAGS.local_path_in) or FLAGS.use_optimizer:
        utils_gcs.download_files_from_gcs(FLAGS.local_path_in,
                                          FLAGS.gcs_path_in)
        # Only report a download when one actually happened.
        logging.info('Data downloaded successfully!')

    sequence_df = pd.read_hdf(
        os.path.join(FLAGS.local_path_in, FLAGS.seq_file), 'df')
    if FLAGS.balance_df:
        # keep only the rows whose url also appears in the balance frame
        balance_df = pd.read_hdf(
            os.path.join(FLAGS.local_path_in, FLAGS.balance_df), 'df')
        sequence_df = sequence_df[sequence_df['url'].isin(balance_df['url'])]
    embeddings_dict = utils.get_n2v_graph_embedding(os.path.join(
        FLAGS.local_path_in, FLAGS.g_emb),
                                                    graph_gen=False,
                                                    normalize_type='minmax')
    x_sequence, y_label, label_list = utils.load_input_with_label(
        sequence_df, embeddings_dict, FLAGS.task)

    def make_batches(idx):
        """Split ``idx`` into about ``len(idx) // batch_size`` index batches."""
        n_sections = len(idx) // FLAGS.batch_size
        # A non-empty split smaller than one batch used to give 0 sections,
        # which np.array_split rejects; use a single batch instead. An empty
        # split still raises, as before.
        if len(idx) > 0:
            n_sections = max(1, n_sections)
        return np.array_split(idx, n_sections)

    train_idx, val_idx, test_idx = utils.split_data_idx(
        len(x_sequence), FLAGS.train_ratio, FLAGS.val_ratio)
    train_batches = make_batches(train_idx)
    val_batches = make_batches(val_idx)
    test_batches = make_batches(test_idx)

    # model training/testing
    logging.info('FLAGS.epochs: %s', FLAGS.epochs)
    logging.info('FLAGS.batch_size: %s', FLAGS.batch_size)
    logging.info('FLAGS.learning_rate: %s', FLAGS.lr)

    # dropout is disabled for single-layer models
    dropout = 0.0 if FLAGS.num_layers == 1 else FLAGS.dropout

    print_gpu_info()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info('Current device is %s', device.type)
    if FLAGS.model == 'rnn':
        tm_model = models.RNN(in_dim=FLAGS.dim,
                              hid_dim=FLAGS.hid_dim,
                              num_label=len(label_list),
                              num_layers=FLAGS.num_layers,
                              dropout=dropout).to(device)
    elif FLAGS.model == 'lstm':
        tm_model = models.LSTM(in_dim=FLAGS.dim,
                               hid_dim=FLAGS.hid_dim,
                               num_label=len(label_list),
                               num_layers=FLAGS.num_layers,
                               dropout=dropout,
                               bi_direct=FLAGS.bi).to(device)
    else:
        # Fail here instead of with a NameError on `tm_model` below.
        raise ValueError(
            "Unknown model type %r; expected 'rnn' or 'lstm'" % (FLAGS.model,))
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(tm_model.parameters(),
                                 lr=FLAGS.lr,
                                 weight_decay=1e-6)
    if FLAGS.use_optimizer:
        # example trial_name:
        # 'projects/{project_id}/locations/{region}/studies/{study_id}/trials/{trial_id}'
        trial_name_split = FLAGS.trial_name.split('/')
        project_id = trial_name_split[1]
        region = trial_name_split[3]
        study_id = trial_name_split[-3]
        trial_id = trial_name_split[-1]
        logging.info('project_id: %s, region: %s, study_id: %s, trial_id: %s',
                     project_id, region, study_id, trial_id)
        ml_client = optimizer_client.create_or_load_study(
            project_id, region, study_id, json.loads(FLAGS.study_config))
    for epoch in range(FLAGS.epochs):
        logging.info('Epoch %s', epoch)
        start_time = time.time()
        train(tm_model, x_sequence, y_label, train_batches, criterion,
              optimizer, device, FLAGS.print_step)
        val_f1 = val(tm_model, x_sequence, y_label, val_batches, device)
        test(tm_model, x_sequence, y_label, test_batches, device)
        if FLAGS.use_optimizer:
            # report this epoch's validation F1 and wall time for the trial
            elapsed_secs = int(time.time() - start_time)
            metric_list = [{'metric': 'valf1', 'value': float(val_f1)}]
            ml_client.report_intermediate_objective_value(
                epoch, elapsed_secs, metric_list, trial_id)

    logging.info('Experiment finished.')

    if FLAGS.save_model:
        filename = '%s_%s_%s' % (FLAGS.task, FLAGS.model, FLAGS.name)
        utils.save_model(tm_model, optimizer, FLAGS.local_path_out, filename)
        utils_gcs.upload_files_to_gcs(local_folder=FLAGS.local_path_out,
                                      gcs_path=FLAGS.gcs_path_out)
Esempio n. 26
0
import models

# Driver script: build and fit an LSTM through the project-local `models`
# module, then compare unperturbed and perturbed states and plot an exponent.
# NOTE(review): the meaning of the positional arguments (256, 3) is defined in
# models.LSTM and not visible here — presumably size-related; TODO confirm.
LSTM256 = models.LSTM(256, 3)
LSTM256.create_model()
# NOTE(review): argument semantics of models.Data are not visible here — TODO confirm.
Data = models.Data([1, 1, 1], 20000, 0.01)
Data.getData()
# Fit on the generated data; the first argument is presumably an epoch count — verify.
LSTM256.fit_model(5, Data)
LSTM256.print_stats()
LSTM256.model.summary()
# Build the states object and fill in both variants from the fitted model.
LSTM256_States = models.States(1100, 1000)
LSTM256_States.create_unperturbed(LSTM256, Data)
# NOTE(review): "create_pertrurbed" looks like a misspelling of "create_perturbed";
# it has to match the method name defined on models.States — check before renaming.
LSTM256_States.create_pertrurbed(LSTM256, Data)
# Print the difference between the unperturbed and perturbed attributes.
print(LSTM256_States.unperturbed - LSTM256_States.perturbed)
LSTM256_lyapunov = models.Lyapunov(LSTM256_States)
LSTM256_lyapunov.plot_exponent(LSTM256_States)

# First layer of the underlying model object.
LSTM_Layer = LSTM256.model.layers[0]
# Bare expression: has no effect when run as a script (only displays a value
# in an interactive session).
LSTM_Layer.weights

import matplotlib.pyplot as plt

# NOTE(review): neither `np` nor `line` is defined or imported in the visible
# code; this line raises NameError unless they are provided elsewhere.
plt.plot(np.linspace(1, 10, 20), line)
def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, _run):
    """Train, validate and test one text-classification model for a run.

    The function loads the data (BERT-specific loaders when ``model_name``
    contains "BERT", embedding-based loaders otherwise), builds the model
    selected by ``model_name``, trains it with Adam and cross-entropy via
    ``train_and_evaluate``, reloads ``best_model.pth.tar`` from the run
    directory and evaluates it on the test set. Train/validation/test metrics
    are written as CSV files under ``results/<run id>/``.

    Args:
        output_dim: Number of output classes, forwarded to the model.
        train_bs, val_bs, test_bs: Batch sizes; each is cast to ``int``.
        num_epochs: Number of epochs, forwarded to ``train_and_evaluate``.
        max_seq_length: Maximum sequence length; cast to ``int``.
        learning_rate: Learning rate for the Adam optimizer.
        warmup_proportion: Accepted for interface compatibility; not used
            inside this function.
        early_stopping_criteria: Forwarded to ``train_and_evaluate``.
        num_layers: Number of recurrent layers (LSTM variants); cast to
            ``int``.
        hidden_dim: Hidden size forwarded to the model.
        bidirectional: Forwarded to the LSTM variants.
        dropout: Dropout value forwarded to the model.
        filter_sizes: Forwarded to the CNN model.
        embedding_file: Embedding file used by the non-BERT data loader.
        model_name: One of "MLP", "MLP_Features", "CNN", "LSTM",
            "LSTMAttention", "BERT", "BERTLinear", "BERTLSTM".
        use_mongo: When truthy, test metrics are also sent to
            ``log_scalars``.
        _run: Run object; only its ``_id`` attribute is read here.

    Returns:
        dict: ``id``, mean validation ``loss`` (rounded to 4 decimals), the
        test ``accuracy``/``recall``/``precision``/``f1``, the
        ``learning_rate``, ``hidden_dim`` and ``status == 'ok'``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Fail fast on an unsupported model name. Without this check the model
    # variable is never bound and the run only fails (with a NameError)
    # after the data has already been loaded.
    supported_models = ("MLP", "MLP_Features", "CNN", "LSTM", "LSTMAttention",
                        "BERT", "BERTLinear", "BERTLSTM")
    if model_name not in supported_models:
        raise ValueError(
            "Unknown model_name {!r}; expected one of {}".format(
                model_name, ", ".join(supported_models)))

    #Logger
    directory = f"results/{_run._id}/"

    #Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)

    # BERT-based models use their own data pipeline and evaluation path.
    use_bert = "BERT" in model_name

    #Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes)
    else:
        (embedding_dim, vocab_size, embedding_matrix, train_dataloader,
         val_dataloader, test_dataloader) = get_data_features(
             int(max_seq_length),
             embedding_file=embedding_file,
             batch_size=batch_size)

    #Model
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    elif model_name == "MLP_Features":
        model = models.MLP_Features(embedding_matrix, embedding_dim,
                                    vocab_size, int(hidden_dim), 13, dropout,
                                    output_dim)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size,
                           dropout, filter_sizes, output_dim)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim,
                                     vocab_size, int(hidden_dim), dropout,
                                     int(num_layers), bidirectional,
                                     output_dim)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, output_dim)
    # Print the architecture once, for every model type.
    print(model)

    model = model.to(device)

    #Loss and optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy

    #Training and evaluation
    print('Training and evaluation for {} epochs...'.format(num_epochs))
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader,
        val_dataloader, early_stopping_criteria, directory, use_bert,
        use_mongo)
    train_metrics.to_csv(directory + "train_metrics.csv")
    val_metrics.to_csv(directory + "val_metrics.csv")

    #Test
    print('Testing...')
    # Evaluate the best checkpoint rather than the last-epoch weights.
    load_checkpoint(directory + "best_model.pth.tar", model)

    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo:
        log_scalars(test_metrics, "Test")

    test_metrics_df = pd.DataFrame(test_metrics)
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    run_id = f'{_run._id}'

    results = {
        'id': run_id,
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }

    return results
Esempio n. 28
0
def main():
    """Train and evaluate the selected recurrent model for seeds 0..99.

    Reads ``data/<FLAGS.dataset>.csv``, builds input/target series from its
    ``x`` column, min-max scales them, and splits them chronologically into
    train/validation/test using per-dataset sizes. For every seed the model
    named by ``FLAGS.algorithm`` is trained full-batch with Adam and MSE loss.
    The copy with the lowest validation loss is kept, and training stops early
    once the training loss has failed to improve by more than 1e-5 for
    ``FLAGS.patience`` consecutive epochs. The best copy is then scored on the
    test split, and the accumulated RMSE and MAE lists are printed after each
    seed.

    Raises:
        ValueError: If ``FLAGS.dataset`` or ``FLAGS.algorithm`` is not one of
            the supported names.
        RuntimeError: If the validation loss never improves for a seed, so no
            best model exists to evaluate.
    """
    batch_size = 1
    start = 0
    end = 100
    # read data
    df_data = pd.read_csv('data/' + FLAGS.dataset + '.csv')
    # split train/val/test: (train_size, validate_size) per dataset
    split_sizes = {
        'tree7': (2500, 1000),
        'DJI': (2500, 1500),
        'traffic': (1200, 200),
        'arfima': (2000, 1200),
    }
    if FLAGS.dataset not in split_sizes:
        raise ValueError('Unknown dataset {!r}; expected one of {}'.format(
            FLAGS.dataset, ', '.join(split_sizes)))
    train_size, validate_size = split_sizes[FLAGS.dataset]
    rmse_list = []
    mae_list = []
    for seed in range(start, end):
        print('seed ----------------------------------', seed)
        # Inputs and targets are both taken from the 'x' column.
        # NOTE(review): presumably create_dataset() shifts one against the
        # other -- confirm against its definition.
        x = np.array(df_data['x'])
        y = np.array(df_data['x'])
        x = x.reshape(-1, FLAGS.input_size)
        y = y.reshape(-1, FLAGS.output_size)
        # normalize the data
        # The scaler is re-fitted on y last, so inverse_transform() in
        # compute_test() uses the target scaling.
        scaler = MinMaxScaler(feature_range=(0, 1))
        x = scaler.fit_transform(x)
        y = scaler.fit_transform(y)
        # use this function to prepare the data for modeling
        data_x, data_y = create_dataset(x, y)

        # split into train, validation and test sets (chronological order)
        val_end = train_size + validate_size
        train_x, train_y = data_x[0:train_size], data_y[0:train_size]
        validate_x = data_x[train_size:val_end]
        validate_y = data_y[train_size:val_end]
        test_x = data_x[val_end:len(data_y)]
        test_y = data_y[val_end:len(data_y)]

        # reshape input to be [time steps,samples,features]
        train_x = np.reshape(train_x,
                             (train_x.shape[0], batch_size, FLAGS.input_size))
        validate_x = np.reshape(
            validate_x, (validate_x.shape[0], batch_size, FLAGS.input_size))
        test_x = np.reshape(test_x,
                            (test_x.shape[0], batch_size, FLAGS.input_size))
        train_y = np.reshape(train_y,
                             (train_y.shape[0], batch_size, FLAGS.output_size))
        validate_y = np.reshape(
            validate_y, (validate_y.shape[0], batch_size, FLAGS.output_size))
        test_y = np.reshape(test_y,
                            (test_y.shape[0], batch_size, FLAGS.output_size))

        torch.manual_seed(seed)
        # initialize model
        if FLAGS.algorithm == 'RNN':
            model = models.RNN(input_size=FLAGS.input_size,
                               hidden_size=FLAGS.hidden_size,
                               output_size=FLAGS.output_size)
        elif FLAGS.algorithm == 'LSTM':
            model = models.LSTM(input_size=FLAGS.input_size,
                                hidden_size=FLAGS.hidden_size,
                                output_size=FLAGS.output_size)
        elif FLAGS.algorithm == 'mRNN_fixD':
            model = models.MRNNFixD(input_size=FLAGS.input_size,
                                    hidden_size=FLAGS.hidden_size,
                                    output_size=FLAGS.output_size,
                                    k=FLAGS.K)
        elif FLAGS.algorithm == 'mRNN':
            model = models.MRNN(input_size=FLAGS.input_size,
                                hidden_size=FLAGS.hidden_size,
                                output_size=FLAGS.output_size,
                                k=FLAGS.K)
        elif FLAGS.algorithm == 'mLSTM_fixD':
            model = models.MLSTMFixD(input_size=FLAGS.input_size,
                                     hidden_size=FLAGS.hidden_size,
                                     output_size=FLAGS.output_size,
                                     k=FLAGS.K)
        elif FLAGS.algorithm == 'mLSTM':
            model = models.MLSTM(input_size=FLAGS.input_size,
                                 hidden_size=FLAGS.hidden_size,
                                 output_size=FLAGS.output_size,
                                 k=FLAGS.K)
        else:
            # Raise instead of only printing: continuing would fail on an
            # unbound `model` a few lines later.
            raise ValueError(
                'Algorithm selection ERROR!!! Unknown algorithm: {!r}'.format(
                    FLAGS.algorithm))
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=FLAGS.lr)
        best_loss = np.inf
        best_train_loss = np.inf
        stop_criterion = 1e-5
        rec = np.zeros((FLAGS.epochs, 3))
        epoch = 0
        cnt = 0
        # Reset per seed so a model from a previous seed is never evaluated.
        best_model = None

        def train():
            """Run one full-batch step; return (train loss, validation loss)."""
            model.train()
            optimizer.zero_grad()
            target = torch.from_numpy(train_y).float()
            output, hidden_state = model(torch.from_numpy(train_x).float())
            # Validation continues from the hidden state reached at the end
            # of the training sequence, using the pre-update weights.
            with torch.no_grad():
                val_y, _ = model(
                    torch.from_numpy(validate_x).float(), hidden_state)
                target_val = torch.from_numpy(validate_y).float()
                val_loss = criterion(val_y, target_val)

            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # Detach so callers holding on to the loss do not keep the
            # autograd graph of this step alive.
            return loss.detach(), val_loss

        def compute_test(best_model):
            """Return (RMSE, MAPE, MAE) of `best_model` on the test split."""
            # No gradients are needed for evaluation.
            with torch.no_grad():
                # Roll the hidden state through train and validation data so
                # the test sequence starts from the right state.
                _, hidden_state = best_model(to_torch(train_x))
                _, hidden_state = best_model(to_torch(validate_x),
                                             hidden_state)
                test_predict, _ = best_model(to_torch(test_x), hidden_state)
            test_predict = test_predict.numpy()
            # invert predictions
            test_predict_r = scaler.inverse_transform(test_predict[:, 0, :])
            test_y_r = scaler.inverse_transform(test_y[:, 0, :])
            # calculate error
            test_rmse = math.sqrt(
                mean_squared_error(test_y_r[:, 0], test_predict_r[:, 0]))
            test_mape = (abs((test_predict_r[:, 0] - test_y_r[:, 0]) /
                             test_y_r[:, 0])).mean()
            test_mae = mean_absolute_error(test_predict_r[:, 0],
                                           test_y_r[:, 0])
            return test_rmse, test_mape, test_mae

        while epoch < FLAGS.epochs:
            _time = time.time()
            loss, val_loss = train()
            if val_loss < best_loss:
                best_loss = val_loss
                best_model = deepcopy(model)
            # Early stopping on the training loss: count consecutive epochs
            # without an improvement larger than stop_criterion.
            if (best_train_loss - loss) > stop_criterion:
                best_train_loss = loss
                cnt = 0
            else:
                cnt += 1
            if cnt == FLAGS.patience:
                break
            # save training records (as plain floats, not tensors)
            time_elapsed = time.time() - _time
            rec[epoch, :] = [loss.item(), val_loss.item(), time_elapsed]
            print("epoch: {:2.0f} train_loss: {:2.5f} val_loss: {:2.5f} "
                  "time: {:2.1f}s".format(epoch, loss.item(), val_loss.item(),
                                          time_elapsed))
            epoch = epoch + 1

        if best_model is None:
            raise RuntimeError(
                'No best model for seed {}: validation loss never improved '
                '(no epochs run or non-finite loss).'.format(seed))

        # make predictions
        test_rmse, test_mape, test_mae = compute_test(best_model)

        rmse_list.append(test_rmse)
        mae_list.append(test_mae)
        print('RMSE:{}'.format(rmse_list))
        print('MAE:{}'.format(mae_list))
import config
import models


def huber_approx_obj(preds, dtrain, h=1):
    '''
    Pseudo-Huber objective: a smooth, twice-differentiable stand-in for mean
    absolute error, usable as a custom xgboost objective.

    With residual d = preds - dtrain the loss is
    h**2 * (sqrt(1 + (d / h)**2) - 1); this function returns its first and
    second derivatives with respect to the prediction.

    preds:  array of predictions
    dtrain: array of target values (same shape as preds)
    h:      delta of the Pseudo-Huber loss (the "h"/delta in the usual
            graphic); the default 1 reproduces the previous hard-coded value
    returns (grad, hess), element-wise arrays shaped like preds

    NOTE(review): the residual is computed as first argument minus second.
    If this is passed to xgboost's scikit-learn wrapper, which documents the
    callback as objective(y_true, y_pred), the sign of the gradient would be
    inverted -- confirm against the caller.
    '''
    d = preds - dtrain  #add .get_labels() for xgb.train()
    scale = 1 + (d / h)**2
    scale_sqrt = np.sqrt(scale)
    grad = d / scale_sqrt
    hess = 1 / scale / scale_sqrt
    return grad, hess


# Registry mapping a short model key to a freshly constructed model instance.
# NOTE: this assignment rebinds the module-level name `models`, which until
# this line referred to the imported `models` module. The right-hand side is
# evaluated first, so the constructors below still resolve against the
# module, but after this statement `models.<Class>` is no longer reachable
# through this name.
models = {
    "dt": models.DecisionTree(),
    "rf": models.RandomForest(),
    "lr": models.LR(),
    "xgb": models.XGBoost(),
    "svm": models.SVM(),
    "lgb": models.LGB(),
    # "mlp": models.MLP(),
    "lstm": models.LSTM()
}

# to get the final accuracy, calculate the mean and the mean absolute error should be the percentage of the
# performance since he wants to see performance
Esempio n. 30
0
if __name__ == "__main__":
    # device = torch.device("cpu")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    dataset = loaders.WavLSTM(wave, SAMPLE_RATE, WINDOW_SIZE)
    x, y = dataset[0]
    print(f'x: {x.shape} y{y.shape}')

    print(f'len(dataset): {len(dataset)}')

    loader = DataLoader(dataset, batch_size=BATCH_SIZE)
    loss_fn = nn.MSELoss()
    writer = SummaryWriter(f'runs/{LOG_FN}{time.asctime()}')

    # model = nn.LSTM(BATCH_SIZE*2, BATCH_SIZE*2, N_LAYERS).to(device)
    model = models.LSTM(WINDOW_SIZE, WINDOW_SIZE, N_LAYERS, device).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=LR)

    optimizer.zero_grad()

    hn = torch.randn(N_LAYERS, 1, WINDOW_SIZE).to(device)
    cn = torch.randn(N_LAYERS, 1, WINDOW_SIZE).to(device)
    all_outs = []
    for epoch in range(EPOCHS):

        model.reset()
        for i, (x, y) in enumerate(loader):
            try:
                x = x.to(device).view(BATCH_SIZE, 1, -1)
            except RuntimeError: