Example #1
    def train(self):

        #Model
        self.lstm_model = LSTM(self.input_size, self.output_size,
                               self.nb_neurons)
        self.lstm_model.load_state_dict(
            torch.load("/Users/baptiste/Desktop/training/AAPL_36.pth"))
        loss_function = nn.MSELoss()
        optimizer = torch.optim.Adam(self.lstm_model.parameters(),
                                     lr=self.learning_rate)
        print("Start training")
        for epoch in range(nb_epochs):

            for (x, y) in self.training_dataloader:

                optimizer.zero_grad()
                self.lstm_model.hidden_cell = (torch.zeros(
                    1, self.batch_size, self.lstm_model.nb_neurons),
                                               torch.zeros(
                                                   1, self.batch_size,
                                                   self.lstm_model.nb_neurons))
                pred = self.lstm_model(x.float())
                y = y.view(self.batch_size, 1)
                loss = loss_function(pred, y)
                loss.backward()
                optimizer.step()

            print("epoch n°%s : loss = %s" % (epoch, loss.item()))
            self.validate()
            if epoch % 5 == 1:
                model_name = "%s_%s.pth" % (self.stock, epoch)
                torch.save(self.lstm_model.state_dict(),
                           os.path.join(output_path, model_name))
Example #2
def main():
    model = LSTM(settings.vocab_size, settings.word_embedding_size,
                 settings.hidden_size, settings.num_layers, settings.out_dim, settings.drop_out)
    # pre-trained word embedding initialization
    dataset = Dataset(args.data)
    model.word_embed.weight = nn.Parameter(torch.from_numpy(dataset.get_wordembedding()))
    if torch.cuda.is_available():
        torch.cuda.manual_seed(settings.seed)
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=settings.lr, weight_decay=1e-5)
    criteria = nn.CrossEntropyLoss()
    best_dev_acc = 0.0
    best_test_acc = 0.0

    for i in xrange(dataset.size/settings.batch_size*settings.max_epochs):
        batch_data = dataset.get_batch()
        loss = train(model, batch_data, optimizer, criteria)
        if (i+1) % settings.validate_freq == 0:
            print "validating..."
            dev_acc = test(model, dataset.dev_data)
            test_acc = test(model, dataset.test_data)
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                best_test_acc = test_acc
                torch.save(model, os.path.join(args.model_dir, "sa_{}.model".format(best_dev_acc)))
            with open(os.path.join(args.model_dir, "log.txt"), "a") as logger:
                logger.write("epoch: {}, dev acc: {}, test acc: {}, " \
                  "batch loss: {}, best dev acc:{}, best test acc:{}\n".format(i*settings.batch_size/float(dataset.size),
                   dev_acc, test_acc, loss.cpu().numpy()[0], best_dev_acc, best_test_acc))
            print "epoch: {}, dev acc: {}, test acc: {}, " \
                  "batch loss: {}, best dev acc:{}, best test acc:{}".format(i*settings.batch_size/float(dataset.size),
                   dev_acc, test_acc, loss.cpu().numpy()[0], best_dev_acc, best_test_acc)
Example #3
    def __call__(self, number_of_iterations = 2, learning_rate = 0.005, embedding_size = 300, hidden_size=100, batch_size=100):
        print("Starting 'Image Retrieval' in 'LSTM' mode with '" + self.difficulty + "' data")

        self.model_full_path = self.model_path + "/" + self.model_name + "_" + self.timestamp + "_" + str(learning_rate) + "_" + str(embedding_size) + ".pty"
        self.output_file_name = self.output_path + "/" + self.model_name + "_" + self.timestamp + "_" + str(learning_rate) + "_" + str(embedding_size) + ".csv"

        self.number_of_iterations = number_of_iterations
        self.learning_rate = learning_rate
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.model = LSTM(self.nwords, self.embedding_size, self.image_feature_size, self.output_vector_size, self.hidden_size, self.batch_size)
        self.criterion = nn.CrossEntropyLoss()

        self.evaluate = Evaluate(self.model, self.img_features, self.minibatch, self.preprocess, self.image_feature_size, self.output_vector_size)
        print(self.model)

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)

        self.train_loss_values = []

        self.magic()

        self.save_model()

        self.save_data()
Example #4
    def apply_morph_only_rnn_gru(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """
            sentence : sentence * batch
            sentence_morph : sentence * batch * morph
            1. morph lookup -> dropout
            2. MorphStructRNN
            3. lstm -> dropout
            4. lstm -> maxout -> dropout
            5. logistic
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        #morph lookup table
        emb_morph_range = T.arange(self.n_emb_morph)
        table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
        src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
        self.layers.append(table_morph)

        if self.dropout < 1.0:
            src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

        morph_layer_1st = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
        hiddens = morph_layer_1st.apply(src_morph_emb, src_morph_mask)
        self.layers.append(morph_layer_1st)

        rnn_layer_2rd = LSTM(self.n_hids , self.n_hids)
        hiddens , cells  = rnn_layer_2rd.apply(hiddens , src_mask)
        self.layers.append(rnn_layer_2rd)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        rnn_layer_3nd = LSTM(self.n_hids, self.n_hids)
        hiddens , cells = rnn_layer_3nd.apply(hiddens , src_mask)
        self.layers.append(rnn_layer_3nd)

        if True:
            maxout = MaxoutLayer()
            src_morph_merge_emb = src_morph_emb.sum(2)
            src_morph_mask = src_morph_mask.max(axis=2)
            #src_morph_merge_emb : sentence * batch * n_emb_morph
            states = T.concatenate([src_morph_merge_emb, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, self.n_emb_morph + self.n_hids, self.n_hids, src_morph_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #5
def evaluateAccuracy(data: list, network: LSTM, seq_length: int, sign_to_int):
    #network.eval()

    hidden = network.initHidden()
    memory = network.initMemory()
    hidden = hidden.to(device)
    memory = memory.to(device)
    print(len(data))
    right = 0
    total = 0

    with torch.no_grad():
        for i in range(0, len(data), seq_length):
            in_seq = convert_to_one_hot_matrix(data[i:i + seq_length],
                                               sign_to_int)
            out_seq = target_tensor(data[i + 1:i + seq_length + 1],
                                    sign_to_int)
            in_seq = in_seq.to(device)
            out_seq = out_seq.to(device)
            out_seq.unsqueeze_(-1)
            if i % 100000 == 0:
                print(i)
            for j in range(out_seq.size()[0]):
                output, hidden, memory = network(in_seq[j], hidden, memory)
                _, guess = output.max(1)
                if guess == out_seq[j]:
                    right = right + 1
                total = total + 1
        print("finished eval loop")
        print(total)
        print(right)
        res = right / total
        print("finished calculating accuracy")
        print(res)
    return
Example #6
    def __init__(self, model_path="model.pt"):
        self.model = LSTM(45, 256, 45)
        self.model.load_state_dict(torch.load(model_path, map_location=device))
        self.sequence_length = 64

        with open("data/dictionary.json", "r") as f:
            self._mappings = json.load(f)

        self.melody = None
Example #7
    def apply_normal(self, sentence, sentence_mask, use_noise=1, use_maxout=True):
        """
            sentence : sentence * batch
            1. word lookup -> dropout
            2. lstm -> dropout
            3. lstm -> maxout -> dropout
            4. logistic
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]

        emb_lstm_range = T.arange(self.n_emb_lstm)
        #word lookup table
        table = DynamicMixLookupTable(self.n_emb_lstm, **self.cfig)
        #table = DynamicLookupTable(self.n_emb_lstm, **self.cfig)
        #table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
        src_emb = table.apply(src, emb_lstm_range)
        self.src_emb = src_emb
        self.layers.append(table)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

        rnn_layer_1st = LSTM(self.n_emb_lstm, self.n_hids)
        hiddens , cells  = rnn_layer_1st.apply(src_emb, src_mask)
        self.layers.append(rnn_layer_1st)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        rnn_layer_2rd = LSTM(self.n_hids, self.n_hids)
        hiddens , cells = rnn_layer_2rd.apply(hiddens , src_mask)
        self.layers.append(rnn_layer_2rd)

        if use_maxout:
            maxout = MaxoutLayer()
            states = T.concatenate([src_emb, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)
        self.cost = logistic_layer.cost(tgt, tgt_mask)

        #hier_softmax_layer = HierarchicalSoftmax(hiddens, self.n_hids, self.vocab_size)
        #self.layers.append(hier_softmax_layer)
        #self.cost = hier_softmax_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #8
def create_model():
    model = LSTM(input_size=input_size,
                 num_classes=num_classes,
                 hidden=args.hidden_unit,
                 num_layers=args.num_layers,
                 mean_after_fc=args.mean_after_fc,
                 mask_empty_frame=args.mask_empty_frame)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    return (model, optimizer)
Example #9
def main():
    print 'Loading Data'
    x = cPickle.load(open('english_matrices.pkl', 'rb'))
    y = cPickle.load(open('chinese_matrices.pkl', 'rb'))
    print 'Done'

    # x = np.random.random((10, 10, 50, 1))
    # y = np.random.random((10, 10, 50, 1))
    encoder_lstm = LSTM(50, 100, 50)
    encoder_lstm.load_weights('encoder.pkl')
    outputs = []
    for i in range(10000):
        outputs.append(encoder_lstm.predict(x[i]))
    # for _ in range(10):
    #     for i in range(20):
    #         idx_start = i*500
    #         idx_end = min((i+1)*500, len(x))
    #         sys.stdout.write('\n\nTraining Data %d - %d' % (idx_start, idx_end))
    #         train(encoder_lstm, x[idx_start:idx_end], y[idx_start:idx_end][0], 50, 'encoder')
    #         encoder_lstm.save_weights('encoder.pkl')
    # outputs = encoder_lstm.predict(x[:10000])
    # encoder_lstm.save_weights('encoder.pkl')
    embed()
    decoder_lstm = LSTM(50, 100, 50)
    for _ in range(4):
        for i in range(20):
            idx_start = i * 500
            idx_end = min((i + 1) * 500, len(x))
            sys.stdout.write('\n\nTraining Data %d - %d' % (idx_start, idx_end))
            train(decoder_lstm, outputs[idx_start:idx_end], y[idx_start:idx_end], 50, 'decoder')
            decoder_lstm.save_weights('decoder.pkl')
Example #10
def main(opt):
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize, \
                                      shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize, \
                                     shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize, \
                                     shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize, \
                                     shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node
    opt.n_existing_node = all_node_num

    net = LSTM(opt, hidden_state=opt.state_dim*5)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=1, eps=1e-6)

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch':[i for i in range(1, len(train_loss_ls)+1)], 'train_loss': train_loss_ls, 'valid_loss': valid_loss_ls, 'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
Example #11
def setup_model(model_name, dataset_name, model_path, device):
    """Sets up language-model (LSTM) on device based on its designated filename."""
    device = torch.device(device)
    batch_size = 20
    data_loader = DataLoader(dataset_name, batch_size, device, 70)
    model = LSTM(vocab_size=len(data_loader.corpus.dictionary), device=device, \
                    batch_size=batch_size)

    model_path = os.path.join(model_path, model_name)
    print("loading model state_dict...")
    print("model_path", model_path)
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device(device))['model'])
    return model, data_loader, batch_size
Example #12
    def apply_model(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """
            sentence : sentence * batch
            sentence_morph : sentence * batch * morph
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        src_emb = lookup_layer('word',src)
        #src_morph_emb : sentence * batch * morph * n_emb_morph
        #src_morph_emb = lookup_layer('morph',src)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

        rnn_layer_1rd = LSTM(self.n_emb_lstm, self.n_hids)
        hiddens, cells = rnn_layer_1rd.apply(src_emb, src_mask)
        self.layers.append(rnn_layer_1rd)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        rnn_layer_2rd = LSTM(self.n_hids, self.n_hids)
        hiddens , cells = rnn_layer_2rd.apply(hiddens , src_mask)
        self.layers.append(rnn_layer_2rd)

        if True:
            maxout = MaxoutLayer()
            #src_emb : sentence * batch * n_emb
            #hiddens : sentence * batch * hids
            states = T.concatenate([src_emb, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)
        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #13
def synthesize_midi():
    int_to_sign = loade_data('./int2sign.json')
    sign_to_int = loade_data('./sign2int.json')
    network = LSTM(hidden_size=64, input_size=90, output_size=90)
    load_network(network, "net.pth")
    notes = synthesize_notes(network, "C3", 100, sign_to_int, int_to_sign)
    create_midi(notes)
Example #14
def main():
    np.random.seed(RANDOM_SEED)  # for reproducible results
    data = pd.read_csv(DATA_PATH)
    scaler, data = preprocessing_data.process_normalization(
        data=data,
        useful_colmns=[
            'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap', 'Weekday',
            'Increase'
        ])
    training_set, test_set = generate_batch.split_data(data)
    training_inputs, train_outputs = generate_batch.generate_batchs(
        training_set)
    test_inputs, test_outputs = generate_batch.generate_batchs(test_set)
    model = LSTM.build_model(inputs=training_inputs,
                             output_size=1,
                             LSTM_units=20)
    history = model.fit(training_inputs,
                        train_outputs,
                        batch_size=1,
                        epochs=30,
                        verbose=2,
                        shuffle=True)
    fig, ax1 = plt.subplots(1, 1)
    ax1.plot(history.epoch, history.history['loss'])
    ax1.set_title('Training loss')
    ax1.set_ylabel('Mean Absolute Error (MAE)', fontsize=12)
    ax1.set_xlabel('# Epochs', fontsize=12)
    plt.show()

    h = model.predict(test_inputs)
    dat = np.concatenate((test_outputs, h), axis=1)
    dat = dat * scaler.data_range_[-1] + scaler.data_min_[-1]
    df = pd.DataFrame(dat, columns=['actual', 'prediction'])
    df.to_csv('prediction.csv', index=False)
Example #15
def main(cfg):
    if cfg['model'] == 'mlp':
        net = MLP(300, 768, cfg['class_num'])
    elif cfg['model'] == 'cnn':
        net = CNN(300, 768, cfg['class_num'])
    elif cfg['model'] == 'lstm':
        net = LSTM(300, cfg['class_num'], cfg['device'])
    elif cfg['model'] == 'gru':
        net = GRU(300, cfg['class_num'], cfg['device'])
    else:
        raise Exception(f"model {cfg['model']} not available")

    if cfg['device'] == 'cuda':
        if len(cfg['gpu_ids']) == 1:
            torch.cuda.set_device(cfg['gpu_ids'][0])
            net = net.cuda()
        else:
            net = net.cuda()
            net = nn.DataParallel(net, device_ids=cfg['gpu_ids'])

    torch.backends.cudnn.benchmark = True

    if cfg['mode'] == 'train':
        train(cfg, net)
    elif cfg['mode'] == 'predict':
        predict(cfg, net, 'checkpoints/{}.pth'.format(cfg['model']))
Example #16
    def apply(self, sentence, sentence_mask, use_noise=1):
        n_emb_lstm = self.n_emb_lstm

        src = sentence[:-1]
        src_mask = sentence_mask[:-1]
        tgt = sentence[1:]
        tgt_mask = sentence_mask[1:]

        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)
        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        rnn = LSTM(n_emb_lstm, self.n_hids)
        hiddens , cells  = rnn.apply(state_below, src_mask)
        self.layers.append(rnn)
        #if self.dropout < 1.0:
        #    hiddens = dropout_layer(hiddens, use_noise, self.dropout)
        rnn2 = FLSTM(self.n_hids, self.n_hids)
        hiddens , cells = rnn2.apply(hiddens , hiddens , src_mask)
        self.layers.append(rnn2)

        #rnn = NormalRNN(n_emb_lstm , self.n_hids)
        #hiddens  = rnn.apply(state_below, src_mask)
        #self.layers.append(rnn)

        if True:
            maxout = maxout_layer()
            states = T.concatenate([state_below, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)
        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
Example #17
    def show_result(self):

        files = os.listdir(self.output)
        for file in files:
            if ".pth" in file:
                path = os.path.join(self.output, file)
                lstm_model = LSTM(self.input_size, self.output_size,
                                  self.nb_neurons)
                lstm_model.load_state_dict(torch.load(path))
                lstm_model.eval()
                print("model : %s loaded" % path)
                predictions = []

                for (x, _) in self.testing_dataloader:
                    if x.shape[0] == self.batch_size:
                        with torch.no_grad():
                            lstm_model.hidden_cell = (
                                torch.zeros(1, self.batch_size,
                                            lstm_model.nb_neurons),
                                torch.zeros(1, self.batch_size,
                                            lstm_model.nb_neurons))
                            output = lstm_model(x.float())
                            output = self.data.unnormalizeData(
                                output).squeeze()
                            predictions += output.tolist()

                plt.plot(predictions, label="prediction")
                plt.plot(self.real_data_test, label="target")
                plt.title(file)
                plt.legend()
                plt.show()
Example #18
def train_model(stock, col):
    data = pd.read_csv(
        f'../data/ashare/{stock}.csv',
        encoding='gbk',
        converters={0: lambda x: datetime.strptime(x, '%Y-%m-%d')})
    data = data.sort_index(ascending=False)
    training_set = data.iloc[:, col].values

    sc = MinMaxScaler()
    training_data = sc.fit_transform(training_set.reshape(-1, 1))
    # print(training_data)

    num_classes = 2
    seq_length = 8

    x, y = sliding_windows(training_data, seq_length, num_classes)
    print(x.shape)
    print(y.shape)

    train_size = int(len(y) * 0.67)
    test_size = len(y) - train_size

    trainX = Variable(torch.Tensor(np.array(x[0:train_size])))
    trainY = Variable(torch.Tensor(np.array(y[0:train_size])))
    # print(trainX)
    # print(trainY)

    testX = Variable(torch.Tensor(np.array(x[train_size:len(x)])))
    testY = Variable(torch.Tensor(np.array(y[train_size:len(y)])))

    num_epochs = 1500
    learning_rate = 0.01

    input_size = 1
    hidden_size = 2
    num_layers = 1

    lstm = LSTM(num_classes, input_size, hidden_size, num_layers, seq_length)

    train(lstm, num_epochs, num_classes, trainX, trainY, learning_rate)

    torch.save(lstm.state_dict(),
               f'../data/ashare/models/{stock}-col{col}-8-2.pt')
Example #19
def synthesize_notes(network: LSTM, inputs, n: int, sign_to_int: dict,
                     int_to_sign: dict):
    seq = []
    hidden = network.initHidden()
    memory = network.initMemory()
    inputs = convert_to_one_hot_matrix([inputs] * 2, sign_to_int)

    with torch.no_grad():
        for i in range(n):
            p, hidden, memory = network(inputs, hidden, memory)

            p = p.numpy()[0][0][:]

            ind = np.random.choice((p.shape[0]), 1, p=p / sum(p))[0]
            inputs = torch.zeros(1, 1, len(sign_to_int))
            inputs[0][0][ind] = 1

            seq.append(int_to_sign[str(ind)])

    return seq
Example #20
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument("--gpu-device", "-g", type=int, default=0)
	parser.add_argument("--dropout-embedding-softmax", "-dos", type=float, default=0.5)
	parser.add_argument("--dropout-rnn", "-dor", type=float, default=0.2)
	parser.add_argument("--ndim-hidden", "-dh", type=int, default=640)
	parser.add_argument("--num-layers", "-nl", type=int, default=2)
	parser.add_argument("--num-to-generate", "-n", type=int, default=100)
	parser.add_argument("--model-filename", "-m", type=str, default="model.hdf5")
	parser.add_argument("--vocab-filename", "-v", type=str, default="vocab.pkl")
	args = parser.parse_args()

	assert args.num_layers > 0
	assert args.ndim_hidden > 0
	assert os.path.isfile(args.vocab_filename) is True

	with open(args.vocab_filename, "rb") as f:
		vocab_str_id = pickle.load(f)
		vocab_id_str = pickle.load(f)

	vocab_size = len(vocab_str_id)
	lstm = LSTM(vocab_size=vocab_size,
				ndim_hidden=args.ndim_hidden, 
				num_layers=args.num_layers,
				dropout_embedding_softmax=args.dropout_embedding_softmax, 
				dropout_rnn=args.dropout_rnn)
	assert lstm.load(args.model_filename)

	for n in range(args.num_to_generate):
		lstm.reset_state()
		x_sequence = np.asarray([ID_EOS]).astype(np.int32)[None, :]
		for t in range(1000):
			distribution = functions.softmax(lstm(x_sequence[:, t])).data[0]
			y_data = np.random.choice(np.arange(distribution.size), size=1, p=distribution).astype(np.int32)
			x_sequence = np.concatenate((x_sequence, y_data[None, :]), axis=1)
			if y_data[0] == ID_EOS:
				break
		tokens = []
		for t in range(1, x_sequence.size - 2):
			tokens.append(vocab_id_str[x_sequence[0, t]])
		print(" ".join(tokens))
Example #21
def validate():

    stock = "MC.PA"
    directory = "/Users/baptiste/Desktop/training"

    input_size = 4
    output_size = 4
    nb_neurons = 200

    test_split = 0.1
    time_window = 5

    dataloader = Data(stock)
    df = dataloader.getData()
    real_data = df.to_numpy()
    df_normalized = dataloader.normalizeData(df)
    df_normalized = torch.FloatTensor(df_normalized.to_numpy())

    test_split = int(test_split * df.shape[0])
    real_test_split = real_data[-test_split:-time_window, 3]
    testing_split = df_normalized[-test_split:, :]

    files = os.listdir(directory)

    for file in files:
        if ".pth" in file:
            path = os.path.join(directory, file)
            lstm_model = LSTM(input_size, output_size, nb_neurons)
            lstm_model.load_state_dict(torch.load(path))
            print("model : %s loaded" % path)

            lstm_model.eval()

            predictions = []

            for i in range(testing_split.shape[0] - time_window):

                x_test = testing_split[i:i + time_window]

                with torch.no_grad():

                    lstm_model.hidden_cell = (torch.zeros(
                        1, 1, lstm_model.nb_neurons),
                                              torch.zeros(
                                                  1, 1, lstm_model.nb_neurons))
                    predictions.append(
                        dataloader.unnormalizeData(
                            lstm_model(x_test).tolist()))
            predictions = np.array(predictions)[:, 3, 0]

            #plt.figure(15,10)
            plt.plot(real_test_split, label="target")
            plt.plot(predictions, label="prediction")
            plt.title(file)
            plt.legend()
            plt.show()
Example #22
def train():
    train_writer = SummaryWriter(
        os.path.join(LOG_DIR, 'train7-64-LSTM-Doppler'))
    test_writer = SummaryWriter(os.path.join(LOG_DIR, 'test7-64-LSTM-Doppler'))

    train_loader, test_loader = load_data(TRAIN_DIR, TEST_DIR)

    lstm = LSTM().to(DEVICE)
    optimizer = torch.optim.Adam(lstm.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss().to(DEVICE)

    for epoch in range(MAX_EPOCH):
        log_string('**** EPOCH %3d ****' % (epoch))
        sys.stdout.flush()

        train_one_epoch(epoch, train_writer, train_loader, lstm, loss_func,
                        optimizer)
        eval_one_epoch(epoch, test_writer, test_loader, lstm, loss_func)

    # save model parameters to files
    torch.save(lstm.state_dict(), MODEL_DIR)
Example #23
def main():
    names_str = read_csv(filname='data/names/names.csv')
    all_char_str = set([char for name in names_str for char in name])
    char2idx = {char: i for i, char in enumerate(all_char_str)}
    char2idx['EOS'] = len(char2idx)
    # save char dictionary
    cPickle.dump(char2idx, open("dic.p", "wb"))

    names_idx = [[char2idx[char_str] for char_str in name_str]
                 for name_str in names_str]

    # build model
    model = LSTM(input_dim=len(char2idx), embed_dim=100, hidden_dim=128)

    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters())

    n_iters = 5

    for iter in range(1, n_iters + 1):

        # data shuffle
        random.shuffle(names_idx)

        total_loss = 0

        for i, name_idx in enumerate(names_idx):
            input = inputTensor(name_idx)
            target = targetTensor(name_idx, char2idx)

            loss = train(model, criterion, input, target)
            total_loss += loss

            optimizer.step()

        print(iter, "/", n_iters)
        print("loss {:.4}".format(float(total_loss / len(names_idx))))

        # save trained model
        torch.save(model.state_dict(), "model.pt")
Example #24
def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # First, load the parameters used to create the model.
    model_info = {}
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)

    print("model_info: {}".format(model_info))

    # Determine the device and construct the model.
    device = torch.device("cpu" if torch.cuda.is_available() else "cpu")
    #model = LSTM(model_info['embedding_dim'], model_info['hidden_dim'], model_info['vocab_size'])
    model = LSTM(model_info['num_classes'], model_info['input_size'], model_info['hidden_size'], model_info['num_layers'])

    # Load the stored model parameters.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f))

    model.to(device).eval()

    print("Done loading model.")
    return model
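For context, the `model_info.pth` file read above would be written on the training side. Below is a minimal sketch of that counterpart, assuming the same key names that `model_fn` reads; the concrete values and the `/opt/ml/model` directory are placeholders, not taken from the original project.

# Sketch (assumption) of the training-side code that writes model_info.pth
# so model_fn() can rebuild the LSTM; all values below are placeholders.
import os
import torch

model_info = {
    'num_classes': 2,
    'input_size': 1,
    'hidden_size': 64,
    'num_layers': 2,
}
model_dir = "/opt/ml/model"  # assumed SageMaker model directory
with open(os.path.join(model_dir, 'model_info.pth'), 'wb') as f:
    torch.save(model_info, f)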
Example #25
def main(opt):
    model = LSTM(opt, batch_first=True, dropout=opt.dropout)
    if opt.pre_train:
        model.load_state_dict(torch.load(opt.save_path))
    optimizer = optim.Adam(model.parameters(), opt.learning_rate)
    mseloss = nn.MSELoss()

    dataset = PowerDataset(opt,
                           prepocess_path=opt.prepocess_path,
                           transform=transforms.Compose(
                               [transforms.ToTensor()]))
    train_dataset = data.Subset(dataset, indices=range(8664))
    test_dataset = data.Subset(dataset, indices=range(8664, len(dataset)))
    train_dataloader = data.dataloader.DataLoader(train_dataset,
                                                  num_workers=opt.n_threads,
                                                  batch_size=opt.batch_size,
                                                  shuffle=True)
    test_sampler = data.SequentialSampler(test_dataset)
    test_dataloader = data.dataloader.DataLoader(
        test_dataset,
        num_workers=opt.n_threads,
        batch_size=opt.test_batch_size,
        shuffle=False,
        sampler=test_sampler)

    for e in range(opt.epochs):
        if opt.test_only:
            test(model, test_dataloader)
            break
        print('epoch: ', e)
        train(model, mseloss, optimizer, train_dataloader)
        test(model, test_dataloader)
        torch.save(model.state_dict(), opt.save_path)
Example #26
def infer(minmax, data_train, data_test):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # lstm_train_model = LSTM()
    model = LSTM().to(device)
    model.load_state_dict(
        torch.load("D:\stock\weights\checkpont_67.27376310428824.pth"))
    model.eval()
    test_size = len(data_test)
    future_day = test_size
    timestamp = 5
    output_predict = np.zeros(
        (data_train.shape[0] + future_day, data_train.shape[1]))
    output_predict[0] = data_train.iloc[0]
    for k in range(0, (data_train.shape[0] // timestamp) * timestamp,
                   timestamp):
        index = min(k + timestamp, output_predict.shape[0] - 1)
        batch_x = np.expand_dims(data_train.iloc[k:index, :].values, axis=0)
        batch_y = data_train.iloc[k + 1:index + 1, :].values
        batch_x = torch.Tensor(batch_x).to(device)
        batch_y = torch.Tensor(batch_y).to(device)
        out_logits = model(batch_x)
        # init_value = last_state
        output_predict[k + 1:k + timestamp +
                       1] = out_logits.cpu().detach().numpy()[0]
    output_predict = minmax.inverse_transform(output_predict)
    return output_predict
Example #27
def train(config):
    train_data = pickle.load(open(os.path.join(config.data_path, config.train_name), "rb"))
    dev_data = pickle.load(open(os.path.join(config.data_path, config.dev_name), "rb"))
    test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(open(os.path.join(config.data_path, config.vocabulary_name), "rb"))
    # load w2v data
    weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if config.task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                                  output_dim=config.class_num, hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         class_num=config.class_num, kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                         static=config.static, in_channels=config.in_channels)
    elif config.task_name == "cnn_w2v":
        text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                             class_num=config.class_num, kernel_num=config.kernel_num,
                             kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                             static=config.static, in_channels=config.in_channels,
                             weight=weight)
    elif config.task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim, 
                          output_dim=config.class_num, hidden_dim=config.hidden_dim, 
                          num_layers=config.num_layers, dropout=config.dropout)
    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    accuracy = AccuracyMetric(pred='output', target='target')

    trainer = Trainer(train_data=train_data, model=text_model, loss=CrossEntropyLoss(),
                      batch_size=config.batch_size, check_code_level=0,
                      metrics=accuracy, n_epochs=config.epoch,
                      dev_data=dev_data, save_path=config.save_path,
                      print_every=config.print_every, validate_every=config.validate_every,
                      optimizer=optimizer, use_tqdm=False,
                      device=config.device, callbacks=[timing, early_stop])
    trainer.train()

    # test result
    tester = Tester(test_data, text_model, metrics=accuracy)
    tester.test()
Example #28
def get_model(input_size, embed_size, output_size, model_type, dropout=DROPOUT):
    if model_type.lower() == 'lstm':
        return LSTM(input_size, embed_size, output_size, dropout)
    if model_type.lower() == 'cnn':
        return CNN(input_size, embed_size, output_size, dropout)
    if model_type.lower() == 'gru':
        return GRU(input_size, embed_size, output_size, dropout)
    return None
Example #29
def train(network: LSTM, criterion, input_seq, follow_seq,
          optimizer: optim.Optimizer, scheduler):
    follow_seq.unsqueeze_(-1)
    hidden = network.initHidden()
    memory = network.initMemory()
    hidden = hidden.to(device)
    memory = memory.to(device)
    loss = 0

    network.zero_grad()
    for i in range(input_seq.size()[0]):
        output, hidden, memory = network(input_seq[i], hidden, memory)
        l = criterion(output, follow_seq[i])
        #TODO: smooth loss here
        loss += l

    loss.backward()

    optimizer.step()
    scheduler.step()

    return output, loss.item() / input_seq.size()[0]
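The `LSTM` network passed to `train()`, `evaluateAccuracy()`, and `synthesize_notes()` is defined elsewhere. The sketch below shows one plausible shape of that interface, assuming a single nn.LSTMCell stepped manually with explicit hidden and memory tensors; the layer layout and tensor shapes are assumptions, not the original code.

# Minimal sketch (assumption) of an LSTM exposing initHidden()/initMemory()
# and a step-wise forward(x, hidden, memory) as used by train() above.
import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.cell = nn.LSTMCell(input_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, memory):
        # x: (1, input_size) one-hot step; hidden, memory: (1, hidden_size)
        hidden, memory = self.cell(x, (hidden, memory))
        return self.out(hidden), hidden, memory  # output: (1, output_size)

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

    def initMemory(self):
        return torch.zeros(1, self.hidden_size)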
Example #30
def main():
    global args, best_prec1
    best_prec1 = 1e6
    args = parser.parse_args()
    args.original_lr = 1e-6
    args.lr = 1e-6
    args.momentum = 0.95
    args.decay = 5 * 1e-4
    args.start_epoch = 0
    args.epochs = 5000
    args.steps = [-1, 1, 100, 150]
    args.scales = [1, 1, 1, 1]
    args.workers = 4
    args.seed = time.time()
    args.print_freq = 30
    args.feature_size = 100
    args.lSeq=5
    wandb.config.update(args)
    wandb.run.name = f"Default_{wandb.run.name}" if (args.task == wandb.run.name) else f"{args.task}_{wandb.run.name}"

    conf = configparser.ConfigParser()
    conf.read(args.config)
    # print(conf)
    TRAIN_DIR = conf.get("lstm", "train")
    VALID_DIR = conf.get("lstm", "valid")
    TEST_DIR = conf.get("lstm", "test")
    LOG_DIR = conf.get("lstm", "log")
    create_dir_not_exist(LOG_DIR)
    # TODO: train_list to train_file
    train_list = [os.path.join(TRAIN_DIR, item) for item in os.listdir(TRAIN_DIR)]
    val_list = [os.path.join(VALID_DIR, item) for item in os.listdir(VALID_DIR)]
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(int(args.seed))
    model = LSTM(args.feature_size, args.feature_size, args.feature_size)
    model = model.cuda()
    criterion = nn.MSELoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=args.decay)
    model = DataParallel_withLoss(model, criterion)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, optimizer, epoch)
        prec1 = validate(val_list, model, criterion, epoch)
        with open(os.path.join(LOG_DIR, args.task + ".txt"), "a") as f:
            f.write("epoch " + str(epoch) + "  MSELoss: " + str(float(prec1)))
            f.write("\n")
        wandb.save(os.path.join(LOG_DIR, args.task + ".txt"))
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MSELoss {MSELoss:.3f} '.format(MSELoss=best_prec1))
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.pre,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.task, epoch=epoch, path=os.path.join(LOG_DIR, args.task))
Example #31
    def build_model(self, output_graph=True):
        self.lstm_0 = LSTM(seq_length=self.FLAGS.seq_length,
                           hidden_size=self.FLAGS.hidden_size,
                           lr=self.FLAGS.lstm_lr,
                           activation=tf.nn.tanh,
                           forget_bias=0.0,
                           name="forget_bias_0.0")
        self.lstm_1 = LSTM(seq_length=self.FLAGS.seq_length,
                           hidden_size=self.FLAGS.hidden_size,
                           lr=self.FLAGS.lstm_lr,
                           activation=tf.nn.tanh,
                           forget_bias=1.0,
                           name="forget_bias_1.0")
        self.lstm_5 = LSTM(seq_length=self.FLAGS.seq_length,
                           hidden_size=self.FLAGS.hidden_size,
                           lr=self.FLAGS.lstm_lr,
                           activation=tf.nn.tanh,
                           forget_bias=5.0,
                           name="forget_bias_5.0")
        self.lstm_10 = LSTM(seq_length=self.FLAGS.seq_length,
                            hidden_size=self.FLAGS.hidden_size,
                            lr=self.FLAGS.lstm_lr,
                            activation=tf.nn.tanh,
                            forget_bias=10.0,
                            name="forget_bias_10.0")

        self.models[self.lstm_0.name] = self.lstm_0
        self.models[self.lstm_1.name] = self.lstm_1
        self.models[self.lstm_5.name] = self.lstm_5
        self.models[self.lstm_10.name] = self.lstm_10

        self.sess.run(tf.global_variables_initializer())

        # self.sess.run([self.irnn.w_assign, self.irnn.b_assign])

        if output_graph:
            tf.summary.FileWriter("logs/", self.sess.graph)
Example #32
def train_predict(state):
    if state == 'train':
        model = LSTM(len(vocab),
                     batch_size,
                     num_steps=num_steps,
                     lr=learning_rate)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=100)

        with tf.Session() as sess:
            sess.run(init)

            for ep in range(1):
                new_state = sess.run(model.init_state)  # reinitialize the state at the start of each epoch
                # print(np.shape(new_state))
                counter = 0
                for x, y in get_batches(encode, batch_size, num_steps):
                    counter += 1
                    start = time.time()
                    feed = {
                        model.inputs: x,
                        model.label: y,
                        model.keep_prob: keep_prob,
                        model.init_state: new_state
                    }
                    batch_loss, new_state, _ = sess.run(
                        [model.loss, model.final_state, model.optimizer],
                        feed_dict=feed)
                    end = time.time()

                    print('Epoch : {} / {}...'.format(ep + 1, epoches),
                          '  Training steps :{}'.format(counter),
                          '  Training loss : {:.4f}'.format(batch_loss),
                          '  {:.4f} sec/batch'.format(end - start))
                    if counter % save_freq == 0:
                        saver.save(sess,
                                   'checkpoints/iter{}.ckpt'.format(counter))
                        print('\n--------save ok!---------\n')

                saver.save(sess, 'checkpoints/end_iter.ckpt')
    elif state == 'predict':
        word = '传说在天地间有一块灵石'  # seed text: "Legend has it that between heaven and earth there is a spirit stone"

        novel = generate_novel(word, 1000)
        print(''.join(novel))

    else:
        print('状态错误,请调试...')  # "Invalid state, please debug..."
Example #33
def main():
    global device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    notes = loade_data('./notes.json')['notes']
    validation = loade_data('./validation.json')['notes']
    test = loade_data('./test.json')['notes']
    int_to_sign = loade_data('./int2sign.json')
    sign_to_int = loade_data('./sign2int.json')
    seq_length = 100

    #refactor this, we only need a one-hot for the input
    #select a sequence or whatever here, use predefined for now (testing)

    learning_rate = 0.001

    network = LSTM(hidden_size=64, input_size=90, output_size=90)
    criterion = nn.CrossEntropyLoss()
    network.to(device)
    optimizer = optim.Adam(network.parameters(), learning_rate)
    scheduler = optim.lr_scheduler.CyclicLR(optimizer,
                                            base_lr=0.0001,
                                            max_lr=0.001,
                                            cycle_momentum=False)
    # move network to GPU

    print(device)
    #network, _, losses, best_net = trainLoop(network, criterion, notes, optimizer, 3, seq_length, sign_to_int, scheduler)
    best_net = network
    """plt.plot(losses)
    plt.savefig('losses.png')
    plt.close('all')"""
    print('saving network....')
    #save_network(best_net, "net.pth")
    print('evaluating on test data...')
    evaluateAccuracy(test, best_net, seq_length, sign_to_int)
    print("eval done!")
Example #34
def train():
    int_to_vocab, vocab_to_int, n_vocab, in_text = get_data_from_file( flags.batch_size, flags.seq_size)
    x_batch,y_batch = create_batch(in_text,flags.batch_size,flags.seq_size)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = LSTM(n_vocab, flags.seq_size,flags.embedding_size, flags.lstm_size).to(device)

    #optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.7)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_function = nn.CrossEntropyLoss()

    for e in range(flags.num_epochs):
        print(f'epoch #{e}: ',end="")
        batches = get_batches(x_batch,y_batch,flags.batch_size, flags.seq_size)
        (state_h_1, state_c_1),(state_h_2, state_c_2) = model.zero_state(flags.batch_size)
        state_h_1 = state_h_1.to(device)
        state_c_1 = state_c_1.to(device)
        state_h_2 = state_h_2.to(device)
        state_c_2 = state_c_2.to(device)
        
        for i,(x, y) in enumerate(batches):
            model.train()
            optimizer.zero_grad()
 

            x = torch.tensor(x , dtype=torch.int64).to(device)
            #print("x shape {} ".format(np.shape(x)))
            
            tmp = []
            for index,el in enumerate(y) :
                tmp.append(np.zeros(n_vocab))
                tmp[index][y[index]] = 1
            #print(y)
            y = tmp 
            y = torch.tensor(y , dtype=torch.int64).to(device)
            logits, (state_h_1, state_c_1),(state_h_2, state_c_2) = model(x, (state_h_1, state_c_1),(state_h_2, state_c_2))
            #print("logits shape {} , y shape {}".format(np.shape(logits),np.shape(y)))
            loss = loss_function(logits, y)

            state_h_1 = state_h_1.detach()
            state_c_1 = state_c_1.detach()
            state_h_2 = state_h_2.detach()
            state_c_2 = state_c_2.detach()

            loss_value = loss.item()

            loss.backward()
            _ = torch.nn.utils.clip_grad_norm_(model.parameters(), flags.gradients_norm)
            optimizer.step()
        print(f'batch #{i}:\tloss={loss.item():.10f}')
    return model 
Example #35
def inference(args, cnn_features):
    tf.reset_default_graph()
    with tf.Session() as sess:
        net = LSTM(sess, max_seq_len=25, h_dim=1024)
        net.build_model()
        net.inference(cnn_features=cnn_features,
                      label_file=args.label_file,
                      gen_from=args.gen_from,
                      out_path=args.output_folder,
                      bsize=args.bsize)
Example #36
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument("--batchsize", "-b", type=int, default=64)
	parser.add_argument("--seq-length", "-l", type=int, default=35)
	parser.add_argument("--total-epochs", "-e", type=int, default=300)
	parser.add_argument("--gpu-device", "-g", type=int, default=0)
	parser.add_argument("--grad-clip", "-gc", type=float, default=5)
	parser.add_argument("--learning-rate", "-lr", type=float, default=1)
	parser.add_argument("--weight-decay", "-wd", type=float, default=0.000001)
	parser.add_argument("--dropout-embedding-softmax", "-dos", type=float, default=0.5)
	parser.add_argument("--dropout-rnn", "-dor", type=float, default=0.2)
	parser.add_argument("--momentum", "-mo", type=float, default=0.9)
	parser.add_argument("--optimizer", "-opt", type=str, default="msgd")
	parser.add_argument("--ndim-hidden", "-dh", type=int, default=640)
	parser.add_argument("--num-layers", "-nl", type=int, default=2)
	parser.add_argument("--lr-decay-epoch", "-lrd", type=int, default=20)
	parser.add_argument("--model-filename", "-m", type=str, default="model.hdf5")
	parser.add_argument("--vocab-filename", "-v", type=str, default="vocab.pkl")
	parser.add_argument("--train-filename", "-train", default=None)
	parser.add_argument("--dev-filename", "-dev", default=None)
	parser.add_argument("--test-filename", "-test", default=None)
	args = parser.parse_args()

	assert args.num_layers > 0
	assert args.ndim_hidden > 0

	dataset_train, dataset_dev, dataset_test, vocab_str_id, vocab_id_str = read_data(args.train_filename, args.dev_filename, args.test_filename)
	dataset_dev = np.asarray(dataset_dev, dtype=np.int32)
	dataset_test = np.asarray(dataset_test, dtype=np.int32)
	assert len(dataset_train) > 0

	if os.path.isfile(args.vocab_filename):
		with open(args.vocab_filename, "rb") as f:
			vocab_str_id = pickle.load(f)
			vocab_id_str = pickle.load(f)
	else:
		with open(args.vocab_filename, "wb") as f:
			pickle.dump(vocab_str_id, f)
			pickle.dump(vocab_id_str, f)

	print("#train = {}".format(len(dataset_train)))
	print("#dev = {}".format(len(dataset_dev)))
	print("#test = {}".format(len(dataset_test)))

	vocab_size = len(vocab_str_id)
	lstm = LSTM(vocab_size=vocab_size,
				ndim_hidden=args.ndim_hidden, 
				num_layers=args.num_layers,
				dropout_embedding_softmax=args.dropout_embedding_softmax, 
				dropout_rnn=args.dropout_rnn)
	lstm.load(args.model_filename)

	total_iterations_train = len(dataset_train) // (args.seq_length * args.batchsize)

	optimizer = Optimizer(args.optimizer, args.learning_rate, args.momentum)
	optimizer.setup(lstm.model)
	if args.grad_clip > 0:
		optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))
	if args.weight_decay > 0:
		optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

	using_gpu = False
	if args.gpu_device >= 0:
		cuda.get_device(args.gpu_device).use()
		lstm.model.to_gpu()
		using_gpu = True
	xp = lstm.model.xp

	training_start_time = time.time()
	for epoch in range(args.total_epochs):

		sum_loss = 0
		epoch_start_time = time.time()

		# training
		for itr in range(total_iterations_train):
			# sample a minibatch
			batch_offsets = np.random.randint(0, len(dataset_train) - args.seq_length - 1, size=args.batchsize)
			x_batch = np.empty((args.batchsize, args.seq_length), dtype=np.int32)
			t_batch = np.empty((args.batchsize, args.seq_length), dtype=np.int32)
			for batch_index, offset in enumerate(batch_offsets):
				sequence = dataset_train[offset:offset + args.seq_length]
				teacher = dataset_train[offset + 1:offset + args.seq_length + 1]
				x_batch[batch_index] = sequence
				t_batch[batch_index] = teacher

			if using_gpu:
				x_batch = cuda.to_gpu(x_batch)
				t_batch = cuda.to_gpu(t_batch)

			# update model parameters
			with chainer.using_config("train", True):
				lstm.reset_state()
				loss = 0
				for t in range(args.seq_length):
					x_data = x_batch[:, t]
					t_data = t_batch[:, t]
					y_data = lstm(x_data)
					loss += functions.softmax_cross_entropy(y_data, t_data)

				lstm.model.cleargrads()
				loss.backward()
				optimizer.update()

				sum_loss += float(loss.data)
				assert sum_loss == sum_loss, "Encountered NaN!"

			printr("Training ... {:3.0f}% ({}/{})".format((itr + 1) / total_iterations_train * 100, itr + 1, total_iterations_train))

		lstm.save(args.model_filename)

		# evaluation
		perplexity = -1
		negative_log_likelihood = 0
		if epoch % 10 == 0:
			x_sequence = dataset_dev[:-1]
			t_sequence = dataset_dev[1:]
			seq_length_dev = len(x_sequence)

			if using_gpu:
				x_sequence = cuda.to_gpu(x_sequence)[None, :]
				t_sequence = cuda.to_gpu(t_sequence)[None, :]

			with chainer.no_backprop_mode(), chainer.using_config("train", False):
				lstm.reset_state()
				for t in range(seq_length_dev):
					x_data = x_sequence[:, t]
					t_data = t_sequence[:, t]
					y_data = lstm(x_data)
					negative_log_likelihood += float(functions.softmax_cross_entropy(y_data, t_data).data)

					printr("Computing perplexity ...{:3.0f}% ({}/{})".format((t + 1) / seq_length_dev * 100, t + 1, seq_length_dev))

			assert negative_log_likelihood == negative_log_likelihood, "Encountered NaN!"
			perplexity = math.exp(negative_log_likelihood / len(dataset_dev))

		clear_console()
		print("Epoch {} done in {} sec - loss: {:.6f} - log_likelihood: {} - ppl: {} - lr: {:.3g} - total {} min".format(
			epoch + 1, int(time.time() - epoch_start_time), sum_loss / total_iterations_train, 
			int(-negative_log_likelihood), int(perplexity), optimizer.get_learning_rate(),
			int((time.time() - training_start_time) // 60)))

		if epoch >= args.lr_decay_epoch:
			optimizer.decrease_learning_rate(0.98, final_value=1e-5)
Example #37
    def apply(self, sentence, sentence_mask, use_noise=1):
        n_emb_lstm = self.n_emb_lstm
        n_emb_struct = self.n_emb_struct
        n_emb_share = self.n_emb_share

        src = sentence[:-1]
        src_mask = sentence_mask[:-1]
        tgt = sentence[1:]
        tgt_mask = sentence_mask[1:]

        if False: #(share only part of embedding)
            n_emb_all = n_emb_lstm + n_emb_struct - n_emb_share
            emb_all_range = T.arange(n_emb_all)
            emb_lstm_range = T.arange(n_emb_lstm)
            emb_struct_range = T.arange(n_emb_lstm - n_emb_share, n_emb_all)

            table = lookup_table(n_emb_all, self.vocab_size, name='Wemb')
            state_below = table.apply(src, emb_all_range)
            state_below_lstm = table.apply(src, emb_lstm_range)
            state_below_struct = table.apply(src, emb_struct_range)
            self.layers.append(table)

            rnn = SLSTM(n_emb_lstm, n_emb_struct, n_emb_share, self.n_hids, self.n_shids, self.n_structs)
            #rnn = LSTM(self.n_in, self.n_hids)
            hiddens = rnn.merge_out(state_below, state_below_lstm, state_below_struct, src_mask)
            self.layers.append(rnn)

        elif True: # use rnn_pyramid
            emb_lstm_range = T.arange(n_emb_lstm)
            table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
            state_below = table.apply(src, emb_lstm_range)
            self.layers.append(table)

            if self.dropout < 1.0:
                state_below = dropout_layer(state_below, use_noise, self.dropout)

            rnn = rnn_pyramid_layer(n_emb_lstm, self.n_hids)
            hiddens, cells, structs = rnn.apply(state_below, src_mask)
            self.layers.append(rnn)
            self.structs = structs

        else: # share all embedding
            emb_lstm_range = T.arange(n_emb_lstm)
            table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
            state_below = table.apply(src, emb_lstm_range)
            self.layers.append(table)

            if self.dropout < 1.0:
                state_below = dropout_layer(state_below, use_noise, self.dropout)

            rnn = LSTM(n_emb_lstm, self.n_hids)
            hiddens, cells = rnn.apply(state_below, src_mask)
            #hiddens = rnn.merge_out(state_below, src_mask)
            self.layers.append(rnn)

            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            rnn1 = LSTM(self.n_hids, self.n_hids)
            hiddens, cells = rnn1.apply(hiddens, src_mask)
            #hiddens = rnn.merge_out(state_below, src_mask)
            self.layers.append(rnn1)

            maxout = maxout_layer()
            states = T.concatenate([state_below, hiddens], axis=2)
            hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask)
            self.layers.append(maxout)

            #rnng = LSTM(n_emb_lstm, self.n_hids)
            #hiddens, cells = rnn.apply(state_below, src_mask)
            #hiddensg = rnng.merge_out(state_below, src_mask)
            #self.layers.append(rnng)

            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            #chunk = chunk_layer(n_lstm_in + n_lstm_out, n_lstm_out, n_chunk_out, 6)
            n_emb_hid = n_emb_lstm + self.n_hids
            emb_hid = T.concatenate([state_below, hiddens], axis=2)
            #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
            #hiddens = chunk.merge_out(hiddens, hiddens, src_mask, merge_how="for_struct",\
            #        state_below_other=state_below, n_other=n_emb_lstm)
            chunk = chunk_layer(n_emb_hid, self.n_hids, self.n_hids, self.n_structs)
            hiddens = chunk.merge_out(emb_hid, hiddens, src_mask, merge_how="for_struct",\
                    state_below_other=None, n_other=0)
            #chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
            #hiddens = chunk.merge_out(hiddens, hiddensg, src_mask, merge_how="both",\
            #        state_below_other=state_below, n_other=n_emb_lstm)
            self.layers.append(chunk)

        # apply dropout
        if self.dropout < 1.0:
            # dropout is applied to the output of maxout in ghog
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
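The self.L1 and self.L2 sums built at the end of apply are typically scaled by small weights and added to the cross-entropy cost before computing gradients. A hedged, self-contained sketch of that pattern with toy parameters (the weights and the placeholder cost are assumptions, not part of this example):

import numpy as np
import theano
import theano.tensor as T

# toy stand-in for the parameter list collected in self.params
W = theano.shared(np.random.randn(3, 3).astype("float32"), name="W")
params = [W]

# same construction as self.L2 / self.L1 above
L2 = sum(T.sum(p ** 2) for p in params)
L1 = sum(T.sum(abs(p)) for p in params)

x = T.matrix("x")
data_cost = T.mean(T.dot(x, W) ** 2)             # placeholder cost, not the language-model cost
total_cost = data_cost + 1e-5 * L2 + 1e-6 * L1   # assumed regularization weights
grads = T.grad(total_cost, params)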
Example #38
0
    def apply(self, sentence, sentence_mask, use_noise=1):
        n_emb_lstm = self.n_emb_lstm
        n_emb_struct = self.n_emb_struct
        n_emb_share = self.n_emb_share

        src = sentence[:-1]
        src_mask = sentence_mask[:-1]
        tgt = sentence[1:]
        tgt_mask = sentence_mask[1:]

        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)

        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        rnn = LSTM(n_emb_lstm, self.n_hids)
        hiddens, cells = rnn.apply(state_below, src_mask)
        #hiddens = rnn.merge_out(state_below, src_mask)
        self.layers.append(rnn)

        if True:  # second LSTM layer stacked on the first
            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            rnn1 = LSTM(self.n_hids, self.n_hids)
            hiddens, cells = rnn1.apply(hiddens, src_mask)
            #hiddens = rnn.merge_out(state_below, src_mask)
            self.layers.append(rnn1)

        if True:  # pyramid RNN layer fed with the LSTM hiddens and the word embeddings
            if self.dropout < 1.0:
                hiddens = dropout_layer(hiddens, use_noise, self.dropout)

            rnnp = rnn_pyramid_layer(self.n_hids, n_emb_lstm, self.n_hids)
            hiddens, cells, structs, pyramid = rnnp.apply(hiddens, state_below, src_mask)
            self.layers.append(rnnp)
            #self.structs = structs
            self.rnn_len = rnnp.n_steps
        self.sent_len = sentence.shape[0]

        if True:  # maxout over the concatenation of word embeddings and hidden states
            maxout = maxout_layer()
            states = T.concatenate([state_below, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)

        for layer in self.layers:
            self.params.extend(layer.params)

        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
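The maxout_n_fold = 2 argument above suggests the standard maxout non-linearity: project to n_fold * n_out units and take an element-wise maximum over each group of n_fold candidates. A numpy sketch of that pooling step (the projection weights are random placeholders; the actual maxout_layer internals are not shown in this example):

import numpy as np

def maxout(x, W, b, n_fold=2):
    # x: (batch, n_in), W: (n_in, n_out * n_fold), b: (n_out * n_fold,)
    z = x.dot(W) + b
    n_out = z.shape[1] // n_fold
    # group the projected units into n_fold candidates per output and keep the max
    return z.reshape(z.shape[0], n_out, n_fold).max(axis=-1)

x = np.random.randn(4, 10).astype("float32")
W = np.random.randn(10, 6 * 2).astype("float32")
b = np.zeros(6 * 2, dtype="float32")
print(maxout(x, W, b).shape)  # (4, 6)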
Example #39
0
    def apply_morph_attention(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
        """
            sentence : sentence * batch
            sentence_morph : sentence * batch * morph
            src_morph_emb : sentence * batch * morph * n_emb_morph
            1. word morph lookup -> dropout -> attention
            2. lstm -> dropout
            3. lstm -> maxout -> dropout
            4. logistic
        """
        src, src_mask = sentence[:-1], sentence_mask[:-1]
        tgt, tgt_mask = sentence[1:], sentence_mask[1:]
        src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

        #word lookup table
        emb_lstm_range = T.arange(self.n_emb_lstm)
        table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
        src_emb = table.apply(src, emb_lstm_range)
        self.layers.append(table)

        #morph lookup table
        emb_morph_range = T.arange(self.n_emb_morph)
        table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
        src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
        self.layers.append(table_morph)

        if self.dropout < 1.0:
            src_emb = DropoutLayer(src_emb, use_noise, self.dropout)
            src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

        lstm_att_1st = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
        hiddens, cells = lstm_att_1st.apply(src_emb, src_morph_emb, src_mask)
        self.layers.append(lstm_att_1st)
        #print len(hiddens) , hiddens[0].ndim

        rnn_layer_2nd = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn_layer_2nd.apply(hiddens, src_mask)
        self.layers.append(rnn_layer_2nd)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        rnn_layer_3rd = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn_layer_3rd.apply(hiddens, src_mask)
        self.layers.append(rnn_layer_3rd)

        if True:  # maxout over the concatenation of word embeddings and hidden states
            maxout = MaxoutLayer()
            #src_emb : sentence * batch * n_emb
            #hiddens : sentence * batch * hids
            states = T.concatenate([src_emb, hiddens], axis=2)
            maxout_n_fold = 2
            hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids, src_mask, maxout_n_fold)
            self.layers.append(maxout)

        if self.dropout < 1.0:
            hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

        logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
        self.layers.append(logistic_layer)

        self.cost = logistic_layer.cost(tgt, tgt_mask)
        for layer in self.layers:
            self.params.extend(layer.params)
        self.L2 = sum(T.sum(item ** 2) for item in self.params)
        self.L1 = sum(T.sum(abs(item)) for item in self.params)
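LstmMorphAttention itself is not shown, but the docstring implies that each word attends over the embeddings of its morphemes. A hedged numpy sketch of that pooling step for a single time step, assuming a simple dot-product scoring and equal word/morpheme embedding sizes (both assumptions, not the layer's actual implementation):

import numpy as np

def attend_over_morphs(word_emb, morph_emb):
    # word_emb:  (batch, n_emb)           embedding of the current word
    # morph_emb: (batch, n_morph, n_emb)  embeddings of its morphemes
    scores = np.einsum("be,bme->bm", word_emb, morph_emb)   # dot-product scores
    weights = np.exp(scores - scores.max(axis=1, keepdims=True))
    weights /= weights.sum(axis=1, keepdims=True)           # softmax over the morph axis
    return (weights[:, :, None] * morph_emb).sum(axis=1)    # attention-weighted average

word = np.random.randn(8, 16).astype("float32")
morphs = np.random.randn(8, 5, 16).astype("float32")
print(attend_over_morphs(word, morphs).shape)  # (8, 16)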