예제 #1
0
    def __init__(self, model):
        """Build the encoder/decoder/predictor LSTM stacks from *model*.

        The encoder uses the spatial LSTM variant; decoder and predictor use
        plain LSTMs.  If ``model.timestamp`` is non-empty, weights are
        restored from the most recent checkpoint.
        """
        self.model_ = model
        self.lstm_stack_enc_ = lstm_spatial.LSTMStack()
        self.lstm_stack_dec_ = lstm.LSTMStack()
        self.lstm_stack_pre_ = lstm.LSTMStack()
        for l in model.lstm:
            self.lstm_stack_enc_.Add(lstm_spatial.LSTM(l))
        if model.dec_seq_length > 0:
            for l in model.lstm_dec:
                self.lstm_stack_dec_.Add(lstm.LSTM(l))
        if model.pre_seq_length > 0:
            for l in model.lstm_pre:
                self.lstm_stack_pre_.Add(lstm.LSTM(l))
        assert model.dec_seq_length > 0
        self.is_conditional_dec_ = model.dec_conditional
        if self.is_conditional_dec_ and model.dec_seq_length > 0:
            assert self.lstm_stack_dec_.HasInputs()
        # Hard-coded overrides of the corresponding model flags (originals
        # kept in the trailing comments).
        self.squash_relu_ = False  #model.squash_relu
        self.squash_relu_lambda_ = 0  #model.squash_relu_lambda
        self.relu_data_ = False  #model.relu_data
        self.binary_data_ = True  #model.binary_data or self.squash_relu_
        self.only_occ_predict_ = model.only_occ_predict

        # Restore weights from the latest checkpoint, if any.
        if len(model.timestamp) > 0:
            old_st = model.timestamp[-1]
            ckpt = os.path.join(model.checkpoint_dir,
                                '%s_%s.h5' % (model.name, old_st))
            # Open read-only; h5py.File without an explicit mode is
            # deprecated, and the context manager guarantees the file is
            # closed even if a Load() raises.
            with h5py.File(ckpt, 'r') as f:
                self.lstm_stack_enc_.Load(f)
                if model.dec_seq_length > 0:
                    self.lstm_stack_dec_.Load(f)
                if model.pre_seq_length > 0 and not self.only_occ_predict_:
                    self.lstm_stack_pre_.Load(f)
    def __init__(self, model, board, board_ladv, board_sup):
        """Build the LSTM stacks plus the Caffe CNN solver/supervisor nets.

        @param model: model configuration object.
        @param board: GPU board id for the main model.
        @param board_ladv: GPU board id for the adversarial CNN solver.
        @param board_sup: GPU board id for the supervised CNN net.
        """
        self.model_ = model
        self.board_ = board
        self.board_ladv_ = board_ladv
        self.board_sup_ = board_sup
        self.lstm_stack_enc_ = lstm_spatial.LSTMStack()
        self.lstm_stack_dec_ = lstm.LSTMStack()
        self.lstm_stack_pre_ = lstm.LSTMStack()
        # Hard-coded Caffe artifacts for the adversarial/supervised CNNs.
        model_file_sup = './data/bk20151009/part_1/bvlc_googlenet_quick_iter_231760.caffemodel'
        solver_file = './data/googlenet_ladv_solver.prototxt'
        prototxt_file_sup = './data/train_val_quick_grad.prototxt'
        mean_file = './data/bk20151009/part_1/lmdb_casia_full_part1_mean.binaryproto'
        self.cnn_solver = caffe.SGDSolver(solver_file)
        self.cnn_solver.net.copy_from(model_file_sup)
        self.cnn_net_sup = caffe.Net(prototxt_file_sup, model_file_sup,
                                     caffe.TRAIN)
        # Per-pixel mean image, reshaped to (channels=1, 128, 128).
        mean = read_mean(mean_file)
        mean = mean.reshape((1, 128, 128))
        self.cnn_mean_ = mean
        caffe.set_mode_gpu()
        # NOTE(review): selecting the device only when board_ladv_ EQUALS
        # board_ looks suspicious (one would expect != for a separate
        # adversarial board) — confirm the intended condition with the owner.
        if self.board_ladv_ == self.board_:
            caffe.set_device(self.board_ladv_)
        for l in model.lstm:
            self.lstm_stack_enc_.Add(lstm_spatial.LSTM(l))
        if model.dec_seq_length > 0:
            for l in model.lstm_dec:
                self.lstm_stack_dec_.Add(lstm.LSTM(l))
        if model.pre_seq_length > 0:
            for l in model.lstm_pre:
                self.lstm_stack_pre_.Add(lstm.LSTM(l))
        assert model.dec_seq_length > 0
        self.is_conditional_dec_ = model.dec_conditional
        if self.is_conditional_dec_ and model.dec_seq_length > 0:
            assert self.lstm_stack_dec_.HasInputs()
        # Hard-coded overrides of the corresponding model flags.
        self.squash_relu_ = False  #model.squash_relu
        self.squash_relu_lambda_ = 0  #model.squash_relu_lambda
        self.relu_data_ = False  #model.relu_data
        self.binary_data_ = True  #model.binary_data or self.squash_relu_
        self.only_occ_predict_ = model.only_occ_predict

        # Restore weights from the latest checkpoint, if any.
        if len(model.timestamp) > 0:
            old_st = model.timestamp[-1]
            ckpt = os.path.join(model.checkpoint_dir,
                                '%s_%s.h5' % (model.name, old_st))
            # Read-only open; context manager closes even if Load() raises.
            with h5py.File(ckpt, 'r') as f:
                self.lstm_stack_enc_.Load(f)
                if model.dec_seq_length > 0:
                    self.lstm_stack_dec_.Load(f)
                if model.pre_seq_length > 0 and not self.only_occ_predict_:
                    self.lstm_stack_pre_.Load(f)
예제 #3
0
    def __init__(self, model):
        """Build encoder, decoder and future-predictor LSTM stacks.

        At least one of decoder / future predictor must be configured.
        If ``model.timestamp`` is non-empty, weights for all three stacks
        are restored from the most recent checkpoint.
        """
        self.model_ = model  # keeps the model configurations alongside global configurations

        # stacks of encoder, decoder and future predictions
        self.lstm_stack_enc_ = lstm.LSTMStack()
        self.lstm_stack_dec_ = lstm.LSTMStack()
        self.lstm_stack_fut_ = lstm.LSTMStack()

        # whether decoder/future predictor start from a copy of the encoder state
        self.decoder_copy_init_state_ = model.decoder_copy_init_state
        self.future_copy_init_state_ = model.future_copy_init_state

        # add LSTM blocks for encoder, decoder and future predictor
        for l in model.lstm:
            self.lstm_stack_enc_.Add(lstm.LSTM(l))
        if model.dec_seq_length > 0:
            for l in model.lstm_dec:
                self.lstm_stack_dec_.Add(lstm.LSTM(l))
        if model.future_seq_length > 0:
            for l in model.lstm_future:
                self.lstm_stack_fut_.Add(lstm.LSTM(l))

        # at least one of the two output branches must exist
        assert model.dec_seq_length > 0 or model.future_seq_length > 0
        # whether decoder and future predictor are conditioned on inputs
        self.is_conditional_dec_ = model.dec_conditional
        self.is_conditional_fut_ = model.future_conditional

        if self.is_conditional_dec_ and model.dec_seq_length > 0:
            assert self.lstm_stack_dec_.HasInputs()
        if self.is_conditional_fut_ and model.future_seq_length > 0:
            assert self.lstm_stack_fut_.HasInputs()

        self.squash_relu_ = model.squash_relu
        self.binary_data_ = model.binary_data or model.squash_relu
        self.squash_relu_lambda_ = model.squash_relu_lambda
        self.relu_data_ = model.relu_data

        # load model weights if a previous checkpoint is available
        if len(model.timestamp) > 0:
            old_st = model.timestamp[-1]
            ckpt = os.path.join(model.checkpoint_dir,
                                '%s_%s.h5' % (model.name, old_st))
            # Read-only open; h5py.File without a mode is deprecated, and
            # the context manager closes the file even if Load() raises.
            with h5py.File(ckpt, 'r') as f:
                self.lstm_stack_enc_.Load(f)
                self.lstm_stack_dec_.Load(f)
                self.lstm_stack_fut_.Load(f)
예제 #4
0
def run(df, fold):
    """Train and validate the IMDB LSTM sentiment model for one CV fold.

    :param df: dataframe with ``review``, ``sentiment`` and ``kfold`` columns
    :param fold: current fold index, int
    """
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    # Fit the tokenizer on the FULL dataframe so train/valid share one vocab.
    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(df.review.values.tolist())

    xtrain = tokenizer.texts_to_sequences(train_df.review.values)
    xtest = tokenizer.texts_to_sequences(valid_df.review.values)

    # Zero-pad every sequence to a common length.
    xtrain = tf.keras.preprocessing.sequence.pad_sequences(
        xtrain, maxlen=config.MAX_LEN)
    xtest = tf.keras.preprocessing.sequence.pad_sequences(
        xtest, maxlen=config.MAX_LEN)

    train_dataset = dataset.IMDBDataset(reviews=xtrain,
                                        targets=train_df.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=2)

    valid_dataset = dataset.IMDBDataset(reviews=xtest,
                                        targets=valid_df.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2)

    print('Loading Embeddings')
    embedding_dict = load_vectors('./crawl-300d-2M.vec')
    print('Embeddings Loaded')
    embedding_matrix = create_embedding_matrix(tokenizer.word_index,
                                               embedding_dict)

    device = torch.device('cuda')
    model = lstm.LSTM(embedding_matrix)
    model.to(device)

    # NOTE(review): lr=0.1 is unusually high for Adam — confirm intentional.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

    print('Training model')
    best_accuracy = 0
    early_stopping_counter = 0

    for epoch in range(config.EPOCHS):
        engine.train(train_data_loader, model, optimizer, device)

        outputs, targets = engine.evaluate(valid_data_loader, model, device)
        # Threshold sigmoid outputs at 0.5 to get hard predictions.
        outputs = np.array(outputs) >= 0.5

        accuracy = metrics.accuracy_score(targets, outputs)
        print('Fold: ', fold, ' EPOCH: ', epoch, ' Accuracy Score: ', accuracy)

        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1

        # Fix: the counter was previously incremented but never consulted,
        # so training always ran all config.EPOCHS.  Stop after 5 epochs
        # without improvement (same patience as the sibling run() below).
        if early_stopping_counter > 4:
            break
예제 #5
0
def main():
    """Prepare the char-level dataset and either train or sample the LSTM.

    Relies on module-level flags ``restore``, ``test``, ``heavy_device`` and
    ``light_device``.
    """
    # NOTE(review): run_id is assigned but never used in this function;
    # kept because np.random.randint advances the global RNG state.
    run_id = np.random.randint(1000)

    if restore:
        # Reload the preprocessed corpus and vocab mappings from disk.
        with open('./saves/state.pkl', 'rb') as f:
            X, Y, char2ix, ix2char = pickle.load(f)
    else:
        # First run: preprocess the corpus and cache the result.
        X, Y, char2ix, ix2char = data.read_data("warandpeace.txt",
                                                sequence_length=100)

        with open('./saves/state.pkl', 'wb') as f:
            pickle.dump([X, Y, char2ix, ix2char], f)

    train_set = data.train_set(X, Y, 128)

    solver = lstm.LSTM(num_classes=len(char2ix),
                       heavy_device=heavy_device,
                       light_device=light_device,
                       restore=restore)

    # Idiomatic truthiness test (was `test == False`).
    if not test:
        solver.train(train_set)
    else:
        print(solver.generate(char2ix, ix2char, 100))
예제 #6
0
def main(model):
    """Train and save a QA model: LSTM when requested, otherwise SVM.

    :param model: model selector string; 'lstm' picks the LSTM, anything
        else picks the SVM.
    """
    if model == 'lstm':
        import lstm
        qa = lstm.LSTM()
    else:
        import svm
        qa = svm.SVM()
    # NOTE(review): the original re-checked `qa == None` and fell back to
    # svm.SVM(); both branches always assign qa, and the fallback would
    # have raised NameError when model == 'lstm' (svm never imported), so
    # the dead check was removed.
    qa.train_save()
예제 #7
0
def gen_lstm_with(lstm_name, in_stack, exp_stack, inter_len):
    """Construct an lstm.LSTM sized from the given input/expected stacks.

    :param lstm_name: name for the new LSTM instance.
    :param in_stack: 2-D input batch; its second axis gives the input width.
    :param exp_stack: 2-D expected-output batch; its second axis gives the
        full output width.
    :param inter_len: width of the intermediate (cell) output.
    :return: the constructed lstm.LSTM (uses module-level SEQUENCE_LEN and
        BATCH_SIZE).
    """
    in_len = in_stack.shape[1]
    out_len = inter_len
    full_out_len = exp_stack.shape[1]

    # NOTE(review): the original also computed CELL_STATE_LEN = out_len and
    # HIDDEN_LEN = out_len + in_len but never used them; dropped as unused.
    return lstm.LSTM(lstm_name, SEQUENCE_LEN, in_len, out_len, full_out_len,
                     BATCH_SIZE)
예제 #8
0
 def __init__(self, model):
   """Build the LSTM stack from *model* and optionally restore weights.

   If ``model.timestamp`` is non-empty, the stack is loaded from the most
   recent checkpoint in ``model.checkpoint_dir``.
   """
   self.model_ = model
   self.lstm_stack_ = lstm.LSTMStack()
   for l in model.lstm:
     self.lstm_stack_.Add(lstm.LSTM(l))
   self.squash_relu_ = model.squash_relu
   self.squash_relu_lambda_ = model.squash_relu_lambda

   # Restore weights from the latest checkpoint, if any.
   if len(model.timestamp) > 0:
     old_st = model.timestamp[-1]
     ckpt = os.path.join(model.checkpoint_dir, '%s_%s.h5' % (model.name, old_st))
     # Read-only open; h5py.File without a mode is deprecated, and the
     # context manager closes the file even if Load() raises.
     with h5py.File(ckpt, 'r') as f:
       self.lstm_stack_.Load(f)
예제 #9
0
	def load_context(self, context):
		"""Load vocab mappings from the saved state and build the LSTM solver.

		# NOTE(review): looks like a model-serving hook (e.g. an MLflow
		# pyfunc); *context* is accepted but unused here — confirm.
		"""
		# Serve on CPU only; no GPU is assumed at load time.
		light_device = "/cpu:0"
		heavy_device = "/cpu:0"

		# Only the char<->index mappings are needed; the X/Y training
		# arrays stored alongside them are discarded.
		with open('./saves/state.pkl', 'rb') as f:
			_, _, self.char2ix, self.ix2char = pickle.load(f)

		# restore=True: the solver loads previously trained weights.
		self.solver = lstm.LSTM(
			num_classes=len(self.char2ix),
			heavy_device=heavy_device,
			light_device=light_device,
			restore=True,
		)
예제 #10
0
    def compare_custom_and_cuda(batch_size, length, input_size, hidden_size, num_layers, bias,
                                inter_layer_dropout, recurrent_dropout, batch_first, skip_connection,
                                jit_forward_custom):
        """Check the custom LSTM against torch.nn.LSTM on one random input.

        Builds both modules with matching hyper-parameters, copies the torch
        weights into the custom module, runs forward and backward on the same
        input/initial state, and asserts that outputs, final hidden states
        and all parameter gradients agree.

        # NOTE(review): for a meaningful comparison the dropout arguments
        # presumably must be 0 (dropout is stochastic) — confirm callers.
        """
        # Random input in whichever layout batch_first dictates.
        if batch_first:
            input = torch.rand(batch_size, length, input_size)
        else:
            input = torch.rand(length, batch_size, input_size)

        # Shared initial (h_0, c_0) pair for both models.
        hx = torch.rand(num_layers, batch_size, hidden_size), torch.rand(num_layers, batch_size, hidden_size)

        torch_lstm = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                                   num_layers=num_layers, bias=bias, dropout=inter_layer_dropout,
                                   batch_first=batch_first, bidirectional=False)

        custom_lstm = lstm.LSTM(input_size=input_size, hidden_size=hidden_size,
                                num_layers=num_layers, bias=bias,
                                inter_layer_dropout=inter_layer_dropout, recurrent_dropout=recurrent_dropout,
                                skip_connection=skip_connection, batch_first=batch_first)

        # Make both models numerically identical before comparing.
        CustomLSTMTest.copy_weights_from_torch_to_custom(torch_lstm, custom_lstm, bias)

        # Optionally exercise the TorchScript-compiled forward path.
        if jit_forward_custom:
            custom_lstm_forward = torch.jit.script(custom_lstm)
        else:
            custom_lstm_forward = custom_lstm
        torch_output, torch_hidden = torch_lstm(input, hx)
        custom_output, custom_hidden = custom_lstm_forward(input, hx)

        # Scalar losses so gradients flow through every output element.
        torch_output.pow(2).sum().backward()
        custom_output.pow(2).sum().backward()

        # Final hidden and cell states must match.
        torch_testing.assert_allclose(torch_hidden[0], custom_hidden[0])
        torch_testing.assert_allclose(torch_hidden[1], custom_hidden[1])

        torch_testing.assert_allclose(custom_output, torch_output)

        # Compare every populated parameter gradient pairwise.
        torch_grads = [p.grad for p in torch_lstm.parameters() if p.grad is not None]
        custom_grads = [p.grad for p in custom_lstm.parameters() if p.grad is not None]

        assert len(torch_grads) == len(custom_grads)

        for i in range(len(torch_grads)):
            torch_testing.assert_allclose(torch_grads[i], custom_grads[i])
예제 #11
0
    #############test data
    # Build the test-set feature pipeline: raw labelled sentences ->
    # word lists -> candidate feature pairs -> distance features.
    test_sentence_label_list = utils.read_file(config.test_file)
    test_sentence_word_list = utils.sentence_extract(test_sentence_label_list)
    test_feat, test_candidate_category = utils.exact_feat(
        test_sentence_word_list)
    test_distence_feat = utils.cal_distence_feat(test_feat)

    # file = open('test_data.txt', 'w',encoding='utf-8')
    # for idx in range(len(test_feat)):
    #     for idy in range(len(test_feat[idx])):
    #         file.write(str(test_sentence_word_list[idx].words.strip('\n')) + '\t' + str(test_feat[idx][idy][0].word) + '\t' + str(test_feat[idx][idy][1].word) + '\t' + str(test_candidate_category[idx][idy]) + '\t' + str(test_distence_feat[idx][idy]) + '\t' + str(test_feat[idx][idy][0].sentiment) + '\t' + str(test_feat[idx][idy][1].sentiment) + '\n')
    # file.close()
    ######################

    # Index the pre-dumped train/test/dev splits.  The alphabet is frozen
    # after the training split unless --static is set.
    train_data_set = data.sentence_index('./train_data.txt')
    if not args.static:
        data.fix_alphabet()
    test_data_set = data.sentence_index('./test_data.txt')
    dev_data_set = data.sentence_index('./dev_data.txt')

    # Fix: `is not ''` compared identity, not equality, and raises a
    # SyntaxWarning on Python >= 3.8; use != for the emptiness check.
    if config.pretrained_wordEmb_file != '':
        data.load_pretrained_emb_uniform(config.pretrained_wordEmb_file,
                                         config.word_dims)

    model = lstm.LSTM(config, data)

    train.train(train_data_set, dev_data_set, test_data_set, model, config,
                data)
예제 #12
0
파일: main.py 프로젝트: tomzhang/LiuSTM
		# Reload the cached train/test dataframes from the pickle dump.
		df_train_1 = pickle.load(open(root_dir + "df_train_1.p",'rb'))
		df_test_1 = pickle.load(open(root_dir + "df_test_1.p",'rb'))



	#---------------------- 1 time training ------------------------
	#-----------------------Load/Train the LSTM model---------------

	# Combine both training partitions into one training set.
	train = train_1 + train_2

	# True to train the model, False to load the existing weights file.
	print "Now the maxlen =", maxlen
	if True:
		# Training branch: build a fresh LSTM and persist its weights.
		dir_file = "weights/201702281025_e1_1k1k_l0_b64.p"
		print "Starting to training the model..., saving to", dir_file
		sls=lstm.LSTM(dir_file, maxlen, load=False, training=True)
		sls.train_lstm(train, epoch, train_1, test_1)
		sls.save_model()
	else:
		# Inference branch: load previously saved weights instead.
		dir_file = "weights/201702212157_e100_1k1k_l0.p"
		print "NO Training. Load the existed model:", dir_file
		sls=lstm.LSTM(dir_file, maxlen, load=True, training=False)


	#--- New method to evaluate the results ------------------------
	#--------------------Evaluate the results using new method------
	if True:
		print "Evaluate the model using fast estimation..."
		# Project each pair element into the embedding space for scoring.
		projection1_train, projection2_train = sls.seq2vec(train_1)
		projection1_test, projection2_test = sls.seq2vec(test_1)
예제 #13
0
def run(df, fold):
    """
    Run training and validation for a given fold.

    :param df: dataframe with ``question``, ``OpenStatus`` and ``kfold`` columns
    :param fold: current fold index, int
    """
    # training dataframe
    train_df = df[df.kfold != fold].reset_index(drop=True)
    # validation dataframe
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    print("Fitting tokenizer")
    # Fit on the FULL dataframe so train/valid share one vocabulary.
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='!~\t\n', )
    tokenizer.fit_on_texts(df.question.values.tolist())

    # convert training data to sequences of word indices
    xtrain = tokenizer.texts_to_sequences(train_df.question.values)
    # convert validation data to sequences of word indices
    xtest = tokenizer.texts_to_sequences(valid_df.question.values)
    # zero-pad the training sequences (padding on the left side)
    xtrain = tf.keras.preprocessing.sequence.pad_sequences(
        xtrain, maxlen=config.MAX_LEN)
    # zero-pad the validation sequences
    xtest = tf.keras.preprocessing.sequence.pad_sequences(
        xtest, maxlen=config.MAX_LEN)
    # dataset class for training
    train_dataset = dataset.QUORADataset(question=xtrain,
                                         OpenStatus=train_df.OpenStatus.values)

    # create torch DataLoader
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=2)

    # dataset class for validation
    valid_dataset = dataset.QUORADataset(question=xtest,
                                         OpenStatus=valid_df.OpenStatus.values)
    # create torch DataLoader
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1)

    print("loading embeddings")
    embedding_dict = load_vectors("../input/embeddings/crawl-300d-2M.vec")
    embedding_matrix = create_embedding_matrix(tokenizer.word_index,
                                               embedding_dict)

    # create torch device and move the LSTM model onto it
    device = torch.device("cuda")
    model = lstm.LSTM(embedding_matrix)
    model.to(device)
    # Adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)

    print("Training model")
    # best accuracy seen so far
    best_accuracy = 0
    # epochs without improvement
    early_stopping_counter = 0
    # train and validate for all epochs
    for epoch in range(config.EPOCHS):
        # train one epoch
        engine.train(train_data_loader, model, optimizer, device)
        # validate
        outputs, targets = engine.evaluate(valid_data_loader, model, device)
        print(outputs[:10])
        # keep the raw scores for ROC-AUC before thresholding
        outputs1 = outputs
        # threshold at 0.5 for the hard-label metrics
        outputs = np.array(outputs) >= 0.5
        # accuracy and confusion matrix on the hard labels
        accuracy = metrics.accuracy_score(targets, outputs)
        conf_m = confusion_matrix(targets, outputs)
        print(conf_m)
        # ROC-AUC on the raw (un-thresholded) scores
        roc_score = roc_auc_score(targets, outputs1)
        print('ROC AUC score\n', roc_score)

        print(f"Fold:{fold}, Epoch:{epoch}, Accuracy_score ={accuracy}")
        print("---")
        # early stopping: stop after 5 epochs without improvement
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1

        if early_stopping_counter > 4:
            break
예제 #14
0
    def __init__(self,
                 rnn_type,
                 num_tokens,
                 embedding_size,
                 hidden_size,
                 num_layers,
                 input_dropout=0.,
                 input_noise_std=0.,
                 recurrent_dropout=0.,
                 inter_layer_dropout=0.,
                 output_dropout=0.,
                 output_noise_std=0.,
                 up_project_embedding=False,
                 up_project_hidden=False,
                 tie_weights=False,
                 lstm_skip_connection=False,
                 drop_state_probability=0.01):
        """Build an encoder -> RNN -> decoder language model.

        rnn_type selects the recurrent core: 'LSTM'/'GRU' (torch builtins),
        'custom_LSTM' (lstm.LSTM), or 'RNN_TANH'/'RNN_RELU' (nn.RNN).
        Dropout and Gaussian-noise regularizers are mutually exclusive on
        each of the input and output sides.  tie_weights shares the decoder
        linear weights with the embedding (Press & Wolf 2016; Inan et al.
        2016), optionally adding an up/down projection when hidden_size
        differs from embedding_size.
        """
        super(RNNModel, self).__init__()

        # Making sure either dropout or gaussian noise is activated
        assert not (input_dropout > 0. and input_noise_std > 0.)
        assert not (output_dropout > 0. and output_noise_std > 0.)

        self.input_dropout_or_noise = get_noise_layer(input_dropout,
                                                      input_noise_std)

        # Encoder: embedding, optionally followed by an up-projection so the
        # RNN input width matches hidden_size.
        encoder_layer_list = []
        embedding_layer = nn.Embedding(num_tokens, embedding_size)

        encoder_layer_list.append(embedding_layer)
        if embedding_size != hidden_size and up_project_embedding:
            logging.info(
                "Encoder: adding linear transformation to up project embedding to hidden"
            )
            encoder_layer_list.append(
                nn.Linear(embedding_size, hidden_size, bias=False))
            rnn_input_size = hidden_size
        else:
            rnn_input_size = embedding_size
        self.encoder = nn.Sequential(*encoder_layer_list)

        # Recurrent core selection.
        if rnn_type in ['LSTM', 'GRU']:
            if recurrent_dropout > 0.:
                logging.warning(
                    "recurrent_dropout argument is only used in the custom LSTM model"
                )

            self.rnn = getattr(nn, rnn_type)(rnn_input_size,
                                             hidden_size,
                                             num_layers,
                                             dropout=inter_layer_dropout)
        elif rnn_type == "custom_LSTM":
            # Custom implementation supporting recurrent dropout and skip
            # connections; expects time-major input (batch_first=False).
            self.rnn = lstm.LSTM(rnn_input_size,
                                 hidden_size,
                                 num_layers,
                                 bias=True,
                                 inter_layer_dropout=inter_layer_dropout,
                                 recurrent_dropout=recurrent_dropout,
                                 skip_connection=lstm_skip_connection,
                                 batch_first=False)
        else:
            # Remaining options map to vanilla nn.RNN nonlinearities.
            try:
                nonlinearity = {
                    'RNN_TANH': 'tanh',
                    'RNN_RELU': 'relu'
                }[rnn_type]
            except KeyError:
                raise ValueError(
                    """An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']"""
                )
            self.rnn = nn.RNN(rnn_input_size,
                              hidden_size,
                              num_layers,
                              nonlinearity=nonlinearity,
                              dropout=inter_layer_dropout)

        self.output_dropout_or_noise = get_noise_layer(output_dropout,
                                                       output_noise_std)

        # Decoder: optional projection plus the output linear layer.
        decoder_list = []
        linear_layer = nn.Linear(embedding_size, num_tokens, bias=False)
        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        # decoder_is_sequential is False only when the decoder layers cannot
        # be applied as a simple nn.Sequential (up-projection case).
        self.decoder_is_sequential = True
        if tie_weights:
            linear_layer.weight = embedding_layer.weight

            if hidden_size != embedding_size:
                if up_project_hidden:
                    self.decoder_is_sequential = False
                    logging.info(
                        "Decoder: adding linear transformation to up project embedding to to hidden"
                    )
                    decoder_list.append(nn.Linear(embedding_size, hidden_size))
                else:
                    logging.info(
                        "Decoder: adding linear transformation to down project hidden to embedding"
                    )
                    decoder_list.append(nn.Linear(hidden_size, embedding_size))

        decoder_list.append(linear_layer)
        if self.decoder_is_sequential:
            self.decoder = nn.Sequential(*decoder_list)
        else:
            self.decoder = nn.ModuleList(decoder_list)

        # Bookkeeping used elsewhere in the model.
        self.embedding_size = embedding_size
        self.rnn_type = rnn_type
        self.nhid = hidden_size
        self.nlayers = num_layers
        self.drop_state_probability = drop_state_probability
        self.init_weights()
예제 #15
0
def main():
    """Build EHR data loaders, construct the LSTM net, then train or test.

    Reads sizes/vocab files from args.files_dir, splits patients into
    train (folds 0-6) / valid (fold 7) / test (folds 8-9), and dispatches
    on args.phase.
    """
    # Vocabulary sizes (+10 slack ids), read via context managers so the
    # file handles are closed (the original leaked `open(...)` handles).
    with open(os.path.join(args.files_dir, 'demo_index_dict.json'), 'r') as fp:
        args.n_ehr = len(json.load(fp)) + 10
    with open(os.path.join(args.files_dir, 'feature_list.json'), 'r') as fp:
        args.name_list = json.load(fp)[1:]
    args.input_size = len(args.name_list)
    # NOTE(review): `files` is unused below — kept for parity, confirm.
    files = sorted(glob(os.path.join(args.data_dir, 'resample_data/*.csv')))
    with open(os.path.join(args.files_dir, 'splits.json'), 'r') as fp:
        data_splits = json.load(fp)
    # Folds 0-6 train, fold 7 validation, folds 8-9 test.
    train_files = [
        f for idx in [0, 1, 2, 3, 4, 5, 6] for f in data_splits[idx]
    ]
    valid_files = [f for idx in [7] for f in data_splits[idx]]
    test_files = [f for idx in [8, 9] for f in data_splits[idx]]
    if args.phase == 'test':
        train_phase, valid_phase, test_phase, train_shuffle = 'test', 'test', 'test', False
    else:
        train_phase, valid_phase, test_phase, train_shuffle = 'train', 'valid', 'test', True
    train_dataset = data_loader.DataBowl(args, train_files, phase=train_phase)
    valid_dataset = data_loader.DataBowl(args, valid_files, phase=valid_phase)
    test_dataset = data_loader.DataBowl(args, test_files, phase=test_phase)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=train_shuffle,
                              num_workers=args.workers,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.workers,
                              pin_memory=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.workers,
                             pin_memory=True)

    args.vocab_size = args.input_size + 2

    if args.use_unstructure:
        args.unstructure_size = len(
            py_op.myreadjson(os.path.join(args.files_dir,
                                          'vocab_list.json'))) + 10

    # net = icnn.CNN(args)
    # net = cnn.CNN(args)
    net = lstm.LSTM(args)
    # net = torch.nn.DataParallel(net)
    # loss = myloss.Loss(0)
    loss = myloss.MultiClassLoss(0)

    net = _cuda(net, 0)
    loss = _cuda(loss, 0)

    best_metric = [0, 0]
    start_epoch = 0

    # Resume from a checkpoint when requested.
    if args.resume:
        p_dict = {'model': net}
        function.load_model(p_dict, args.resume)
        best_metric = p_dict['best_metric']
        start_epoch = p_dict['epoch'] + 1

    # Idiomatic collection of parameters (was an append loop).
    parameters_all = list(net.parameters())

    optimizer = torch.optim.Adam(parameters_all, args.lr)

    if args.phase == 'train':
        for epoch in range(start_epoch, args.epochs):
            print('start epoch :', epoch)
            t0 = time.time()
            train_eval(train_loader, net, loss, epoch, optimizer, best_metric)
            t1 = time.time()
            print('Running time:', t1 - t0)
            # Validation pass updates the best metric.
            best_metric = train_eval(valid_loader,
                                     net,
                                     loss,
                                     epoch,
                                     optimizer,
                                     best_metric,
                                     phase='valid')
        print('best metric', best_metric)

    elif args.phase == 'test':
        train_eval(test_loader, net, loss, 0, optimizer, best_metric, 'test')
예제 #16
0
    def __init__(self,
                 max_depth,
                 output_sizes,
                 node_feature_sizes,
                 learning_rate_vector,
                 learning_method_vector,
                 shuffle_levels=None,
                 adadelta_parameters=None,
                 momentum_vector=None):
        """Initializes a multi-level LSTM.
        The ML-LSTM has max_depth layers.  Layer 0 is the root node.
        Layers max_depth - 1 to 0 have LSTMs in them.
        Layer max_depth is simply composed of graph nodes, which forward their
        features to the LSTMs of level max_depth - 1.
        The output of level i consists in the LSTM features computed from the children of i;
        it does not contain any features computed from the node at level i itself.
        The features of the node at level i will be passed to node at level i-1 along
        with the LSTM output.

        @param max_depth: As noted above.
        @param node_feature_sizes: How many features are produced by a node, according to its depth.
            This can go from 0 to max_depth (included).  Be careful: unless e.g. the graph is
            bipartite, you need to use the same number throughout.
        @param output_sizes: How many features are produced by LSTMs at different depth.  This
            does not need to be constant.
        @param learning_rate_vector: Vector of learning rates.
        @param learning_method_vector: Vector of learning methods. It can be None, in which case
            adadelta is used, or it can be a vector consisting of 'adadelta' or 'momentum'
            or 'steady_rate' (the latter is not recommended) for each layer.
        @param momentum_vector: vector containing momentums for learning.  It can be None if
            adadelta is used.
        @param adadelta_parameters: vector of adadelta parameters.  It can be None if momentum
            learning is used.
        @param shuffle_levels: a list (or set) of depths at which shuffling is to occur.
            Defaults to no shuffling.
        """
        # Fix: the default was the mutable literal [] which is shared across
        # every call (classic mutable-default pitfall); use a None sentinel.
        if shuffle_levels is None:
            shuffle_levels = []
        # First, some sanity checks.
        assert max_depth > 0
        assert len(output_sizes) == max_depth
        assert len(node_feature_sizes) == max_depth
        assert len(learning_method_vector) == max_depth
        assert adadelta_parameters is None or len(
            adadelta_parameters) == max_depth
        assert adadelta_parameters is not None or all(
            m != 'adadelta' for m in learning_method_vector)
        assert momentum_vector is None or len(momentum_vector) == max_depth
        # NOTE(review): the docstring says momentum_vector may be None when
        # adadelta is used, but this assert then requires every method to be
        # 'steady_rate' — confirm whether `m != 'momentum'` was intended.
        assert momentum_vector is not None or all(
            m == 'steady_rate' for m in learning_method_vector)
        #assert [i < max_depth for i in shuffle_levels]

        self.output_sizes = output_sizes
        self.node_feature_sizes = node_feature_sizes
        self.max_depth = max_depth
        self.learning_rate_vector = learning_rate_vector
        self.learning_method_vector = learning_method_vector
        self.adadelta_parameters = adadelta_parameters
        self.momentum_vector = momentum_vector
        self.shuffle_levels = shuffle_levels

        # Creates the list of LSTMs, one per level.  Each level's input is
        # its own node features plus the output of the level below (the
        # deepest level has no child LSTM output).
        self.lstm_stack = [lstm.LSTM() for _ in range(max_depth)]
        for l in range(max_depth):
            self.lstm_stack[l].initialize(
                node_feature_sizes[l] +
                (0 if l == max_depth - 1 else output_sizes[l + 1]),
                output_sizes[l])

        # we need the following structures, when training with momentum and/or adadelta,
        # to keep track of the sum of dW at each level in order to update the momentum_dW
        # or the adadelta parameters of the respective LSTM modules.
        self.number_of_nodes_per_level = None
        self.sum_of_dWs = None
        self.sum_tot_sq_gradient = None
        self.sum_tot_gradient_weight = None
        self.sum_tot_sq_delta = None
        self.sum_tot_delta_weight = None
예제 #17
0
import lstm
import recurrent_network
import learning_methods

# Corpus used to build the character language model.
file_name = "/home/lie/lol.txt"

lang = recurrent_network.compute_language(file_name)

# Network hyper-parameters.
hl_size = 100        # hidden-layer size
num_hl = 1           # number of hidden layers
max_time_step = 25   # truncation length per training step

# Backslash line-continuations removed: parentheses already allow
# multi-line argument lists.
net = lstm.LSTM(hl_size, num_hl, max_time_step, lang)

epochs = 100
eta = 2  # AdaGrad base learning rate
learning_method = learning_methods.AdaGrad(eta)

net.no_batch_learn(file_name, epochs, learning_method)
예제 #18
0
def test():
    """Build an LSTM from ``lstm.conf`` and report its loss on the test set."""
    cfg = load_config("lstm.conf")
    net = lstm.LSTM(cfg)
    get_loss_from_file("test", cfg, net)
예제 #19
0
def main(argv):
    """Train an LSTM language model with Theano on a CoNLL-format corpus.

    NOTE(review): Python 2 code (print statements, xrange). The ``hf[...]``
    lines after the ``_train()`` call reference an ``hf`` handle that is
    never defined in this function -- they look like an unrelated fragment
    pasted in during scraping; confirm before running.

    :param argv: parsed arguments with emb, hidden, opt, layer, epoch,
                 save, data, n_words and batch attributes.
    """

    print '\nSYSTEM START\n'
    print 'Emb Dim: %d\tHidden Dim: %d\tOptimization: %s\tLayer: %d\tEpoch: %d' %\
          (argv.emb, argv.hidden, argv.opt, argv.layer, argv.epoch)
    print 'Parameters to be saved: %s' % argv.save

    """data preprocessing"""
    # Load the corpus, map words to integer ids, and flatten into one
    # id sequence used for language modeling.
    print 'DATA Preprocessing...'
    corpus, vocab_word = utils.load_conll(argv.data)
    id_corpus = utils.convert_words_into_ids(corpus, vocab_word)
    train_samples = utils.convert_data(id_corpus)
    n_samples = len(id_corpus)
    print 'Samples: %d\tVocab: %d' % (n_samples, vocab_word.size())

    """symbol definition"""
    # Theano symbolic variables: batch index, input word ids (w) and
    # target word ids (d).
    index = T.iscalar()
    w = T.ivector()
    d = T.ivector()
    n_hidden = argv.hidden
    n_words = argv.n_words
    batch_size = argv.batch

    """model setup"""
    print 'Compiling Theano Code...'
    model = lstm.LSTM(w=w, d=d, n_layers=argv.layer, vocab_size=vocab_word.size(), n_in=n_hidden, n_h=n_hidden,
                      n_words=n_words, batch_size=batch_size
                      )
    cost = model.nll
    opt = optimizers.main(name=argv.opt, cost=cost, params=model.params, emb=model.emb, x=model.x, w=model.w)

    """ train """
    def _train():
        # Compiled training step: d is w shifted by one position, i.e.
        # the target is the next token.
        train_model = theano.function(
            inputs=[index],
            outputs=[model.nll, model.errors],
            updates=opt,
            givens={
                w: train_samples[index * n_words * batch_size: (index+1) * n_words * batch_size],
                d: train_samples[index * n_words * batch_size + 1: (index+1) * n_words * batch_size + 1]
            },
            mode='FAST_RUN'
        )

        n_batch_samples = n_samples / n_words / batch_size
        print 'Vocabulary Size: %d\tBatch Sample Size: %d' % (vocab_word.size(), n_batch_samples)
        print '\nTrain START'

        for epoch in xrange(argv.epoch):
            print '\nEpoch: %d' % (epoch + 1)
            print '\tIndex: ',
            start = time.time()

            losses = []
            errors = []
            for b_index in xrange(n_batch_samples):
                # Progress marker every 100 mini-batches.
                if b_index % 100 == 0 and b_index != 0:
                    print b_index,
                    sys.stdout.flush()
                loss, error = train_model(b_index)
                losses.append(loss)
                errors.append(error)
            avg_loss = np.mean(losses)
            end = time.time()
            print '\tTime: %f seconds' % (end - start)
            print '\tAverage Negative Log Likelihood: %f' % avg_loss

            # An error value of 0 marks a correct prediction; `total`
            # is a float so the division below is true division even
            # under Python 2.
            total = 0.0
            correct = 0
            for sent in errors:
                total += len(sent)
                for y_pred in sent:
                    if y_pred == 0:
                        correct += 1
            print '\tTrain Accuracy: %f' % (correct / total)
            if argv.save:
                model.save()

    _train()
    # NOTE(review): `hf` is undefined here -- this block appears to belong
    # to a different (h5py-based) script; it will raise NameError.
    nrows = hf['x'].shape[0]
    ncols = hf['x'].shape[2]

    print(hf['x'].shape)
    print(hf['y'].shape)

# NOTE(review): `configs`, `nrows`, `ncols`, `dl` and the `lstm`/`tf`
# modules come from earlier in the original script (not visible here).
ntrain = int(configs['data']['train_test_split'] * nrows)
steps_per_epoch = int(ntrain / configs['data']['batch_size'])

# Round ntrain down to a multiple of batch_size so the train/test
# boundary is exact.
ntrain = steps_per_epoch * configs['data']['batch_size']
print('> Clean data has', nrows, 'data rows. Training on', ntrain, 'rows with', steps_per_epoch, 'steps-per-epoch')

# Building a model
sess =tf.Session()
model = lstm.LSTM(sess, configs['data']['x_window_size'], ncols,
                     configs['model']['dirname_save_model'])

sess.run(tf.global_variables_initializer())

# Train the model
# data_gen_train = dl.generate_clean_data(0, ntrain)
# model.training(configs['model']['epochs'], steps_per_epoch, data_gen_train, save=True)

# Load a trained model from a hard-coded checkpoint name.
model.load_model('epoch0_loss6.74e-01')

# Everything after ntrain is held out for testing.
ntest = nrows - ntrain
steps_test = int(ntest / configs['data']['batch_size'])
print('> Testing model on', ntest, 'data rows with', steps_test, 'steps')

batch_size = configs['data']['batch_size']
예제 #21
0
def run(df, fold):
    """
    Run training and validation for a given fold & dataset.

    :param df: pandas dataframe with a ``kfold`` column, a ``review``
               text column and a ``sentiment`` target column
    :param fold: current fold, int
    """
    # fetch training dataframe
    df_train = df[df.kfold != fold].reset_index(drop=True)
    # fetch validation dataframe
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # Fit the tokenizer on the training reviews only, then map both
    # splits to integer id sequences.
    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(df_train.review.values)
    x_train = tokenizer.texts_to_sequences(df_train.review.values)
    x_valid = tokenizer.texts_to_sequences(df_valid.review.values)

    # Zero-pad every sequence to a fixed length.
    x_train = tf.keras.preprocessing.sequence.pad_sequences(
        x_train, maxlen=config.MAXLEN)
    x_valid = tf.keras.preprocessing.sequence.pad_sequences(
        x_valid, maxlen=config.MAXLEN)

    #* embedding_dict: dictionary with word:embedding_vectors
    embedding_dict = load_vectors(
        "../input/wiki-news-300d-1M.vec/wiki-news-300d-1M.vec")

    #* word_index: dictionary with word:idx --  {'the': 1, 'cat': 2, 'sat': 3, 'on': 4}
    word_index = tokenizer.word_index

    #* embedding matrix: embedding vectors indexed by word id
    embedding_matrix = create_embedding_matrix(word_index, embedding_dict)

    # check if GPU is available else run on CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = lstm.LSTM(embedding_matrix)
    # BUG FIX: the model was never moved to `device`, even though `device`
    # is handed to engine.train/evaluate below.
    model.to(device)

    # BUG FIX: Adam needs the iterable returned by model.parameters();
    # the original passed the bound method itself (TypeError).
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    train_dataset = dataset.IMDBDataset(reviews=x_train,
                                        targets=df_train.sentiment.values)
    valid_dataset = dataset.IMDBDataset(reviews=x_valid,
                                        targets=df_valid.sentiment.values)

    # BUG FIX: DataLoader lives in torch.utils.data, not torch.utils.
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2)

    best_accuracy = 0
    early_stopping_counter = 0
    for epoch in range(config.EPOCHS):

        engine.train(train_data_loader, model, optimizer, device)

        preds, targets = engine.evaluate(valid_data_loader, model, optimizer,
                                         device)

        print(f"preds----{preds}")

        # Threshold the raw model scores at 0.5 to get class predictions.
        preds = np.array(preds) >= 0.5

        # sklearn convention is accuracy_score(y_true, y_pred); the value
        # is symmetric, but pass targets first for clarity.
        accuracy = metrics.accuracy_score(targets, preds)

        print(f"Fold:{fold}, Epoch: {epoch},  Accuracy: {accuracy}")

        # simple early stopping: count epochs without improvement
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1

        if early_stopping_counter > 2:
            break
예제 #22
0
    # Select the recurrent architecture requested on the command line.
    # NOTE(review): `args`, `dim`, `config`, `model_dir`, `os` and
    # `serializers` are defined earlier in the original script.
    model_type = args.type
    if model_type == "relu":
        import relu_rnn
        model = relu_rnn.Classifier(
            relu_rnn.ReLURNN(
                embed_dim=dim,
                n_units=int(config["n_units"]),
                gpu=args.gpu
            )
        )
    elif model_type == "lstm":
        import lstm
        model = lstm.Classifier(
            lstm.LSTM(
                embed_dim=dim,
                n_units=int(config["n_units"]),
                gpu=args.gpu
            )
        )
    else:
        # NOTE(review): "argment" typo in this user-facing message is
        # preserved; fixing it would change runtime output.
        raise Exception("model argment should be relu or lstm")

    # Resume from a previously saved Chainer snapshot if one exists.

    init_model_name = os.path.join(
        model_dir,
        "model.npz"
    )
    if os.path.exists(init_model_name):
        serializers.load_npz(init_model_name, model)
        print("load model {}".format(init_model_name))
예제 #23
0
    # calculate the imbalance ratio. used for construct the loss(cost) fn.
    imbalance_ratio = num_majority / num_minority

steps_per_epoch = int(ntrain / configs['data']['batch_size'])

print('> Clean data has', nrows, 'data rows. Training on', ntrain, 'rows with',
      steps_per_epoch, 'steps-per-epoch')
print('> Class 0: {}, Class 1: {} --in training set'.format(
    num_majority, num_minority))
print('> imbalance_ratio(class_weight): {}'.format(imbalance_ratio))

# Building a model
sess = tf.Session()
model = lstm.LSTM(sess,
                  configs['data']['x_window_size'],
                  ncols,
                  configs['model']['dirname_save_model'],
                  class_weight=imbalance_ratio)

sess.run(tf.global_variables_initializer())

if ans_load_model in ['y', 'Y']:
    # Load a trained model
    model.load_model(configs['model']['filename_load_model'])
else:
    # Train the model
    data_gen_train = dl.generate_clean_data(0, ntrain)
    model.training(configs['model']['epochs'],
                   steps_per_epoch,
                   data_gen_train,
                   save=True)
예제 #24
0
# Load Twitter API credentials.
# SECURITY NOTE(review): yaml.load without an explicit Loader is unsafe
# and deprecated; if this file is ever not fully trusted, use
# yaml.safe_load instead.
with open('keys_mtjuney.yml', 'r') as f:
    keys_mtjuney = yaml.load(f)

# Command-line options: input/output vocabularies and model dump paths.
parser = argparse.ArgumentParser()
parser.add_argument('--vocabin', '-vi', default='dumps/vocab_ready_in.dump')
parser.add_argument('--vocabout', '-vo', default='dumps/vocab_ready_out.dump')
parser.add_argument('--modelinput', '-mi', default=None)
parser.add_argument('--modeloutput', '-mo', default='dumps/model_lstm.dump')

args = parser.parse_args()

# SECURITY NOTE(review): pickle.load executes arbitrary code from the
# file; only load dumps you produced yourself.
with open(args.vocabin, 'rb') as f:
    vocabin = pickle.load(f)
with open(args.vocabout, 'rb') as f:
    vocabout = pickle.load(f)
# NOTE(review): this rebinds the name `lstm` from the module to the model
# instance, shadowing the module for the rest of the script.
lstm = lstm.LSTM(650, vocabin, vocabout, loadpath=args.modelinput)

# Bounded queue of incoming tweets to be consumed elsewhere.
tweet_q = queue.Queue(maxsize=300)

# def feed_tweet():
#     global vocab, tweet_q, keys_mtjuney
#
#     api = OAuth1Session(keys_mtjuney['CONSUMER_KEY'], keys_mtjuney['CONSUMER_SECRET'], keys_mtjuney['ACCESS_TOKEN'], keys_mtjuney['ACCESS_SECRET'])
#     url = "https://api.twitter.com/1.1/statuses/home_timeline.json"
#
#
#     last_tweetid = None
#
#
#     while True:
#
예제 #25
0
            excluded_ids.append(row['region_id'])
    return excluded_ids

# with open('category_freqs.json', 'w') as f:
#     json.dump(category_freqs, f)
# pprint(category_freqs)

# NOTE(review): Python 2 script; `data`, `train`, `params`, `lstm`,
# `results_data_dir` and `generate_indextotoken` come from earlier in
# the original file.
if __name__ == '__main__':

    print "******** Train model ****************"
    # Data interface over the corpus; the empty lists mean no categories
    # are excluded for this configuration.
    data_interface = data.Data(results_data_dir, [], [], [])
    print data_interface.vocab_size
    training = train.Learn(results_data_dir)
    # One full training run per configured repetition.
    for run in range(1, params.num_runs + 1):
        # this is the configuration for a model that knows all categories! Therefore, the list of excluded IDs is empty.
        model = lstm.LSTM(run, data_interface.vocab_size, results_data_dir, [],
                          data_interface.index_to_token)
        model.build_network()
        training.run_training(model, data_interface)

    # Persist the index-to-token mapping alongside the results.
    generate_indextotoken(data_interface, results_data_dir, [])

    ##### run training for all categories
    # categories = defaultdict()
    # # reader = csv.reader(open("../eval/cats.txt"))
    # reader = csv.reader(open("cats.txt"))
    # for row in reader:
    #     categories[row[0].strip()] = row[1:]
    #
    # for key in categories.keys():
    #     print "******** Train model without:", categories[key][0].strip()
    #     cat_ids = parse_categories([key])
예제 #26
0
def run(df, fold):
    '''
    Run training and validation for given fold and dataset.

    :param df: pandas dataframe with a ``kfold`` column, a ``reviews``
               text column and a ``sentiment`` target column
    :param fold: current fold, int
    '''
    # fetching training dataframe
    train_df = df[df.kfold != fold].reset_index(drop=True)
    # fetch validation dataframe
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    print('Fitting Tokenizer')
    # Using tf.keras for tokenization; fit on all reviews.
    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(df.reviews.values.tolist())

    # Convert training and validation data to integer sequences, e.g.
    # "bad movie" -> [24, 27].
    # BUG FIX: the Keras method is texts_to_sequences (plural) and the
    # training frame is `train_df` -- the original called the nonexistent
    # text_to_sequences on the nonexistent `train.df`.
    xtrain = tokenizer.texts_to_sequences(train_df.reviews.values)
    xtest = tokenizer.texts_to_sequences(valid_df.reviews.values)

    # Zero pad the training & validation sequences to MAX_LEN
    # (padding/truncation on the left-hand side).
    xtrain = tf.keras.preprocessing.sequence.pad_sequences(xtrain, maxlen=config.MAX_LEN)
    xtest = tf.keras.preprocessing.sequence.pad_sequences(xtest, maxlen=config.MAX_LEN)

    # Initialize dataset class for training.
    # NOTE(review): class name IMDDataset and keyword `target=` kept as in
    # the original -- confirm against this project's dataset module.
    # BUG FIX: the original mixed train_Dataset / tain_dataset /
    # trian_data_loader (and valid_Dataset / valid_dataset), so the
    # dataloaders referenced names that were never defined.
    train_dataset = dataset.IMDDataset(reviews=xtrain, target=train_df.sentiment.values)
    # dataloader for training
    train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                            batch_size=config.TRAIN_BATCH_SIZE,
                                            num_workers=2)

    valid_dataset = dataset.IMDDataset(reviews=xtest, target=valid_df.sentiment.values)
    # dataloader for validation
    valid_data_loader = torch.utils.data.DataLoader(valid_dataset,
                                            batch_size=config.VALID_BATCH_SIZE,
                                            num_workers=2)

    print('Loading Embeddings')
    # load pre-trained fastText embeddings and build the id-indexed matrix
    embedding_dict = load_vectors('crawl-300d-2M.vec')
    embedding_matrix = create_embedding_matrix(tokenizer.word_index, embedding_dict)

    device = torch.device('cuda')
    model = lstm.LSTM(embedding_matrix)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    print('Training Model')
    # set best accuracy & early stopping counter to 0
    best_accuracy = 0
    early_stopping_counter = 0

    # train and validate for all epochs
    for epoch in range(config.EPOCHS):
        # train & validate one epoch
        engine.train(train_data_loader, model, optimizer, device)
        outputs, targets = engine.evaluate(valid_data_loader, model, device)

        # Threshold the raw model outputs at 0.5 (no sigmoid applied).
        # BUG FIX: accuracy must be computed from the thresholded outputs
        # against `targets` -- the original used an undefined `target` and
        # the un-thresholded scores.
        outputs = np.array(outputs) >= 0.5

        # calculate accuracy
        accuracy = metrics.accuracy_score(targets, outputs)
        print('Fold:{}, Epoch:{}, Accuracy Score:{}'.format(fold, epoch, accuracy))

        # simple early stopping
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1

        if early_stopping_counter > 2:
            break
예제 #27
0
def run(df, fold):
    """
    Train and validate the LSTM sentiment model on one fold.

    :param df: pandas dataframe with a ``kfold`` column, a ``review``
               text column and a ``sentiment`` target column
    :param fold: current fold, int
    """
    # Split by the kfold column: everything except `fold` trains,
    # the `fold` rows validate.
    df_trn = df[df.kfold != fold].reset_index(drop=True)
    df_val = df[df.kfold == fold].reset_index(drop=True)

    print("Fitting tokenizer")
    # tf.keras tokenizer fitted on every review in the dataframe.
    tok = tf.keras.preprocessing.text.Tokenizer()
    tok.fit_on_texts(df.review.values.tolist())

    # Map both splits to integer id sequences ("bad movie" -> [24, 27]),
    # then left-pad / left-truncate everything to MAX_LEN.
    x_trn = tok.texts_to_sequences(df_trn.review.values)
    x_val = tok.texts_to_sequences(df_val.review.values)
    x_trn = tf.keras.preprocessing.sequence.pad_sequences(x_trn,
                                                          maxlen=config.MAX_LEN)
    x_val = tf.keras.preprocessing.sequence.pad_sequences(x_val,
                                                          maxlen=config.MAX_LEN)

    # Wrap each split in a torch dataset + dataloader pair.
    ds_trn = dataset.IMDBDataset(reviews=x_trn,
                                 targets=df_trn.sentiment.values)
    dl_trn = torch.utils.data.DataLoader(ds_trn,
                                         batch_size=config.TRAIN_BATCH_SIZE,
                                         num_workers=2)
    ds_val = dataset.IMDBDataset(reviews=x_val,
                                 targets=df_val.sentiment.values)
    dl_val = torch.utils.data.DataLoader(ds_val,
                                         batch_size=config.VALID_BATCH_SIZE,
                                         num_workers=1)

    print("Loading embeddings")
    # Pre-trained vectors -> matrix indexed by the tokenizer's word ids.
    emb_vectors = load_vectors("../input/crawl-300d-2M.vec")
    emb_matrix = create_embedding_matrix(tok.word_index, emb_vectors)

    # Model lives on the GPU.
    device = torch.device("cuda")
    model = lstm.LSTM(emb_matrix)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    print("Training Model")

    best_accuracy = 0
    early_stopping_counter = 0

    for epoch in range(config.EPOCHS):
        # One training pass, then score the validation split.
        engine.train(dl_trn, model, optimizer, device)
        outputs, targets = engine.evaluate(dl_val, model, device)

        # The model emits raw linear scores (no sigmoid); a 0.5 threshold
        # converts them to hard class predictions.
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"FOLD:{fold}, Epoch: {epoch}, Accuracy Score = {accuracy}")

        # Stop after three consecutive epochs without improvement.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
        if early_stopping_counter > 2:
            break
예제 #28
0
# NOTE(review): `args`, `dtype` and the `data`/`lstm`/`optim`/`nn`
# modules are defined earlier in the original script.
path = args.data
dataset = data.TxtLoader(path)

# Hyper-parameters bundled for the model and the dataloaders.
params = {
    'nhid': args.nhid,
    'nlayers': args.nlayers,
    'dropout': args.dropout,
    'batch': args.batch_size,
    'seq': args.seq,
    'type': dtype,
    'alphabet_size': len(dataset.alphabet)
}

dataloaders = data.loaders(dataset, params)
# Cast the model's tensors to the configured dtype.
model = lstm.LSTM(params).type(params['type'])
optimizer = optim.Adam(model.parameters(), lr=args.lr)
criterion = nn.CrossEntropyLoss()

def sequence_to_one_hot(sequence):
    """Turn a batch-sized sequence of chars into a one-hot 2-D tensor.

    Each row i is the one-hot encoding of ``sequence[i]`` over the
    alphabet, using the module-level ``params`` and ``dataset.char2ix``.

    NOTE(review): no ``return tensor`` is visible here -- the function
    body appears truncated by the scrape; confirm against the original.
    """

    # The expected batch covers `seq + 1` steps so inputs and shifted
    # targets can both be sliced from it.
    batch_size = params['batch'] * (params['seq'] + 1)
    assert len(sequence) == batch_size, 'Sequence must be a batch'

    tensor = torch.zeros(len(sequence),
                         params['alphabet_size']).type(params['type'])

    for i, c in enumerate(sequence):
        tensor[i][dataset.char2ix[c]] = 1
예제 #29
0
File: main.py    Project: xususan/ml-for-nlp
	trigrams_lm = trigrams.TrigramsLM(vocab_size = len(TEXT.vocab), alpha=0.01, lambdas=[.2, .5, .3])
	criterion = nn.CrossEntropyLoss()
	trigrams_lm.train(train_iter, n_iters=None)
	print(utils.validate_trigrams(trigrams_lm, val_iter, criterion))
	print("Calculate Kaggle")
	kaggle_trigrams(trigrams_lm, "input.txt", "trigramsagain.txt")

elif args.model == 'Ensemble':
	print("TRAINING TRIGRAMS MODEL")
	trigrams_lm = trigrams.TrigramsLM(vocab_size = len(TEXT.vocab), alpha=1, lambdas=[.1, .4, .5])
	criterion = nn.CrossEntropyLoss()
	trigrams_lm.train(train_iter, n_iters=None)

	filename = 'lstm_large_hidden45.sav'
	print("LOADING LSTM MODEL")
	loaded_model = lstm.LSTM(embedding_size=EMBEDDING_SIZE, vocab_size=len(TEXT.vocab), num_layers=NUM_LAYERS, lstm_type='large')
	if CUDA:
		print("USING CUDA")
		loaded_model = loaded_model.cuda()
	loaded_model.load_state_dict(torch.load(filename))
	criterion = nn.CrossEntropyLoss()
	print("VALIDATION SET")
	loss = utilslstm.evaluate2(loaded_model, val_iter, criterion)

elif args.model == 'LSTM':
	# Save Model
	# rnn = lstm.LSTM(embedding_size=EMBEDDING_SIZE, vocab_size=len(TEXT.vocab), num_layers=NUM_LAYERS, lstm_type='large')
	# if CUDA:
	# 	print("USING CUDA")
	# 	rnn = rnn.cuda()
	# criterion = nn.CrossEntropyLoss()
    x, y = util.line_toseq(dataset_validate.pop(), charstop)
    if dense: dataset.append(util.seq_to_densevec(x, y, vdict))
    else: dataset.append(util.seq_to_sparsevec(x, y, charset))
    if not len(dataset_validate) % 1000:
        print "len(dataset_validate)", len(dataset_validate)
dataset_validate = dataset

#sys.exit()

min_val_loss = float("inf")  # very big
peak = 0
int_num = 0

print "Making LSTM..."
mylstm = lstm.LSTM(n_input=len(dataset_train[0][0][0]),
                   n_output=len(dataset_train[0][1][0]),
                   n_memblock=hidden_size,
                   lr=learning_rate)
#mylstm.load("m50saving1740")

print "Start Training... "
try:
    while True:
        numpy.random.shuffle(dataset_train)
        dt = [
            dataset_train[x:x + validate_interval]
            for x in xrange(1, len(dataset_train), validate_interval)
        ]
        for d in dt:
            mylstm.train(d)
            vcost, act, aco, atp, p, r, f = mylstm.test(dataset_validate)
            mylstm.save(modelname + "/saving-" + str(int_num))