Example #1
def setup_model():
    # input_ = T.matrix('features')
    # target = T.lmatrix('targets')
    input_ = T.tensor3("features")
    target = T.tensor3("targets")
    # model = MLPModel()
    model = LSTMModel()
    model.apply(input_, target)

    return model
Example #2
def main(args):
    df = pd.read_csv(args.dataset)
    # df = df.iloc[::24,:]

    # Preprocess the input and reshape it to
    # (num_samples, window_size, 1)
    processor = DataProcessor(window_size=args.window_size,
                              forecast_size=args.forecast,
                              shift=args.shift)
    train_X, train_y, test_X, test_y, raw_series = processor.preprocess(df)

    # train or load model
    lstm = LSTMModel(args.window_size, args.forecast)
    print(lstm.model.summary())
    if not args.eval_only:
        lstm.fit(train_X, train_y, epochs=args.epochs)
        lstm.save(args.model_path)
    else:
        lstm.load(args.model_path)

    # evaluation and plots
    preds = lstm.predict(test_X[-1].reshape(1, -1, 1))
    preds = processor.postprocess(preds)
    plot_test_datapoint(test_X[-1], test_y[-1], preds[0], args.forecast)

    preds_moving = moving_test_window_preds(lstm,
                                            test_X[0, :],
                                            n_future_preds=1000,
                                            step=args.forecast)
    preds_moving = np.array(preds_moving).reshape(-1, 1)
    preds_moving = processor.postprocess(preds_moving)

    plot_moving_window(df['datetime'], raw_series, preds_moving)
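The `moving_test_window_preds` helper called above is not part of this snippet. A minimal sketch of such a rolling-forecast helper, assuming the wrapper's `predict` accepts input shaped `(1, window_size, 1)` and returns at least `step` future values per call:

import numpy as np

def moving_test_window_preds(model, first_window, n_future_preds, step):
    # Hypothetical rolling forecast: predict `step` values at a time and
    # slide the input window forward over the model's own predictions.
    preds = []
    window = np.asarray(first_window, dtype=np.float32).reshape(1, -1, 1)
    while len(preds) < n_future_preds:
        new_vals = np.asarray(model.predict(window)).reshape(-1)[:step]
        preds.extend(new_vals.tolist())
        window = np.concatenate(
            [window[:, step:, :], new_vals.reshape(1, -1, 1)], axis=1)
    return preds[:n_future_preds]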
Example #3
    def run_train(train_df):
        # init fasttext embedding weights
        Main.fasttext_embedding_init()

        model_obj = LSTMModel(max_sentence_size=Params.max_sentence_size,
                              embed_size=Params.embed_size,
                              vocab_size=len(Params.sentence_tokenizer.word_index) + 1,
                              lstm_units=Params.lstm_units,
                              dense_size=Params.dense_size,
                              label_size=Params.label_size)
        model = model_obj.get_model()

        reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                         factor=Params.ReduceLROnPlateau_factor,
                                                         patience=Params.ReduceLROnPlateau_patience,
                                                         min_lr=Params.ReduceLROnPlateau_min_lr)

        if Params.optimizer == "sgd":
            optimizer = tf.keras.optimizers.SGD(learning_rate=Params.lr)
        elif Params.optimizer == "adam":
            optimizer = tf.keras.optimizers.Adam(learning_rate=Params.lr, beta_1=0.9, beta_2=0.999)
        else:
            raise ValueError("unsupported optimizer: {}".format(Params.optimizer))

        model.compile(optimizer=optimizer, loss="mean_squared_error", metrics=["accuracy"])
        print("------------model summary-------------")
        print(model.summary())

        # split train-valid
        # validation_split=Params.validation_split  # takes the last x% of the dataset as the validation split, which is the wrong approach!
        train, valid = train_test_split(train_df,
                                        stratify=train_df[["duplicate"]],
                                        test_size=Params.test_size,
                                        random_state=Params.random_state)

        my_callbacks = [
            tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=Params.early_stop_patience),
            reduce_lr
        ]
        history = model.fit([np.array(train["q1"].tolist()), np.array(train["q2"].tolist())],
                            np.array(train["duplicate"].tolist()),
                            batch_size=Params.batch_size,
                            epochs=Params.epoch,
                            validation_data=([np.array(valid["q1"].tolist()), np.array(valid["q2"].tolist())],
                                             np.array(valid["duplicate"].tolist())),
                            verbose=1,
                            shuffle=True,
                            callbacks=my_callbacks)
        model.save(os.path.join(Params.model_dir, "model.h5"))

        print("-------history---------")
        print(history.history)

        Main.plot(history)
        return model
Example #4
def training_model(train_data,
                   test_data,
                   num_epochs,
                   batch_size=8,
                   input_dim=1,
                   hidden_dim=100,
                   output_dim=100,
                   seq_dim=7):

    train_loader = DataLoader(dataset=train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True)
    test_loader = DataLoader(dataset=test_data,
                             batch_size=batch_size,
                             shuffle=True,
                             drop_last=True)
    Mymodel = LSTMModel(input_dim, hidden_dim, 1, output_dim)
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(Mymodel.parameters(), lr=0.0001)
    iters = 0
    hisloss = []
    for epoch in range(num_epochs):
        for data_val, target in train_loader:
            # clear the gradients from the previous step
            optimizer.zero_grad()
            outputs = Mymodel(data_val)
            # calculate the loss
            loss = loss_function(outputs, target)
            hisloss.append(loss.item())
            # backpropagate: compute gradients and store them on the model
            loss.backward()
            # use the gradients to update the model parameters
            optimizer.step()
            iters += 1
            if iters % 300 == 0:
                for test_val, test_target in test_loader:
                    test_outputs = Mymodel(test_val)
                    loss2 = loss_function(test_outputs, test_target)
                print('Iteration: {}. TrainLoss: {}. TestLoss: {}'.format(
                    iters, loss.item(), loss2.item()))
                torch.save(
                    Mymodel.state_dict(),
                    'Trained_model/trained_model_' + str(iters) + '.pkl')

    plt.plot(hisloss)
    plt.xlabel('Iteration')
    plt.ylabel('Training loss')
    plt.title('Training process')
    plt.grid(True)
    plt.savefig('Trained_model/loss.png')
    return Mymodel
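A hedged usage sketch for `training_model` above, with made-up array names and shapes matching its defaults (`seq_dim=7`, `input_dim=1`, `output_dim=100`); it assumes `train_data`/`test_data` are torch datasets yielding `(sequence, target)` pairs:

import os
import numpy as np
import torch
from torch.utils.data import TensorDataset

os.makedirs('Trained_model', exist_ok=True)  # checkpoints and the loss plot are written here

# Made-up data: 7-step windows with one feature, 100-step targets.
train_X = np.random.rand(500, 7, 1).astype(np.float32)
train_y = np.random.rand(500, 100).astype(np.float32)
test_X = np.random.rand(100, 7, 1).astype(np.float32)
test_y = np.random.rand(100, 100).astype(np.float32)

train_data = TensorDataset(torch.from_numpy(train_X), torch.from_numpy(train_y))
test_data = TensorDataset(torch.from_numpy(test_X), torch.from_numpy(test_y))

trained = training_model(train_data, test_data, num_epochs=5)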
Example #5
    def __init__(self, t=DEFAULT_CONSEC_FRAMES):
        print("fall detector init")
        start_time = time.time()
        self.consecutive_frames = t
        self.args = self.cli()
        argss = [copy.deepcopy(self.args) for _ in range(self.args.num_cams)]
        self.model = LSTMModel(h_RNN=32,
                               h_RNN_layers=2,
                               drop_p=0.2,
                               num_classes=7)
        self.model.load_state_dict(
            torch.load('lstm2.sav', map_location=argss[0].device))
        print("Model Loaded")
        print("Model loaded in time: " + str(time.time() - start_time))
Example #6
def load(args, checkpoint_dir):
    state_dict = torch.load(os.path.join(checkpoint_dir, 'checkpoint.pth'))
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if k.startswith('module.'):
            namekey = k[7:]  # strip the `module.` prefix added by nn.DataParallel
        else:
            namekey = k
        new_state_dict[namekey] = v

    if args.model_type == 'bert':
        config = BertConfig.from_json_file(os.path.join(checkpoint_dir, 'config.bin'))
        model = BertForSequenceClassification(config)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'cnn':
        model = CNNModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=args.num_labels,
                         num_filters=args.num_filters, filter_sizes=args.filter_sizes, device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'lstm':
        model = LSTMModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=args.num_labels,
                          hidden_size=args.hidden_size, device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'char-cnn':
        model = CharCNN(num_features=args.num_features, num_classes=args.num_labels)
        model.load_state_dict(new_state_dict)
    else:
        raise ValueError('model type is not found!')

    return model.to(args.device)
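A short usage sketch for `load` above; the `Namespace` fields are hypothetical, cover only what the `lstm` branch reads, and `checkpoint_dir` is assumed to contain `checkpoint.pth`:

from argparse import Namespace

args = Namespace(model_type='lstm', vocab_size=30000, embed_size=300,
                 num_labels=2, hidden_size=256, device='cpu')
model = load(args, './checkpoints')
model.eval()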
Example #7
def start(config):
    global sess
    print(config)
    model = LSTMModel(config)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    return train(train_set, valid_set, test_set, model)
Example #8
def main(_):

    vocabulary = Vocabulary()
    vocabulary.load_vocab(FLAGS.vocab_file)

    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path =\
            tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    model = LSTMModel(vocabulary.vocab_size, sampling=True,
                      lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers,
                      use_embedding=FLAGS.use_embedding,
                      embedding_size=FLAGS.embedding_size)

    model.load(FLAGS.checkpoint_path)

    start = vocabulary.encode(FLAGS.start_string)
    arr = model.predict(FLAGS.max_length, start, vocabulary.vocab_size)
    print(vocabulary.decode(arr))
Example #9
def lstmTrain(args):
    data_loader = TextLoader('data', batchSize, numSteps)
    args.vocabSize = data_loader.vocab_size

    print args.vocabSize

    _lstmModel = LSTMModel(args)

    with tf.Session() as trainSess:

        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())

        for currEpoch in xrange(numEpochs):

            # For reading batches of training data.
            currBatchPointer = 0

            # Set the learning rate. Decay after every epoch.
            trainSess.run(
                tf.assign(_lstmModel.learningRate,
                          learningRate * decayRate**currEpoch))
            state = _lstmModel.initialState.eval()

            for currBatch in xrange(numBatches):

                # Set input and target output data for current batch.
                inData = inDataBatches[currBatchPointer]
                targetData = targetDataBatches[currBatchPointer]

                #print inData

                # We will feed the data to the session.
                inputFeed = {
                    _lstmModel.inputData: inData,
                    _lstmModel.targetOutput: targetData,
                    _lstmModel.initialState: state
                }

                trainLoss, state, _ = trainSess.run([
                    _lstmModel.cost, _lstmModel.final_state,
                    _lstmModel.trainStep
                ], inputFeed)
                print "epoch".currEpoch
                print "trainingLoss".trainLoss

                # Advance to the next training batch.
                currBatchPointer += 1

                # Save a checkpoint
                if currEpoch % 5 == 0:
                    checkpointPath = os.path.join(args.save_dir,
                                                  'lstmModel.ckpt')
                    saver.save(trainSess,
                               checkpointPath,
                               global_step=currEpoch * numBatches + currBatch)
                    print "Saving checkpoint to %s" % checkpointPath
Example #10
def main():

    X_train = load_X(X_train_signals_paths)
    X_test = load_X(X_test_signals_paths)

    y_train = load_y(y_train_path)
    y_test = load_y(y_test_path)

    # Input Data

    training_data_count = len(X_train)  # 7352 training series (with 50% overlap between consecutive series)
    test_data_count = len(X_test)  # 2947 testing series
    n_steps = len(X_train[0])  # 128 timesteps per series
    n_input = len(X_train[0][0])  # 9 input parameters per timestep


    # Some debugging info

    print("Some useful info to get an insight on dataset's shape and normalisation:")
    print("(X shape, y shape, every X's mean, every X's standard deviation)")
    print(X_test.shape, y_test.shape, np.mean(X_test), np.std(X_test))
    print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")

    for lr in learning_rate:
        arch = cfg.arch
        if arch['name'] == 'LSTM1' or arch['name'] == 'LSTM2':
            net = LSTMModel()
        elif arch['name'] == 'Res_LSTM':
            net = Res_LSTMModel()
        elif arch['name'] == 'Res_Bidir_LSTM':
            net = Res_Bidir_LSTMModel()
        elif arch['name'] == 'Bidir_LSTM1' or arch['name'] == 'Bidir_LSTM2':
            net = Bidir_LSTMModel()
        else:
            print("Incorrect architecture chosen. Please check architecture given in config.py. Program will exit now! :( ")
            sys.exit()
        net.apply(init_weights)
        print(diag)
        opt = torch.optim.Adam(net.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        net = net.float()
        params = train(net, X_train, y_train, X_test, y_test, opt=opt, criterion=criterion, epochs=epochs, clip_val=clip_val)
        evaluate(params['best_model'], X_test, y_test, criterion)
        plot(params['epochs'], params['train_loss'], params['test_loss'], 'loss', lr)
        plot(params['epochs'], params['train_accuracy'], params['test_accuracy'], 'accuracy', lr)
Example #11
def main():
    # create instance of config
    config = Config()
    
    # create instance of datadeal
    datadeal = DataDeal(config)
    #get input data
    train_in, train_out, valid_in, valid_out, scaler, valid_x = datadeal.data_Deal()
    
    # create instance of model
    model = LSTMModel(config)
    # get result
    pre_value, act_value = model.run_Session(train_in, train_out, valid_in, valid_out)
    #model.run_Session(train_in, train_out, valid_in, valid_out)
    
    # calculate RMSE
    value_pre, value_real = datadeal.inv_Scale(scaler = scaler, valid_x = valid_x, data_pre = pre_value, data_act = act_value)
    
    # save result
    with open(config.dir_output + "result_2.txt", "w") as f:
        f.write("PRE" + "\t" + "ACT" + "\n")
        for i in range(len(value_pre)):
            f.write(str(int(value_pre[i])) + "\t" + str(int(value_real[i])) + "\n")
Example #12
def evaluation(provincename, cityname, modelpath, data):
    lat, long = get_location_using_baidu(provincename+cityname)
    Mymodel = LSTMModel(1, 100, 1, 100)
    Mymodel.load_state_dict(torch.load(modelpath))
    series = data.loc[(data["provinceName"] == provincename) & (data["cityName"] == cityname), "ts"].values.tolist()
    series = np.reshape(series,(-1,3))
    if np.isnan(series[-1][0]):
        series = series[:-1]
    diff_series = np.diff(series,axis = 0)
    n = len(diff_series)
    predict_series = np.array(series[0:7,0:1])
    store_diff = np.array(diff_series[0:7,0:1])
    for i in range(n-7):
        seq = np.array(diff_series[i:i+7])
        total_recover = np.sum(seq[:,1])
        total_death = np.sum(seq[:,2])
        seq = seq[:,0:1]
        mean = np.mean(seq[:,0],axis = 0)
        std = np.std(seq[:,0],axis = 0)
        seq -= mean
        if std!=0:
            seq /= std
        tensor_seq = torch.tensor(seq, dtype=torch.float, requires_grad=False)
        add_seq = torch.tensor([[lat], [long], [total_recover], [total_death], [mean], [std]])
        tensor_seq = torch.cat([tensor_seq,add_seq])
        tensor_seq.resize_(1, 13, 1)
        predictions = np.array(Mymodel(tensor_seq).tolist()[0])
        real_diff = predictions * std + mean
        store_diff = np.append(store_diff,[real_diff],axis = 0)
        if i>=n-7:
            print (diff_series)
            print ([real_diff[0],0,0])
            diff_series = np.append(diff_series,[real_diff[0],0,0],axis = 0)
            predict_series = np.append(predict_series,[np.array(list(map(sum,zip(predict_series[-1],real_diff))))],axis = 0)
        else:
            predict_series = np.append(predict_series,[np.array(list(map(sum,zip(series[i+6][0:1],real_diff))))],axis = 0)
    return series, predict_series, provincename+cityname, store_diff, diff_series
Example #13
def main(_):
    if os.path.exists(checkpoint_path) is False:
        os.makedirs(checkpoint_path)

    # Read the training text
    with open(datafile, 'r', encoding='utf-8') as f:
        train_data = f.read()

    # Load or build the vocabulary
    vocabulary = Vocabulary()
    if FLAGS.vocab_file:
        vocabulary.load_vocab(FLAGS.vocab_file)
    else:
        vocabulary.build_vocab(train_data)
    vocabulary.save(FLAGS.vocab_file)

    input_ids = vocabulary.encode(train_data)

    g = batch_generator(input_ids, FLAGS.batch_size, FLAGS.num_steps)

    model = LSTMModel(vocabulary.vocab_size,
                      batch_size=FLAGS.batch_size,
                      num_steps=FLAGS.num_steps,
                      lstm_size=FLAGS.lstm_size,
                      num_layers=FLAGS.num_layers,
                      learning_rate=FLAGS.learning_rate,
                      train_keep_prob=FLAGS.train_keep_prob,
                      use_embedding=FLAGS.use_embedding,
                      embedding_size=FLAGS.embedding_size)
    model.train(
        g,
        FLAGS.max_steps,
        checkpoint_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
Example #14
def load_model(path_to_state_dict: str, path_to_config_file: str):

    with open(path_to_config_file, 'r') as file:
        config_file = json.load(file)
    
    Model = LSTMModel(**config_file)
    Model.load_state_dict(torch.load(path_to_state_dict))

    Model.eval()  # set the model (dropout/normalisation layers) to eval mode

    return Model, config_file
Example #15
def test_lstm(data_size: float, epoch: int, batch_size: int = 64):
    """
    Evaluate the LSTM model on the DSTC2 data
    :param data_size: size of data sliced
    :param epoch: number of epochs to train the model
    :param batch_size: batch size for each training
    :return: a tuple of ('ld', data_size, epoch, prediction results)
    """
    training_data, training_labels = DSTC2.trainset(500).word_vecs(
        raw_label=True)

    model = LSTMModel(training_data, training_labels, max_feature_length=50)
    model.verbose = 1
    model.train(data_size, epoch, batch_size)

    testing_data, testing_labels = DSTC2.testset(500).word_vecs(raw_label=True)
    return 'ld', data_size, epoch, model.predict(testing_data, testing_labels)
Example #16
def test_lstm_reuters(data_size: float, epoch: int, batch_size: int = 64):
    """
    Evaluate the LSTM model on the Reuters data
    :param data_size: size of data sliced
    :param epoch: number of epochs to train the model
    :param batch_size: batch size for each training
    :return: a tuple of ('lr', data_size, epoch, prediction results)
    """
    reuters = Reuters(num_words=500, maxlen=500)
    training_data, training_labels = reuters.training_set()
    testing_data, testing_labels = reuters.testing_set()

    model = LSTMModel(training_data,
                      training_labels,
                      max_feature_length=500,
                      top_words=5000)
    model.verbose = 1
    model.train(data_size, epoch, batch_size)

    return 'lr', data_size, epoch, model.predict(testing_data, testing_labels)
Example #17
class RecurrentACModel(nn.Module, torch_ac.RecurrentACModel):
    def __init__(self, env, obs_space, action_space, ignoreLTL, gnn_type,
                 dumb_ac, freeze_ltl):
        super().__init__()

        # Decide which components are enabled
        self.use_progression_info = "progress_info" in obs_space
        self.use_text = not ignoreLTL and (gnn_type == "GRU" or gnn_type
                                           == "LSTM") and "text" in obs_space
        self.use_ast = not ignoreLTL and ("GCN"
                                          in gnn_type) and "text" in obs_space
        self.gnn_type = gnn_type
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.action_space = action_space
        self.dumb_ac = dumb_ac

        self.freeze_pretrained_params = freeze_ltl
        if self.freeze_pretrained_params:
            print("Freezing the LTL module.")

        self.env_model = getEnvModel(env, obs_space)

        # Define text embedding
        if self.use_progression_info:
            self.text_embedding_size = 32
            self.simple_encoder = nn.Sequential(
                nn.Linear(obs_space["progress_info"], 64), nn.Tanh(),
                nn.Linear(64, self.text_embedding_size),
                nn.Tanh()).to(self.device)
            print(
                "Linear encoder Number of parameters:",
                sum(p.numel() for p in self.simple_encoder.parameters()
                    if p.requires_grad))

        elif self.use_text:
            self.word_embedding_size = 32
            self.text_embedding_size = 32
            if self.gnn_type == "GRU":
                self.text_rnn = GRUModel(
                    obs_space["text"], self.word_embedding_size, 16,
                    self.text_embedding_size).to(self.device)
            else:
                assert (self.gnn_type == "LSTM")
                self.text_rnn = LSTMModel(
                    obs_space["text"], self.word_embedding_size, 16,
                    self.text_embedding_size).to(self.device)
            print(
                "RNN Number of parameters:",
                sum(p.numel() for p in self.text_rnn.parameters()
                    if p.requires_grad))

        elif self.use_ast:
            hidden_dim = 32
            self.text_embedding_size = 32
            self.gnn = GNNMaker(self.gnn_type, obs_space["text"],
                                self.text_embedding_size).to(self.device)
            print(
                "GNN Number of parameters:",
                sum(p.numel() for p in self.gnn.parameters()
                    if p.requires_grad))

        # Memory specific code.
        self.image_embedding_size = self.env_model.size()
        self.memory_rnn = nn.LSTMCell(self.image_embedding_size,
                                      self.semi_memory_size)
        self.embedding_size = self.semi_memory_size

        print("embedding size:", self.embedding_size)
        if self.use_text or self.use_ast or self.use_progression_info:
            self.embedding_size += self.text_embedding_size

        if self.dumb_ac:
            # Define actor's model
            self.actor = PolicyNetwork(self.embedding_size, self.action_space)

            # Define critic's model
            self.critic = nn.Sequential(nn.Linear(self.embedding_size, 1))
        else:
            # Define actor's model
            self.actor = PolicyNetwork(self.embedding_size,
                                       self.action_space,
                                       hiddens=[64, 64, 64],
                                       activation=nn.ReLU())

            # Define critic's model
            self.critic = nn.Sequential(nn.Linear(self.embedding_size, 64),
                                        nn.Tanh(), nn.Linear(64, 64),
                                        nn.Tanh(), nn.Linear(64, 1))

        # Initialize parameters correctly
        self.apply(init_params)

    @property
    def memory_size(self):
        return 2 * self.semi_memory_size

    @property
    def semi_memory_size(self):
        return self.image_embedding_size

    def forward(self, obs, memory):
        x = self.env_model(obs)

        hidden = (memory[:, :self.semi_memory_size],
                  memory[:, self.semi_memory_size:])
        hidden = self.memory_rnn(x, hidden)
        embedding = hidden[0]
        memory = torch.cat(hidden, dim=1)

        if self.use_progression_info:
            embed_ltl = self.simple_encoder(obs.progress_info)
            embedding = torch.cat(
                (embedding,
                 embed_ltl), dim=1) if embedding is not None else embed_ltl

        # Adding Text
        elif self.use_text:
            embed_text = self.text_rnn(obs.text)
            embedding = torch.cat(
                (embedding,
                 embed_text), dim=1) if embedding is not None else embed_text

        # Adding GNN
        elif self.use_ast:
            embed_gnn = self.gnn(obs.text)
            embedding = torch.cat(
                (embedding,
                 embed_gnn), dim=1) if embedding is not None else embed_gnn

        # Actor
        dist = self.actor(embedding)

        # Critic
        x = self.critic(embedding)
        value = x.squeeze(1)

        return dist, value, memory

    def load_pretrained_gnn(self, model_state):
        # We delete all keys relating to the actor/critic.
        new_model_state = model_state.copy()

        for key in model_state.keys():
            if key.find("actor") != -1 or key.find("critic") != -1:
                del new_model_state[key]

        self.load_state_dict(new_model_state, strict=False)

        if self.freeze_pretrained_params:
            target = self.text_rnn if self.gnn_type == "GRU" or self.gnn_type == "LSTM" else self.gnn

            for param in target.parameters():
                param.requires_grad = False
Example #18
def train(trainX, trainY, epoch, lr, batchSize, modelPath, lookBack, method):

    lossFilePath = "../model/loss_ResRNN-4.pkl"
    output = open(lossFilePath, 'wb')
    lossList = []

    n = trainX.shape[0]
    print("trainx num is:", n)
    batchNum = n // batchSize - 1

    print("batch num is:", batchNum)

    if method == "RNN":
        net = RNNModel(inputDim=1,
                       hiddenNum=100,
                       outputDim=1,
                       layerNum=1,
                       cell="RNN")
    if method == "LSTM":
        net = LSTMModel(inputDim=1,
                        hiddenNum=100,
                        outputDim=1,
                        layerNum=1,
                        cell="LSTM")
    if method == "GRU":
        net = GRUModel(inputDim=1,
                       hiddenNum=100,
                       outputDim=1,
                       layerNum=1,
                       cell="GRU")
    if method == "ResRNN":
        #net = ResidualRNNModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1, cell="RNNCell")
        net = ResRNNModel(inputDim=1, hiddenNum=100, outputDim=1, resDepth=-1)
    if method == "attention":
        net = AttentionRNNModel(inputDim=1,
                                hiddenNum=100,
                                outputDim=1,
                                seqLen=lookBack)

    if method == "ANN":
        net = ANNModel(inputDim=lookBack, hiddenNum=100, outputDim=1)

    if method == "new":
        net = DecompositionNetModel(inputDim=lookBack,
                                    fchiddenNum=100,
                                    rnnhiddenNum=100,
                                    outputDim=1)
    optimizer = optim.RMSprop(net.parameters(), lr=lr, momentum=0.9)
    #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    #optimizer = optim.SGD(net.parameters(), lr=0.001)

    t1 = time.time()
    for i in range(epoch):
        trainX, trainY = shuffle(trainX, trainY, random_state=epoch)
        batchStart = 0
        lossSum = 0

        for j in range(batchNum):

            x = trainX[batchStart:batchStart + batchSize, :, :]
            y = trainY[batchStart:batchStart + batchSize]

            x = torch.from_numpy(x)
            y = torch.from_numpy(y)
            x, y = Variable(x), Variable(y)

            optimizer.zero_grad()

            if method == "new":
                pred = net.forward(x, batchSize=batchSize)
                # criterion = nn.MSELoss()
                #loss = criterion(pred, y)
                loss = MSE_Loss(pred, y)
            else:
                pred = net.forward(x, batchSize=batchSize)
                criterion = nn.MSELoss()
                loss = criterion(pred, y)

            lossSum += loss.item()
            if j % 30 == 0 and j != 0:
                # report the average loss over the last 30 batches
                print("current loss is:", lossSum / 30)
                lossList.append(lossSum / 30)
                lossSum = 0

            #net.zero_grad()
            loss.backward()
            optimizer.step()
            #scheduler.step(loss)

            batchStart += batchSize
        print("%d epoch is finished!" % i)
    t2 = time.time()
    print("train time:", t2 - t1)
    p.dump(lossList, output, -1)

    torch.save(net, modelPath)
Example #19
# -*- coding: utf-8 -*-
# @Author: LogicJake
# @Date:   2018-11-13 19:02:55
# @Last Modified time: 2018-11-18 20:46:42
from preprocessing import Preprocessing
from model import LSTMModel
if __name__ == '__main__':
    preprocessing = Preprocessing('../../dataset/input.csv',
                                  '../../dataset/output.csv')
    preprocessing.reformat()

    model = LSTMModel()
    model.train()
Example #20
    # Constructs the network.
    network = args.network.lower()
    vocab_size = len(vocab)
    num_classes = len(train_ds.label_list)
    pad_token_id = vocab.to_indices('[PAD]')
    if network == 'bow':
        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'bigru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'bilstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='bidirect',
                          padding_idx=pad_token_id)
    elif network == 'bilstm_attn':
        lstm_hidden_size = 196
        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
        model = BiLSTMAttentionModel(attention_layer=attention,
                                     vocab_size=vocab_size,
                                     lstm_hidden_size=lstm_hidden_size,
                                     num_classes=num_classes,
                                     padding_idx=pad_token_id)
    elif network == 'birnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'cnn':
Example #21
    max_step = 20000

    # Directory where the checkpoints will be saved
    checkpoint_dir = './training_checkpoints'

    # Name of the checkpoint files
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{step}")

    input_ids = vocabulary.encode(train_data)
    batch_data = batch_generator(input_ids, batch_size, seq_len)

    model = LSTMModel(vocabulary.vocab_size,
                      batch_size=batch_size,
                      num_steps=seq_len,
                      lstm_size=128,
                      num_layers=2,
                      sampling=False,
                      drop_out=0.5,
                      use_embedding=False,
                      embedding_size=128)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

    # define metrics
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    step = 0
    for nb, (X, y) in enumerate(batch_data):
        start = time.time()
        train_loss.reset_states()
        step += 1
Example #22
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    collate_fn=lstm_collate_fn,
)
###############################################################################

encoder_cnn = EncoderCNN(emb_size)
encoder_cnn = encoder_cnn.to(device)

if model == "lstm":
    f_rnn = LSTMModel(emb_size,
                      emb_size,
                      emb_size,
                      device,
                      bidirectional=False)
    b_rnn = LSTMModel(emb_size,
                      emb_size,
                      emb_size,
                      device,
                      bidirectional=False)
f_rnn = f_rnn.to(device)
b_rnn = b_rnn.to(device)

criterion = nn.CrossEntropyLoss()
params_to_train = (list(encoder_cnn.parameters()) + list(f_rnn.parameters()) +
                   list(b_rnn.parameters()))
optimizer = torch.optim.SGD(params_to_train, lr=2e-1, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)
Example #23
		target_pl_train_X, target_pl_train_Y, target_pl_train_bi_X, target_pl_train_weight = utils_data.get_zx_pl_data()

init_embedding = utils_data.get_embedding(FLAGS.target)
# init_bi_embedding = utils_data.get_bi_embedding(FLAGS.target)

tfConfig = tf.ConfigProto()
tfConfig.gpu_options.per_process_gpu_memory_fraction = FLAGS.memory

with tf.Graph().as_default(), tf.Session(config=tfConfig) as sess:

	if FLAGS.target == "zx":
		vocab_size = 4704
		bi_vocab_size = 250734

	if FLAGS.model == "lstm":
		m = LSTMModel(config.hidden_size, config.max_grad_norm, config.num_layers, vocab_size, 
			config.embedding_size, config.num_classes, config.learning_rate, config.bi_direction, init_embedding)
	elif FLAGS.model == "lstmlm":
		m = LSTMLMModel(config.hidden_size, config.max_grad_norm, config.num_layers, vocab_size, 
			config.embedding_size, config.num_classes, config.learning_rate, config.bi_direction, init_embedding)
	
	sess.run(tf.global_variables_initializer())

	best_valid_f1 = 0.

	model_path = "model/%s_%s_%s_%s_%s.ckpt" % (FLAGS.model, FLAGS.source, FLAGS.target, str(FLAGS.pl), FLAGS.name)
	saver = tf.train.Saver()

	# saver.restore(sess, model_path)

	for epoch in range(config.max_epochs):
Example #24
                        help='number of output units for model (default: 30)')
    parser.add_argument('--seed', type=int, default=1111,
                        help='random seed (default: 1111)')
    parser.add_argument('--model_type', type=str, default='none',
                        help='model type to execute (default: none, pass VRAE for executing)')
    
    
    args = parser.parse_args()

    curr_time = strftime("%Y%m%d%H%M%S", localtime())
    # args.cuda = torch.cuda.is_available()

    # initialize model and params
    
    if args.model == 'LSTM':
        model = LSTMModel(cuda=args.cuda)
    elif args.model == 'TCN':
        channel_sizes = [args.nhid] * args.levels
        model = TCNModel(args.nhid, args.opsize, channel_sizes, args.ksize, args.dropout, 128, use_cuda=args.cuda)
    elif args.model == 'SOCIAL':
        model = Social_Model(cuda=args.cuda)
    elif args.model == 'VRAE':
        model = VRAE(sequence_length=30, number_of_features=2, block='GRU')
    
    if args.mode == 'train':
        logger_dir = './runs/' + args.model + '/' + curr_time + '/'
        model_dir = './models/' + args.model + '/' + curr_time + '/'
        os.makedirs(model_dir)
    else:
        logger_dir=None
        model_dir=args.model_dir
Example #25
        print("Creating dictionary...")
        dictionary = Dictionary(query_files)
        with open("./saved/dictionary.pkl", "wb") as f:
            pickle.dump(dictionary, f)
    
    nchar = len(dictionary)
    max_seq_len = dictionary.max_seq_len

    lr = args.lr
    clip = args.clip
    batch_size = args.batch_size
    eval_batch_size = 10
    best_val_loss = None

    if args.model == 'LSTM':
        model = LSTMModel(nchar, args.nhid, args.nlayers, max_seq_len, args.dropout)
        if args.load_latest:
            latest = max([f for f in os.listdir("./saved/lstm")])
            latest_path = os.path.join("./saved/lstm", latest)
            model.load_state_dict(torch.load(latest_path))
        model = model.to(device)

    save(model, args.save)

    criterion = nn.NLLLoss(ignore_index=0)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        print("Start training...")
        for epoch in tqdm(range(1, args.epochs+1)):
Example #26
            print('Basic Dialog RNN Model.')

        elif args.base_model == 'GRU':
            model = GRUModel(D_m,
                             D_e,
                             D_h,
                             n_classes=n_classes,
                             dropout=args.dropout)

            print('Basic GRU Model.')

        elif args.base_model == 'LSTM':
            model = LSTMModel(D_m,
                              D_e,
                              D_h,
                              n_classes=n_classes,
                              dropout=args.dropout)

            print('Basic LSTM Model.')

        else:
            print('Base model must be one of DialogRNN/LSTM/GRU/Transformer')
            raise NotImplementedError

        name = 'Base'

    if cuda:
        model.cuda()

    # for daily_dialog_bert2.pkl
Example #27
    def __init__(self, env, obs_space, action_space, ignoreLTL, gnn_type,
                 dumb_ac, freeze_ltl):
        super().__init__()

        # Decide which components are enabled
        self.use_progression_info = "progress_info" in obs_space
        self.use_text = not ignoreLTL and (gnn_type == "GRU" or gnn_type
                                           == "LSTM") and "text" in obs_space
        self.use_ast = not ignoreLTL and ("GCN"
                                          in gnn_type) and "text" in obs_space
        self.gnn_type = gnn_type
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.action_space = action_space
        self.dumb_ac = dumb_ac

        self.freeze_pretrained_params = freeze_ltl
        if self.freeze_pretrained_params:
            print("Freezing the LTL module.")

        self.env_model = getEnvModel(env, obs_space)

        # Define text embedding
        if self.use_progression_info:
            self.text_embedding_size = 32
            self.simple_encoder = nn.Sequential(
                nn.Linear(obs_space["progress_info"], 64), nn.Tanh(),
                nn.Linear(64, self.text_embedding_size),
                nn.Tanh()).to(self.device)
            print(
                "Linear encoder Number of parameters:",
                sum(p.numel() for p in self.simple_encoder.parameters()
                    if p.requires_grad))

        elif self.use_text:
            self.word_embedding_size = 32
            self.text_embedding_size = 32
            if self.gnn_type == "GRU":
                self.text_rnn = GRUModel(
                    obs_space["text"], self.word_embedding_size, 16,
                    self.text_embedding_size).to(self.device)
            else:
                assert (self.gnn_type == "LSTM")
                self.text_rnn = LSTMModel(
                    obs_space["text"], self.word_embedding_size, 16,
                    self.text_embedding_size).to(self.device)
            print(
                "RNN Number of parameters:",
                sum(p.numel() for p in self.text_rnn.parameters()
                    if p.requires_grad))

        elif self.use_ast:
            hidden_dim = 32
            self.text_embedding_size = 32
            self.gnn = GNNMaker(self.gnn_type, obs_space["text"],
                                self.text_embedding_size).to(self.device)
            print(
                "GNN Number of parameters:",
                sum(p.numel() for p in self.gnn.parameters()
                    if p.requires_grad))

        # Memory specific code.
        self.image_embedding_size = self.env_model.size()
        self.memory_rnn = nn.LSTMCell(self.image_embedding_size,
                                      self.semi_memory_size)
        self.embedding_size = self.semi_memory_size

        print("embedding size:", self.embedding_size)
        if self.use_text or self.use_ast or self.use_progression_info:
            self.embedding_size += self.text_embedding_size

        if self.dumb_ac:
            # Define actor's model
            self.actor = PolicyNetwork(self.embedding_size, self.action_space)

            # Define critic's model
            self.critic = nn.Sequential(nn.Linear(self.embedding_size, 1))
        else:
            # Define actor's model
            self.actor = PolicyNetwork(self.embedding_size,
                                       self.action_space,
                                       hiddens=[64, 64, 64],
                                       activation=nn.ReLU())

            # Define critic's model
            self.critic = nn.Sequential(nn.Linear(self.embedding_size, 64),
                                        nn.Tanh(), nn.Linear(64, 64),
                                        nn.Tanh(), nn.Linear(64, 1))

        # Initialize parameters correctly
        self.apply(init_params)
Example #28
def main(path_to_data: str,
         cache_dir: str,
         texts_col: str,
         labels_col: str,
         n_classes: int,
         batch_size: int,
         batch_size_eval: int,
         min_lr: int,
         max_lr: int,
         n_epochs: int,
         cuda: int = 0):
    '''
    Train an LSTM text classifier on the CSV at `path_to_data` and cache the
    trained weights and model config in `cache_dir`.
    '''
    df = pd.read_csv(path_to_data)

    if os.path.isdir(cache_dir):
        logger.info('Cache dir found here {}'.format(cache_dir))
    else:
        logger.info('Creating cache dir')
        os.mkdir(cache_dir)

    # Preprocess
    optimal_length = get_length(df, texts_col)
    X, vocab_size = encode_texts(df,
                                 texts_col,
                                 max_seq_length=optimal_length,
                                 return_vocab_size=True)

    y = get_labels(df, labels_col, n_classes)

    train_loader, test_loader = create_TorchLoaders(
        X,
        y,
        test_size=0.10,
        batch_size=batch_size,
        batch_size_eval=batch_size_eval)

    Model = LSTMModel(vocab_size=vocab_size, n_classes=n_classes)

    config_dict = {
        "vocab_size": vocab_size,
        "n_classes": n_classes,
        "max_length": optimal_length
    }

    if n_classes > 2:
        criterion = torch.nn.CrossEntropyLoss()
    else:
        criterion = torch.nn.BCEWithLogitsLoss()

    optim = torch.optim.Adam(Model.parameters())

    ## Heuristic
    opt_cycle = ((((len(X) * (1 - 0.10)) / batch_size) * n_epochs) * 0.25) / 2

    schedul = torch.optim.lr_scheduler.CyclicLR(optim,
                                                min_lr,
                                                max_lr,
                                                step_size_up=opt_cycle,
                                                step_size_down=opt_cycle,
                                                mode="exp_range",
                                                cycle_momentum=False,
                                                gamma=0.999)

    if cuda == 1:
        Model.cuda()
        device = "cuda"
    else:
        device = "cpu"

    metrics = {
        "training_loss": [],
        "eval_loss": [],
        "training_f1": [],
        "eval_f1": []
    }

    logger.info("Starting training for {} epochs".format(n_epochs))

    for epoch in range(n_epochs):
        Model.train()
        progress = progressbar.ProgressBar()
        for batch in progress(train_loader):
            batch = tuple(t for t in batch)

            inputs, labels = batch  #unpacking
            inputs = inputs.to(device, dtype=torch.long)
            labels = labels.to(device, dtype=torch.float)

            preds = Model(inputs)
            loss = criterion(preds, labels)

            ## Metrics computation
            metrics["training_loss"].append(loss.item())

            preds = preds.to("cpu").detach().numpy()
            preds = flat_pred(preds, 0.5)
            tmp_f1 = f1_score(labels.to("cpu").detach().numpy(),
                              preds,
                              average='macro')

            metrics["training_f1"].append(tmp_f1)

            ## Backward pass ##
            loss.backward()

            optim.step()  #Gradient descent
            schedul.step()
            Model.zero_grad()

        logger.info(
            "Epoch {} done with: training loss: {}\n training f1: {}".format(
                epoch, loss.item(), tmp_f1))

        ## Eval
        progress = progressbar.ProgressBar()
        Model.eval()
        for batch in progress(test_loader):
            with torch.no_grad():  # no gradient tracking needed during evaluation
                batch = tuple(t for t in batch)

                inputs, labels = batch
                inputs = inputs.to(device, dtype=torch.long)
                labels = labels.to(device, dtype=torch.float)

                preds = Model(inputs)
                eval_loss = criterion(preds, labels)

                ## Eval metrics
                metrics["eval_loss"].append(eval_loss.item())

                preds = preds.to("cpu").detach().numpy()
                preds = flat_pred(preds, 0.5)
                tmp_f1 = f1_score(labels.to("cpu").detach().numpy(),
                                  preds,
                                  average='macro')  ## detach

                metrics["eval_f1"].append(tmp_f1)

        logger.info(
            "Evaluation at iteration {} done: eval loss: {}\n eval f1: {}".
            format(epoch, eval_loss.item(), tmp_f1))

    ## Bring back model to cpu
    Model.cpu()

    ## Get/Save param dict
    logger.info('Saving model in cache dir {}'.format(cache_dir))
    torch.save(Model.state_dict(), os.path.join(cache_dir, 'state_dict.pt'))
    with open(os.path.join(cache_dir, 'config_model.json'), 'w') as file:
        json.dump(config_dict, file)
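`flat_pred` is not defined in this snippet. A plausible sketch, assuming it turns raw model outputs into hard labels (sigmoid plus threshold for the binary case, argmax otherwise):

import numpy as np

def flat_pred(preds, threshold=0.5):
    # Hypothetical helper: convert raw outputs to hard label predictions.
    preds = np.asarray(preds)
    if preds.ndim > 1 and preds.shape[-1] > 1:
        return preds.argmax(axis=-1)  # multi-class: highest-scoring class
    probs = 1.0 / (1.0 + np.exp(-preds.reshape(-1)))  # binary: sigmoid of the logits
    return (probs >= threshold).astype(int)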
Example #29
    # Check whether a CUDA-capable GPU is available
    if using_gpu:
        device = "cuda:2"
        print("Training on GPU")
    else:
        device = "cpu"
        print("Training on CPU")

    os.path.dirname(os.path.abspath(__file__))

    #Hyperparameter for Models
    modelparameters = [
        34, 128, 35, 2, 1, 0.2
    ]  # input neurons, hidden size, output neurons, layers, directions, dropout
    model = LSTMModel(modelparameters[0], modelparameters[1],
                      modelparameters[2], modelparameters[3],
                      modelparameters[4],
                      modelparameters[5])  #creating the model
    model.to(device)

    #Hyperparameter of the Training
    batch_size = 6
    valid_batch_size = 11
    epochs = 500
    learning_rate = 0.001
    print_every = 5
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    home_directory = os.path.dirname(os.path.abspath(__file__))

    training_path = ""
Example #30
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if os.path.exists(os.path.join(args.model, 'checkpoint.pth.tar')):
        # load existing model
        model_info = torch.load(os.path.join(args.model, 'checkpoint.pth.tar'))
        print("==> loading existing model '{}' ".format(model_info['arch']))
        original_model = models.__dict__[model_info['arch']](pretrained=False)
        model = LSTMModel(original_model, model_info['arch'],
                          model_info['num_classes'], model_info['lstm_layers'],
                          model_info['hidden_size'], model_info['fc_size'])
        # print(model)
        model.cuda()
        model.load_state_dict(model_info['state_dict'])
        best_prec = model_info['best_prec']
        cur_epoch = model_info['epoch']
    else:
        if not os.path.isdir(args.model):
            os.makedirs(args.model)
        # load and create model
        print("==> creating model '{}' ".format(args.arch))
        original_model = models.__dict__[args.arch](pretrained=True)
        model = LSTMModel(original_model, args.arch,
                          len(train_dataset.classes), args.lstm_layers,
                          args.hidden_size, args.fc_size)
Example #31
import torch

from dataset import SquaresDataset
from model import LSTMModel
import train_test_old
import train_test

FRAME_WIDTH = 10
LSTM_INPUT_SIZE = 50
LSTM_HIDDEN_SIZE = 32
BATCH_SIZE = 32

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = LSTMModel(LSTM_INPUT_SIZE, LSTM_HIDDEN_SIZE)
model = model.to(device)

squares_dataset_train = SquaresDataset(frame_width=FRAME_WIDTH, n=1000)
squares_dataset_test = SquaresDataset(frame_width=FRAME_WIDTH, n=100)
xtion1_train_loader = torch.utils.data.DataLoader(squares_dataset_train,
                                                  batch_size=BATCH_SIZE)
xtion1_test_loader = torch.utils.data.DataLoader(squares_dataset_test,
                                                 batch_size=BATCH_SIZE)

train_test.train(model,
                 xtion1_train_loader,
                 xtion1_test_loader,
                 n_classes=2,
                 epochs=2)