예제 #1
0
파일: run.py 프로젝트: 0xballistics/wamdare
def run_lstm():
    """Load the data, print a few examples, then build and train/test the LSTM.

    The object returned by ``get_data()`` is indexed for its first two
    elements (passed to ``print_data_examples``) and later unpacked in full
    as positional arguments to ``train_test``.
    """
    print("LOADING DATA")
    dataset = get_data()
    first_part, second_part = dataset[0], dataset[1]
    print_data_examples(first_part, second_part)

    print("BUILDING MODEL")
    lstm = build_lstm_model()
    train_test(lstm, *dataset)
예제 #2
0
def train_eval(params, embedding_matrix, folds, output_path):
    """Build, train and persist one LSTM model per data fold.

    Args:
        params: dictionary of training and evaluation parameters; the keys
            'pad_length', 'batch_size' and 'epochs' are read here.
        embedding_matrix: embedding matrix handed to the model builder.
        folds: iterable of folds; element 0 of each fold is used for
            training and element 1 for validation.
        output_path: folder under which the results folder is created.

    Returns:
        None. Models, per-fold histories, the parameters and the final
        results are written through the ``save_load`` helpers.
    """
    result_dir = save_load.create_ts_path(params, output_path)
    fold_histories = []

    for fold_idx, fold in enumerate(folds):
        x_train, y_train = preprocessing.generateXY(fold[0], params['pad_length'])
        x_valid, y_valid = preprocessing.generateXY(fold[1], params['pad_length'])

        # A fresh model is built for every fold from the label shape.
        fold_model = model.build_lstm_model(y_train.shape, params, embedding_matrix)
        history, trained_model = train_model(
            x_train, y_train, x_valid, y_valid, fold_model,
            params['batch_size'], params['epochs'],
        )

        save_load.save_model(trained_model, result_dir, fold_idx)

        history_file = result_dir + '/history_fold{}.csv'.format(fold_idx)
        fold_histories.append(save_load.save_history(history_file, history.history))

    save_load.save_dictionary(result_dir, params, 'params.json')
    save_load.write_final_results(result_dir, fold_histories)
    def train(self, batch_size=64, epochs=10):
        """Fit the model on the stored training data and save it to disk.

        If ``self.model_file`` already exists the saved model is loaded and
        trained further; otherwise a new model is built from the vocabulary
        size, embedding size and pretrained weights held on the instance.

        Args:
            batch_size: batch size forwarded to ``model.fit``.
            epochs: number of epochs forwarded to ``model.fit``.
        """
        print('Starting training ...')
        inputs = self.train_x  # Input
        targets = self.train_y  # Output

        if not exists(self.model_file):
            # No saved model yet: start from scratch.
            net = build_lstm_model(self.vocab_size, self.embedding_size,
                                   self.pretrained_weights)
        else:
            # Continue training the previously saved model.
            net = load_model(self.model_file)

        net.fit([inputs], targets, batch_size=batch_size, epochs=epochs, verbose=1)
        print('Model Training Completed')

        net.save(self.model_file)
        print('Trained Model saved as : ', self.model_file)
예제 #4
0
    def train(self, batch_size=128, epochs=10):
        """Fit the model on the stored training data, logging progress, and save it.

        The shapes of the input and output arrays are printed first. If
        ``self.model_file`` already exists the saved model is loaded and
        trained further; otherwise a new model is built from the vocabulary
        size, embedding size and pretrained weights held on the instance.

        Args:
            batch_size: batch size forwarded to ``model.fit``.
            epochs: number of epochs forwarded to ``model.fit``.
        """
        print('\n Starting training ...')
        inputs = self.train_x  # Input
        targets = self.train_y  # Output

        print('\t Shape of Input data : ', inputs.shape)
        print('\t Shape of Output data : ', targets.shape)

        if not exists(self.model_file):
            # No saved model yet: start from scratch.
            print('\t Building Model from Scratch \n')
            net = build_lstm_model(self.vocab_size, self.embedding_size,
                                   self.pretrained_weights)
        else:
            # Continue training the previously saved model.
            print('\t Selected existing model to train \n')
            net = load_model(self.model_file)

        net.fit([inputs], targets, batch_size=batch_size, epochs=epochs, verbose=1)
        print('\t Model Training Completed')

        net.save(self.model_file)
        print('\t Trained Model saved as : ', self.model_file)
예제 #5
0

if __name__ == '__main__':
    # Script entry point: for each target column, build a dataset, wrap an
    # LSTM network in MlModel, then fit and evaluate it.

    data = pd.read_csv('./imputated_data.csv')

    # Settings shared by every target column.
    process_args = {
        'data': data,
        'split_ratio': [0.6, 0.2, 0.2],
        'normalize_scaler': MinMaxScaler(),
        'block_size': int(43800 / 219),
        'feature_cols': ['AQI', 'PM2_5', 'PM2_5_24H', 'PM_10', 'PM_10_24H', 'SO2','SO2_24H', 'NO2', 'NO2_24H', 'O3', 'O3_24H', 'O3_8H', 'O3_8H_24H', 'CO','CO_24H'],
    }

    for col in ['PM2_5', 'PM_10', 'SO2', 'NO2', 'O3', 'CO']:
        # Per-target settings; these overwrite the previous iteration's values.
        process_args.update(
            target_index=process_args['feature_cols'].index(col),
            lag_hour=24,
            forward_hour=6,
            target_name=col,
        )
        dataset = MlDataset(preprocess, process_args)

        # The same (batch, lag, features) shape is given to the network
        # builder and recorded on the MlModel wrapper.
        input_shape = (-1, process_args['lag_hour'], len(process_args['feature_cols']))
        model = MlModel(
            "LSTM-FC",
            build_lstm_model(input_shape),
            model_args=None,
            is_net=True,
            input_shape=input_shape,
            task="regression",
        )

        network_args = {
            'epochs': 200,
            'batch_size': 256,
            'shuffle': False,
            'x': dataset.get_data(label='train', shape=model.input_shape)[0],
            'y': dataset.get_data(label='train', shape=model.input_shape)[1],
            'validation_data': (dataset.get_data(label='valid', shape=model.input_shape)),
        }

        training = MlTraining(model=model, data=dataset, fit_args=network_args)
        training.fit()
        training.evaluate()
예제 #6
0
def main():
    """Train (or load) the two-stock LSTM model and simulate trading on its output.

    Steps:
      1. Read one column from each of the two stock CSV files and
         standardise each series with its own scaler.
      2. Window both series with ``build_stock_input`` and either train and
         save a new model or load the saved one.
      3. Turn the first output of ``model.predict`` into buy (1) / sell (-1)
         / hold (0) signals based on the change between consecutive
         predictions.
      4. Simulate a portfolio that starts with 100 in cash, print the model
         and stock growth, and plot the results.
    """
    # Change to get new results
    numpy.random.seed(123456)

    # Reading stock data
    # Edit these strings to change which stocks to create predictions from.
    raw_1 = pandas.read_csv('data/TEVA.csv', usecols=[2],
                            engine='python').values.astype('float32')
    raw_2 = pandas.read_csv('data/GOLD.csv', usecols=[2],
                            engine='python').values.astype('float32')

    # Each series is standardised independently.
    scaled_1 = preprocessing.StandardScaler().fit(raw_1).transform(raw_1)
    scaled_2 = preprocessing.StandardScaler().fit(raw_2).transform(raw_2)

    # Creating training and testing data; the same windowing settings are
    # reused further down for the unscaled prices.
    window_args = dict(input_size=100, test_ratio=0.1, step_size=5)
    split_1 = build_stock_input(data_list=scaled_1, **window_args)
    split_2 = build_stock_input(data_list=scaled_2, **window_args)

    train_x1, train_y1, test_x1 = split_1[0], split_1[1], split_1[2]
    train_x2, train_y2, test_x2 = split_2[0], split_2[1], split_2[2]
    test_y = split_1[3]

    print("Loaded and processed data")

    reuse_saved_model = False
    if reuse_saved_model:
        model = load_model('save/model.h5')
    else:
        model = build_lstm_model()

        # Training model
        tensorboard = TensorBoard(log_dir="logs/{}".format("lstm_test"))
        model.fit([train_x1, train_x2], [train_y1, train_y2],
                  verbose=1,
                  epochs=20,
                  callbacks=[tensorboard])

        model.save('save/model.h5')
        print("Result saved!")

    # Creating predictions (only the first model output is used)
    predictions = model.predict([test_x1, test_x2])[0]

    # Derive a signal from the change between consecutive predictions:
    # the sign of the change when it exceeds the threshold, otherwise 0.
    threshold = 0.0035
    deltas = []
    buy_sell = []
    for previous, current in zip(predictions[:-1], predictions[1:]):
        change = current - previous
        deltas.append(change)
        if abs(change) > threshold:
            signal = round(abs(change[0]) / change[0])
        else:
            signal = 0
        buy_sell.append(signal)

    money_result = [0]
    shares = 0
    cash = 100
    actual_price = build_stock_input(data_list=raw_1, **window_args)[3]

    # You get the result one day in advance!
    for day in range(1, len(buy_sell)):
        signal = buy_sell[day]
        if signal > 0:
            # Buy: convert all cash into shares at the previous price.
            shares += cash / actual_price[day - 1]
            cash = 0
        elif signal < 0:
            # Sell: convert all shares into cash at the previous price.
            cash += shares * actual_price[day - 1]
            shares = 0

        money_result.append(cash + shares * actual_price[day])

    # Growth is measured from index 1; index 0 of money_result is the
    # initial placeholder value.
    growth = (money_result[-1] - money_result[1]) / money_result[1]
    stock_growth = (actual_price[-1] - actual_price[1]) / actual_price[1]

    print("Model start: " + str(money_result[0]))
    print("Model end: " + str(money_result[-1]))
    print("Stock start: " + str(actual_price[0]))
    print("Stock end: " + str(actual_price[-1]))
    print("Model growth: " + str(growth))
    print("Stock growth: " + str(stock_growth))

    use_subplots = True
    if not use_subplots:
        plt.plot(predictions)
        plt.plot(test_y)
    else:
        fig, ax = plt.subplots(nrows=4)

        panels = (
            ("Predicted price (for tomorrow)", predictions),
            ("Buy (1) /Sell (-1)", buy_sell),
            ("Growth (initially 100)", money_result),
            ("Actual data", actual_price),
        )
        for axis, (title, series) in zip(ax, panels):
            axis.set_title(title)
            axis.plot(series)

        plt.tight_layout()

    plt.show()
예제 #7
0
# Intent-classification script: load labelled sentences, tokenize and encode
# them, train an LSTM classifier, then score a slice of the data with it.

# Read intent data (the file is read with explicit column names
# 'Sentence' and 'Intent')
data = pd.read_csv('data/intent_classification_data.csv',encoding='latin1',names=['Sentence','Intent'])

# Shuffle data: sample(frac=1) returns every row in random order; the index
# is then renumbered from 0.
data = data.sample(frac=1).reset_index(drop=True)

# Number of distinct intent labels; used as the classifier's n_classes.
number_of_intent = len(data.Intent.unique())


# Tokenize the sentences and encode/pad them (padding='post').
tokenizer,word_index_dict,index_word_dict,vocab_size,max_input_length,input_vector=utils.create_tokenizer_encoding_padding(data.Sentence, padding='post')

# Encode the intent labels.
lable_encoding,label = utils.label_encode(data.Intent)

# Split the encoded inputs and labels into train and test parts.
x_train,y_train,x_test,y_test = utils.prepare_train_test(input_vector, label)

# Build the LSTM classifier with a sparse categorical cross-entropy loss.
model = build_lstm_model(vocab_size, max_input_length, n_classes=number_of_intent, loss='sparse_categorical_crossentropy')

history = model.fit(x_train,y_train,epochs=50,batch_size=30)

## Slice data to define xtest data
# NOTE(review): rows with index label 890 onward of the shuffled frame are
# used here; whether they overlap the rows used for training depends on
# utils.prepare_train_test -- confirm.
validate_data = data.loc[890:]
validate_data = validate_data.reset_index(drop=True)
# Run the model on every sentence and store the result in three new columns.
# NOTE(review): presumably predict() returns three values per sentence in a
# form pandas can expand into columns -- verify against its definition.
validate_data[['predict','score','predict_prob']]=validate_data.Sentence.apply(lambda x : predict(model,x,tokenizer,lable_encoding,max_input_length,padding='post'))
# Sorted unique intent labels present in the validation slice.
category_label = np.unique(validate_data.Intent)
############################
###SHAP Value
############################
# The SHAP explanation code below is currently disabled.
#text='How to apply for the loan'
#shap_cat_list,shap_count_list = utils.shap_values(model,x_train,text,tokenizer,index_word_dict,max_input_length,padding='post')
#explainer = shap.KernelExplainer(model, x_train[:10])
#validate_data[['shap_cat_list','shap_value']] = validate_data.Sentence.progress_map(lambda x : utils.shap_values(model,x_train,x,tokenizer,index_word_dict,max_input_length,padding='post'))