from sklearn.neighbors import KNeighborsClassifier
from preprocessing.load_data import load_data
import logging
import numpy as np
import pandas as pd
import timeit

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.DEBUG)

# Set the paths of the training and test data
test_file_path = '/home/jdwang/PycharmProjects/kaggleDigitRecognizer/train_test_data/' \
                 'test.csv'
test_X = load_data(test_file_path, image_shape=(784,), returnlabel=False)

train_file_path = '/home/jdwang/PycharmProjects/kaggleDigitRecognizer/train_test_data/' \
                  'train.csv'
train_X, train_y = load_data(train_file_path, image_shape=(784,), returnlabel=True)

logging.debug('the shape of train sample: %d,%d' % train_X.shape)
logging.debug('the shape of test sample: %d,%d' % test_X.shape)

# Shuffle the training data and split off a portion for fitting
rand_list = np.random.RandomState(0).permutation(len(train_X))
vc_split = 0.99
num_train = int(len(train_X) * vc_split)
dev_X = train_X[rand_list][:num_train]
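# --- Hedged sketch (not part of the original script) ------------------------
# The section cuts off after dev_X. A minimal continuation, assuming the same
# shuffled split is applied to the labels and that KNeighborsClassifier is fit
# on the dev portion and checked on the held-out remainder. The names dev_y,
# val_X, val_y, knn, and the submission file name are illustrative assumptions.
dev_y = train_y[rand_list][:num_train]
val_X = train_X[rand_list][num_train:]
val_y = train_y[rand_list][num_train:]

knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
start = timeit.default_timer()
knn.fit(dev_X, dev_y)
logging.debug('validation accuracy: %f' % knn.score(val_X, val_y))
logging.debug('fit + score time: %.2fs' % (timeit.default_timer() - start))

# Predict the test set and write a Kaggle-style submission file (assumed format)
test_pred = knn.predict(test_X)
pd.DataFrame({'ImageId': np.arange(1, len(test_pred) + 1),
              'Label': test_pred}).to_csv('submission.csv', index=False)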
from utils.train import *
from utils.evaluate import *
from model.ShowAndTellModel import *
from model.ShowAndTellRevise import *
from preprocessing.load_data import load_data

global_variable.train_set, global_variable.valid_set, global_variable.test_Set = input_data()
global_variable.train_set['lang'], global_variable.train_set['caption'] = \
    txt2list('/data/PR_data/caption/train_single_process.txt')

####################################
#            DataLoader            #
####################################
BATCH_SIZE = 64
image_cap_dataset = load_data('train', sen_len=15)
loader = Data.DataLoader(dataset=image_cap_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True,
                         num_workers=16)

input_size = 4096
hidden_size = 256
encoder1 = Encoder_ShowAndTellModel(input_size, hidden_size)
decoder1 = Decoder_ShowAndTellModel(hidden_size,
                                    global_variable.train_set['lang'].n_words,
                                    1,
                                    drop_prob=0.1)
learning_rate = 0.0001
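# --- Hedged sketch (not part of the original script) ------------------------
# The section stops right after learning_rate. A minimal continuation, assuming
# the decoder emits log-probabilities (so NLLLoss applies, as in the sibling
# ST_ImageExtended script below) and that both networks are trained with Adam.
# torch and nn are assumed to be in scope via the wildcard imports above; the
# names encoder_optimizer and decoder_optimizer are illustrative assumptions.
criterion = nn.NLLLoss()
encoder_optimizer = torch.optim.Adam(encoder1.parameters(), lr=learning_rate)
decoder_optimizer = torch.optim.Adam(decoder1.parameters(), lr=learning_rate)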
experiment['percent_training'] = 0.5
experiment['N_PI'] = 0          # num of PIs to estimate, if 0 calculate median only
experiment['print_cost'] = 0    # 1 = print cost
experiment['plot_results'] = 1  # 1 = plot quantile predictions
#--------------------------------------------------------------------------
# QFNN parameters:
experiment['smooth_loss'] = 1   # 0 = pinball, 1 = smooth pinball loss
experiment['g_dims'] = 1        # number of g(t) nodes
experiment['epochs'] = 40_000   # number of training epochs
experiment['alpha'] = 0.01      # smoothing rate
experiment['eta'] = 0.5         # learning rate
experiment['Lambda'] = 0.000    # L1 reg. on output weights, e.g. 0.0003
experiment['dropout'] = 0.45    # dropout rate
#--------------------------------------------------------------------------
# Data Preprocessing:
experiment = load_data(experiment)
experiment = split_data(experiment)
experiment = set_coverage(experiment)
# EDA(experiment)
# experiment['tau'] = np.array([0.005, 0.01, 0.015, 0.02, 0.025, 0.975, 0.98, 0.985, 0.99, 0.995])
# experiment['N_tau'] = 10
# experiment['N_PI'] = 5
#--------------------------------------------------------------------------
# Prediction Methods:
# experiment = model_QFNN(experiment)
# experiment = model_QFNN_GridSearch(experiment)
# experiment = model_QR(experiment, method=1, poly=1)  # method = 0: QR, method = 1: QRNN
# experiment = model_SVQR(experiment)
# experiment = model_ETS(experiment, season=24)
experiment = model_SARIMA(experiment,
from model.ShowAndTellModel import *
from model.ShowAndTellRevise import *
from model.ST_ImageExtended import *
from preprocessing.load_data import load_data

global_variable.train_set, global_variable.valid_set, global_variable.test_Set = input_data()
global_variable.train_set['lang'], global_variable.train_set['caption'] = \
    txt2list('/data/PR_data/caption/train_single_process.txt')

####################################
#            DataLoader            #
####################################
BATCH_SIZE = 64
SEN_LEN = 15
image_cap_dataset = load_data('train', 'fc2', sen_len=SEN_LEN)
loader = Data.DataLoader(dataset=image_cap_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True,
                         num_workers=16)

input_size = 4096
hidden_size = 256
encoder1 = Encoder_ST_ImageExtended(input_size, hidden_size)
decoder1 = Decoder_ST_ImageExtended(hidden_size,
                                    global_variable.train_set['lang'].n_words,
                                    1,
                                    drop_prob=0.1)
criterion = nn.NLLLoss()
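# --- Hedged sketch (not part of the original script) ------------------------
# The section ends at the loss criterion. A minimal outline of how the pieces
# above are commonly wired into a training loop. The optimizer names, the epoch
# count, and the exact signature of the train(...) helper are illustrative
# assumptions -- the real ones live in utils.train and model.ST_ImageExtended.
learning_rate = 0.0001
encoder_optimizer = torch.optim.Adam(encoder1.parameters(), lr=learning_rate)
decoder_optimizer = torch.optim.Adam(decoder1.parameters(), lr=learning_rate)

for epoch in range(20):                                  # assumed epoch count
    for batch_idx, (features, captions) in enumerate(loader):
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        # Assumed helper: one teacher-forced decode of the caption batch,
        # returning the summed NLL loss over the sequence
        loss = train(features, captions, encoder1, decoder1, criterion)
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()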
                               verbose=1)
        # Save the model to disk
        p = ['save_models', "model_{}.model".format(i)]
        models[i].save(os.path.sep.join(p))
        Hs.append(models[i])

        # Plot the training loss and accuracy
        N = epochs
        p = ['model_{}.png'.format(i)]
        plt.style.use('ggplot')
        plt.figure()
        plt.plot(np.arange(0, N), H.history['loss'], label='train_loss')
        plt.plot(np.arange(0, N), H.history['val_loss'], label='val_loss')
        plt.plot(np.arange(0, N), H.history['acc'], label='train_acc')
        plt.plot(np.arange(0, N), H.history['val_acc'], label='val_acc')
        plt.title("Training Loss and Accuracy for model {}".format(i))
        plt.xlabel("Epoch #")
        plt.ylabel("Loss/Accuracy")
        plt.legend()
        plt.savefig(os.path.sep.join(p))
        plt.close()


if __name__ == '__main__':
    # args = args_parse()
    file_path = "dataset"
    (trainX, testX, trainY, testY) = load_data(file_path, classes)
    # train_models(trainX, testX, trainY, testY)
    predict(testX, testY)