# LSTM stock-price prediction on STT data.
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from preprocess import data_split, normalize_data
# test_LSTM is assumed to be defined in a project module (a sketch follows below).

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
DATASET_ROOT = './'

df = pd.read_csv("./STT.csv", index_col=0)
STT = df[df.symbol == 'STT'].copy()
STT.drop(columns=['symbol'], inplace=True)  # drop the symbol column
STT_new = normalize_data(STT)

window = 30
X_train, y_train, X_test, y_test = data_split(STT_new, window)

INPUT_SIZE = 5
HIDDEN_SIZE = 64
NUM_LAYERS = 1
OUTPUT_SIZE = 1
learning_rate = 0.001
num_epochs = 50

rnn = test_LSTM(input_dim=INPUT_SIZE, hidden_dim=HIDDEN_SIZE,
                num_layers=NUM_LAYERS, output_dim=OUTPUT_SIZE)
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
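# The script above never defines or imports test_LSTM, so a minimal sketch of such
# a class is given here. The constructor signature (input_dim, hidden_dim,
# num_layers, output_dim) is taken from the call site above; everything else is an
# assumption about the intended model, not the project's actual implementation.
import torch
import torch.nn as nn

class test_LSTM(nn.Module):
    """Plain many-to-one LSTM regressor: one predicted value per input window."""
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: (batch, window, input_dim); keep only the last time step's output
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])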
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 28 18:05:07 2019

@author: sanchitagujral98
"""
import preprocess as p
import classification_models as cm
import datetime
import pickle
import csv

#StartTime = datetime.datetime.now()
'''y_enc = p.labelEncoding()
X_ngrams = p.tokenizer()'''

X_train, X_test, Y_train, Y_test = p.data_split(0.33, 42)

Dict = {}
#Dict['build_id'] = str(StartTime)

# Note: the misspelled "implemenation" is kept because it must match the function
# names exported by classification_models.
LogisticRegression_classifier, LogisticRegression_accuracy = cm.LogisticRegression_implemenation(
    X_train, X_test, Y_train, Y_test)
print('Logistic Regression Accuracy:', LogisticRegression_accuracy)
LogisticRegression_accuracy = round(LogisticRegression_accuracy * 100, 2)
Dict['LogisticRegression_classifier'] = LogisticRegression_accuracy
filename = 'LogisticRegression.sav'
with open(filename, 'wb') as f:  # use a context manager so the file handle is closed
    pickle.dump(LogisticRegression_classifier, f)

LinearSVC_classifier, LinearSVC_accuracy = cm.LinearSVC_implemenation(
    X_train, X_test, Y_train, Y_test)
print('Linear SVC Accuracy:', LinearSVC_accuracy)
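# classification_models is a local module not shown here; a plausible sketch of the
# wrapper the script calls is given below. It assumes scikit-learn underneath and
# keeps the misspelled name to match the call sites above; treat it as an
# illustration of the expected (classifier, accuracy) contract, not the real code.
from sklearn.linear_model import LogisticRegression

def LogisticRegression_implemenation(X_train, X_test, Y_train, Y_test):
    """Fit a logistic-regression classifier and return it with its test accuracy."""
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X_train, Y_train)
    accuracy = clf.score(X_test, Y_test)  # mean accuracy on the held-out set
    return clf, accuracy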
                    help='path of DEG results')
args = parser.parse_args()

# 1. Preprocess dataset
## Gene filtering and augmentation
ex_gnames, ex_glists = preprocess.gene_filtering(args.refpath, args.genepath)
ex_value, ex_gene, ex_label = preprocess.extract_filtered_gene(
    args.datapath, ex_gnames)
metadatas = preprocess.get_metadata(ex_label)
egene, rlds, aug_ages, aug_types, sp_size = preprocess.data_augmentation(
    ex_value, ex_gene, metadatas)
#egene, rlds, aug_ages, aug_types, sp_size = preprocess.gaussian_augmentation(ex_value, ex_gene, metadatas, 141)
#print (egene.shape, rlds.shape, aug_ages.shape, aug_types.shape)

## Rescaling (normalization)
# np.int is deprecated (removed in NumPy >= 1.24); the builtin int is equivalent here.
rcond_list, augcond_list = preprocess.indexing(
    aug_types, int(rlds.shape[0] / sp_size), sp_size)
re_rld, rld_mean, max_rld_std, std_re_rld = preprocess.rescaling(
    rlds, augcond_list)

## Split data
xtr, xte = preprocess.data_split('data_split.npz', re_rld, test_ratio=0.1)
#xtr, xte = preprocess.data_split('gaussian_data_split.npz', re_rld, test_ratio=0.1)
n_tr, n_te = len(xtr), len(xte)
print('xtr.shape:', xtr.shape, 'xte.shape:', xte.shape)

# 2. Train WGAN+GP
wgangp = WGAN_GP(xtr, n_te)
dloss, gloss, genx = wgangp.train()
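# preprocess.data_split above takes an .npz path in addition to the data, which
# suggests it caches the shuffled train/test indices on disk so repeated runs reuse
# the same partition. A minimal sketch under that assumption (not the actual
# implementation in the project's preprocess module):
import os
import numpy as np

def data_split(npz_path, data, test_ratio=0.1):
    """Split rows of `data` into train/test, caching shuffled indices in npz_path."""
    n = data.shape[0]
    if os.path.exists(npz_path):
        idx = np.load(npz_path)['idx']  # reuse the saved permutation
    else:
        idx = np.random.permutation(n)
        np.savez(npz_path, idx=idx)
    n_test = int(n * test_ratio)
    return data[idx[n_test:]], data[idx[:n_test]]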
parser.add_argument("-bd","--bidirectional",type=bool,default=True) parser.add_argument("-f","--input_file_path",type=str) parser.add_argument("-n","--ngram_num",type=int,default=1) parser.add_argument("-e","--epoch_num",type=int,default=200) parser.add_argument("-sr","--split_rate",type=float,default=0.8) args = parser.parse_args() print(vars(args)) embedding_dim = args.embedding_dim hidden_num = args.hidden_num num_layer = args.num_layer batch_size = args.batch_size learning_rate = args.learning_rate dropout = args.dropout bidirectional = args.bidirectional input_file_path = "datasets/standard_files/" + args.input_file_path ngram_num = args.ngram_num epoch_num = args.epoch_num split_rate = args.split_rate Letter_dict = {} data = pd.read_csv(input_file_path,encoding="utf8") data = data.sample(frac=1) data.reset_index(drop=True) all_data = preprocess.df2list(data,"sequence","MIC","type",ngram_num,log_num=10) Letter_dict = preprocess.build_dict(all_data) weight_dict = torch.randn(len(Letter_dict)+1,embedding_dim) num1 = int(input_file_path[-13:-9]) # num1 = 0 num2 = 8407 train_data,test_data = preprocess.data_split(all_data,int((num1+num2)*split_rate)) main()