import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from preprocess import data_split, normalize_data

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
DATASET_ROOT = './'

df = pd.read_csv("./STT.csv", index_col=0)

STT = df[df.symbol == 'STT'].copy()
#print(STT)
STT.drop(columns=['symbol'], inplace=True)  # drop the symbol column
STT_new = normalize_data(STT)
#print(STT_new)
window = 30
X_train, y_train, X_test, y_test = data_split(STT_new, window)

INPUT_SIZE = 5
HIDDEN_SIZE = 64
NUM_LAYERS = 1
OUTPUT_SIZE = 1

learning_rate = 0.001
num_epochs = 50

# test_LSTM is an LSTM model class assumed to be defined in an accompanying
# model module; it is not shown in this snippet.
rnn = test_LSTM(input_dim=INPUT_SIZE,
                hidden_dim=HIDDEN_SIZE,
                num_layers=NUM_LAYERS,
                output_dim=OUTPUT_SIZE)
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
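# The normalize_data and data_split helpers above come from the local preprocess
# module, which is not shown in this example. A minimal sketch of what they might
# look like for this windowed time-series setup follows; the min-max scaling, the
# 80/20 chronological split, and the choice of column 0 as the prediction target
# are assumptions, not the original implementation.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def normalize_data(df):
    """Scale every column to [0, 1] (plausible stand-in for preprocess.normalize_data)."""
    df = df.copy()
    df[df.columns] = MinMaxScaler().fit_transform(df[df.columns])
    return df

def data_split(df, window, train_ratio=0.8):
    """Build sliding windows of length `window` and split them chronologically.

    X has shape (n_samples, window - 1, n_features); y is the value of the first
    column at the last step of each window (the target column is an assumption).
    """
    raw = df.values
    samples = np.array([raw[i:i + window] for i in range(len(raw) - window + 1)])
    split = int(train_ratio * len(samples))
    X_train, y_train = samples[:split, :-1, :], samples[:split, -1, 0]
    X_test, y_test = samples[split:, :-1, :], samples[split:, -1, 0]
    return X_train, y_train, X_test, y_test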
Example #2
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 28 18:05:07 2019

@author: sanchitagujral98
"""
import preprocess as p
import classification_models as cm
import datetime
import pickle
import csv
#StartTime = datetime.datetime.now()
'''y_enc = p.labelEncoding()
X_ngrams = p.tokenizer()'''

X_train, X_test, Y_train, Y_test = p.data_split(0.33, 42)

Dict = {}
#Dict['build_id'] = str(StartTime)

LogisticRegression_classifier, LogisticRegression_accuracy = cm.LogisticRegression_implemenation(
    X_train, X_test, Y_train, Y_test)
print('Logistic Regression Accuracy: ', LogisticRegression_accuracy)
LogisticRegression_accuracy = round(LogisticRegression_accuracy * 100, 2)
Dict['LogisticRegression_classifier'] = LogisticRegression_accuracy
filename = 'LogisticRegression.sav'
pickle.dump(LogisticRegression_classifier, open(filename, 'wb'))

LinearSVC_classifier, LinearSVC_accuracy = cm.LinearSVC_implemenation(
    X_train, X_test, Y_train, Y_test)
print('Linear SVC Accuracy:', LinearSVC_accuracy)
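# Here p.data_split(0.33, 42) reads like a thin wrapper around scikit-learn's
# train_test_split with a test fraction of 0.33 and random_state=42. A minimal
# sketch of such a wrapper, assuming the module's own tokenizer() and
# labelEncoding() (referenced in the commented-out lines above) supply the
# features and labels; the internal call order is a guess, not the original code.
from sklearn.model_selection import train_test_split
from preprocess import labelEncoding, tokenizer

def data_split(test_size, random_state):
    """Plausible stand-in for preprocess.data_split as used in this example."""
    X_ngrams = tokenizer()       # vectorized n-gram features (assumed return value)
    y_enc = labelEncoding()      # encoded target labels (assumed return value)
    return train_test_split(X_ngrams, y_enc,
                            test_size=test_size,
                            random_state=random_state)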
Example #3

                    help='path of DEG results')
args = parser.parse_args()

#1. Preprocess dataset
##Gene filtering and augmentation
ex_gnames, ex_glists = preprocess.gene_filtering(args.refpath, args.genepath)
ex_value, ex_gene, ex_label = preprocess.extract_filtered_gene(
    args.datapath, ex_gnames)
metadatas = preprocess.get_metadata(ex_label)
egene, rlds, aug_ages, aug_types, sp_size = preprocess.data_augmentation(
    ex_value, ex_gene, metadatas)
#egene, rlds, aug_ages, aug_types, sp_size = preprocess.gaussian_augmentation(ex_value, ex_gene, metadatas, 141)
#print (egene.shape, rlds.shape, aug_ages.shape, aug_types.shape)

##Rescaling(normalization)
rcond_list, augcond_list = preprocess.indexing(aug_types,
                                               int(rlds.shape[0] / sp_size),
                                               sp_size)
re_rld, rld_mean, max_rld_std, std_re_rld = preprocess.rescaling(
    rlds, augcond_list)

##Split data
xtr, xte = preprocess.data_split('data_split.npz', re_rld, test_ratio=0.1)
#xtr, xte = preprocess.data_split('gaussian_data_split.npz', re_rld, test_ratio=0.1)
n_tr, n_te = len(xtr), len(xte)
print('xtr.shape:', xtr.shape, 'xte.shape:', xte.shape)

#2. Train WGAN+GP
wgangp = WGAN_GP(xtr, n_te)
dloss, gloss, genx = wgangp.train()
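# In this example preprocess.data_split takes a filename as well as the data,
# which suggests the shuffled split indices are cached on disk so the same
# train/test partition can be reused across runs (hence the alternative
# 'gaussian_data_split.npz' in the commented-out call). A sketch under that
# assumption; the .npz key name and shuffling scheme are guesses.
import os
import numpy as np

def data_split(fname, data, test_ratio=0.1):
    """Split `data` along axis 0, caching the permutation in `fname`."""
    n = data.shape[0]
    if os.path.exists(fname):
        idx = np.load(fname)['idx']      # reuse the previously saved permutation
    else:
        idx = np.random.permutation(n)
        np.savez(fname, idx=idx)
    n_test = int(n * test_ratio)
    return data[idx[n_test:]], data[idx[:n_test]]   # train split, test split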
Example #4
    parser.add_argument("-bd","--bidirectional",type=bool,default=True)
    parser.add_argument("-f","--input_file_path",type=str)
    parser.add_argument("-n","--ngram_num",type=int,default=1)
    parser.add_argument("-e","--epoch_num",type=int,default=200)
    parser.add_argument("-sr","--split_rate",type=float,default=0.8)
    args = parser.parse_args()
    print(vars(args))
    embedding_dim = args.embedding_dim
    hidden_num = args.hidden_num
    num_layer = args.num_layer
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    dropout = args.dropout
    bidirectional = args.bidirectional
    input_file_path = "datasets/standard_files/" + args.input_file_path
    ngram_num = args.ngram_num
    epoch_num = args.epoch_num
    split_rate = args.split_rate

    Letter_dict = {}
    data = pd.read_csv(input_file_path,encoding="utf8")
    data = data.sample(frac=1).reset_index(drop=True)  # shuffle rows and reset the index
    all_data = preprocess.df2list(data,"sequence","MIC","type",ngram_num,log_num=10)
    Letter_dict = preprocess.build_dict(all_data)
    weight_dict = torch.randn(len(Letter_dict)+1,embedding_dim)
    num1 = int(input_file_path[-13:-9])
    # num1 = 0
    num2 = 8407
    train_data,test_data = preprocess.data_split(all_data,int((num1+num2)*split_rate))
    main()
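# In this last example preprocess.data_split(all_data, int((num1 + num2) * split_rate))
# receives an absolute cut-off index rather than a ratio, so it presumably just
# slices the already shuffled sample list at that position. A minimal sketch under
# that assumption:
def data_split(samples, split_index):
    """Return (train, test): the first split_index samples and the remainder."""
    return samples[:split_index], samples[split_index:]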