def test_kinect_classification_random_data(self):
    """Fit the default MLP on mock Kinect data and sanity-check its output.

    Loads the mock training CSV, verifies dtype and label coverage, trains
    the classifier with the same hyper-parameters the main script uses, and
    asserts that every prediction on the validation set is a known position.
    """
    from sklearn.neural_network import MLPClassifier

    # retrieve mock classification data
    from data_processing import get_data
    X_train, y_train = get_data(
        data_dir='test_data/test_data_training.csv')

    # Loaded features must be float64 and the labels must cover all
    # three body positions.
    self.assertEqual(X_train.dtype, 'float64')
    for label in ('sitting', 'standing', 'laying'):
        self.assertTrue(label in y_train)

    # Same default hyper-parameters as applied in the main script.
    classifier = MLPClassifier(solver='lbfgs',
                               alpha=1e-5,
                               hidden_layer_sizes=(8, 16),
                               random_state=1,
                               activation='logistic')
    classifier.fit(X_train, y_train)

    X_test, _ = get_data(
        data_dir='test_data/test_data_validation.csv')
    predictions = classifier.predict(X_test)

    # Every predicted label must be one of the valid positions.
    for prediction in predictions:
        is_valid = (prediction == "standing" or prediction == "sitting"
                    or prediction == "laying")
        self.assertTrue(is_valid)
def train(): ''' 隐层 50: 表示词向量的宽度 输出 2 : 隐层用,输出是分类的个数 ''' # 获取训练集和测试集 train_set, target, test_set, test_target = data_processing.get_data() net = AnalyWithGRU(50, 2).to(device) # 定义神经网络 criterion = torch.nn.CrossEntropyLoss() # 设置 loss optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9) # 设置优化器 for epoch in range(3): for i in range(train_set.size()[0]): encoder_hidden = net.init_hidden() input_data = torch.autograd.Variable(train_set[i]) output_labels = torch.autograd.Variable(torch.LongTensor([target[i]])) input_data, encoder_hidden = input_data.to(device), encoder_hidden.to(device) # 训练 encoder_outputs, encoder_hidden = net(input_data, encoder_hidden) # 优化器 optimizer.zero_grad() loss = criterion(encoder_outputs, output_labels.to(device)).to(device) loss.backward() optimizer.step() if i % 1000 == 0 or i==train_set.size()[0]-1: print("epoch: " + str(epoch+1) + "\t" + "loss: " + str(loss.item())) Accuracy(net, test_set, test_target) return
def main(args):
    """Run the full experiment: load data, train, validate, and plot.

    Every stage is bracketed with wall-clock timestamps and the elapsed
    time of each stage is reported on stdout.
    """
    t_start = time.time()

    # --- Data processing ---
    print("Retrieving data...")
    train_loader, validation_loader = get_data(args)
    t_data = time.time()

    # --- Experiment details ---
    print_args(args)
    print("Data Loaded Successfully!\nData Processing Time: {:.3f} secs\n".
          format(t_data - t_start))

    # --- Training ---
    model = LinearSVM()
    if torch.cuda.is_available():
        model.cuda()
    print("Training...")
    train(model, train_loader, args)
    t_train = time.time()
    print("End of training\nTraining Time: {:.3f} secs\n".format(
        t_train - t_data))

    # --- Validation ---
    print("Validating...")
    confidence_levels = validation(model, validation_loader, args)
    t_val = time.time()
    print("End of validation\nValidation Time: {:.3f} secs\n".format(
        t_val - t_train))

    print("Total Time Elapsed: {:.3f}secs\n".format(time.time() - t_start))
    print("{0:-^31}\n".format("END"))

    # --- Plotting ---
    plot(confidence_levels)
def predict(ticker, period, gridsearch, verbosity):
    """Fit an XGBoost regressor on technical indicators and predict % change.

    Args:
        ticker: symbol whose price history is fetched.
        period: history window passed through to data_processing.get_data.
        gridsearch: flag forwarded to the tuning helpers to enable grid search.
        verbosity: currently unused by this function.

    Returns:
        Predictions for the held-out validation split.
    """
    df = data_processing.get_data(ticker, period)
    data_processing.add_all_indicators(df)
    # Keep a copy that still contains the indicator warm-up rows: the very
    # latest row (predicted on below) may hold NaNs for long-window
    # indicators and would be dropped by dropna().
    df_original = df.copy()
    df = df.dropna()

    features = [
        'Close', '% Volume', 'bb_bbh', 'bb_bbl', 'bb_avg', 'bb_pband',
        'bb_wband', 'rsi', 'macd', 'macd_diff', 'macd_signal', 'ema_9',
        'sma_5', 'sma_10', 'sma_15', 'sma_30', 'sma_50'
    ]

    X = df[features].copy()
    y = df['% Change'].copy()
    X_train, X_val, y_train, y_val = train_test_split(X, y)

    model = functions.xgb_tuning(X_train, X_val, y_train, y_val, gridsearch)
    predictions = model.predict(X_val)
    mae = mean_absolute_error(y_val, predictions)
    score = r2_score(y_val, predictions)
    print('% MAE: {}'.format(mae))
    print('R2 Score: {}'.format(score))

    # Renamed from `next` so the builtin is not shadowed.
    next_pred = model.predict(df_original.tail(1)[features])
    print('Next: {}'.format(next_pred))

    # Refit on all available rows for the final forward-looking prediction.
    model = functions.final_model(X, y, gridsearch)
    next_pred = model.predict(df_original.tail(1)[features])
    print('Next Final: {}'.format(next_pred))
    return predictions
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import KSUS
import mnist_reader
from data_processing import get_data

# Prefer the first GPU when available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

dir_path = os.path.dirname(os.path.realpath(__file__))

# Test-data slice. The trailing "#000"/"#00" comments suggest the
# full-size values are 48000 / 1000 — confirm before scaling up.
offset = 48  #000
test_size = 10  #00
_, data_test, data_demo = get_data(offset, test_size)
h, w = data_test.shape[1], data_test.shape[2]


class ksus_Dataset(Dataset):
    """Minimal Dataset wrapper over a pre-loaded collection of clean samples."""

    def __init__(self, datasetclean):
        self.clean = datasetclean

    def __len__(self):
        return len(self.clean)

    def __getitem__(self, idx):
        return self.clean[idx]
data_path = '/Users/cengqiqi/Desktop/project/data/'

# Raw inputs: per-track features and the session listening logs.
track_features = pd.read_csv(data_path + 'tf_mini.csv')
sessions = pd.read_csv(data_path + 'log_mini.csv')

# Build the track dictionary first: get_data below depends on it.
track_dic = get_track_dic(track_features)
ntracks = len(track_dic)

# Fixed slices of the log for train / validation / test.
sessions_train = sessions[0:150000]
sessions_val = sessions[150000:160000]
sessions_test = sessions[160000:167880]

train_data_raw, train_label = get_data(batch_size, sessions_train, seq_len)
val_data_raw, val_label = get_data(batch_size, sessions_val, seq_len)
test_data_raw, test_label = get_data(batch_size, sessions_test, seq_len)

###############################################################################
# prepare data for track embedding in pytorch
###############################################################################
# Map every track id to its row position in track_dic so tracks can be
# represented by integer indices for the embedding layer.
tracks = list(track_dic.keys())
track_dic_index = {track_id: position for position, track_id in enumerate(tracks)}

# convert data: represented by index
from data_processing import get_data
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import ensemble
# NOTE(review): sklearn.externals.joblib was removed in scikit-learn 0.23;
# this should become `import joblib` when the environment is upgraded.
from sklearn.externals import joblib

# Feature matrix and labels from the project's preprocessing step.
data, labels = get_data()

print("\nAda Boost")
# Running totals for per-fold accuracy — presumably accumulated later in the
# loop; the continuation of the loop body is not shown in this chunk.
avg_train = 0
avg_test = 0
fold_number = 0
#Do K-Fold cross validation on the data
from sklearn.model_selection import KFold
kf = KFold(n_splits=10, shuffle=True)
for train_indices, test_indices in kf.split(data):
    #Split the data into training and testing
    X_train = [data[ii] for ii in train_indices]
    X_test = [data[ii] for ii in test_indices]
    Y_train = [labels[ii] for ii in train_indices]
    Y_test = [labels[ii] for ii in test_indices]
    #Define the classifier
    # Boosted decision stumps (depth 1) using the discrete SAMME algorithm.
    clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                             algorithm="SAMME",
                             n_estimators=200)
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from data_processing import get_data


def y2indicator(y, K):
    """One-hot encode integer labels `y` into an (N, K) indicator matrix."""
    indicator = np.zeros((len(y), K))
    for row, label in enumerate(y):
        indicator[row, label] = 1
    return indicator


Xtrain, Ytrain, Xtest, Ytest = get_data()

D = Xtrain.shape[1]                # input dimensionality
K = len(set(Ytrain) | set(Ytest))  # number of distinct classes
M = 5  # num hidden units

# One-hot targets for the train and test labels.
Ytrain_ind = y2indicator(Ytrain, K)
Ytest_ind = y2indicator(Ytest, K)

# Random initialization of a one-hidden-layer network's weights.
W1 = np.random.randn(D, M)
b1 = np.zeros(M)
W2 = np.random.randn(M, K)
b2 = np.zeros(K)
raise Exception("no weights given") saver.restore(sess, DST) gaussian_mean = sess.run(gauss_global_mean, feed_dict={}) #pdb.set_trace() bernouilli_mean = sess.run(svae_.y_generate_mean, feed_dict={normal_mean: gaussian_mean}) bernouilli_mean = np.transpose( np.reshape(bernouilli_mean, (nexamples, K, IMAGE_SIZE, IMAGE_SIZE)), (1, 0, 2, 3)) save_gene(bernouilli_mean, "./generate") if __name__ == '__main__': ###### Load and get data ###### train_data, test_data = data_processing.get_data("MNIST") #train_data = train_data[:40000] """ data = shuffle(data_processing.get_data()) #data = data[:1*BATCH_SIZE] data = data[:10000] """ # Convert to binary print("Converting data to binary") train_data = data_processing.binarize(train_data) test_data = data_processing.binarize(test_data) main(nets_archi, train_data, test_data, "training", "full") """ TODO options, arguments = parser.parse_args(sys.argv) if options.model not in models.keys():
import data_processing as dp
import cv2

# Fetch a small batch of image pairs and show the first image of each pair,
# advancing on any key press.
pairs, labels = dp.get_data(10, 4, True)
for image_pair in pairs:
    cv2.imshow('image', image_pair[0])
    cv2.waitKey(0)
    cv2.destroyAllWindows()

print(len(pairs))
print(len(labels))
import KSUS
import mnist_reader
from data_processing import get_data

# --- prerequisites ---
# Prefer the first GPU when available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

dir_path = os.path.dirname(os.path.realpath(__file__))

# --- data preprocessing ---
# The trailing "#000"/"#00" comments suggest the full-size values are
# 48000 / 1000 — confirm before scaling up.
train_size = 48  #000
test_size = 10  #00
data_train, data_test, data_demo = get_data(train_size, test_size)
h, w = data_train.shape[1], data_train.shape[2]


# --- dataset ---
class ksus_Dataset(Dataset):
    """Minimal Dataset wrapper over a pre-loaded collection of clean samples."""

    def __init__(self, datasetclean):
        self.clean = datasetclean

    def __len__(self):
        return len(self.clean)

    def __getitem__(self, idx):
        return self.clean[idx]
# If the user asked for an output file, append the pose data to it, run the
# classifier on that file, then best-effort delete it.
if args.output is not None:
    # A brand-new file needs the CSV column headings written first.
    header_needed = not os.path.isfile(args.output)
    # Context manager ensures the file is closed even if write() raises.
    with open(args.output, 'a+') as f:
        if header_needed:
            f.write(
                "position,noseX,noseY,neckX,neckY,rshoulderX,rshoulderY,relbowX,relbowY,rwristX,"
                "rwristY,lshoulderX,lshoulderY,lelbowX,lelbowY,lwristX,lwristY,midhipX,midhipY,rhipX,"
                "rhipY,rkneeX,rkneeY,rankleX,rankleY,lhipX,lhipY,lkneeX,lkneeY,lankleX,lankleY,reyeX,"
                "reyeY,leyeX,leyeY\n" + string_humans)
        else:
            f.write(string_humans)

    # Train on the default data set and classify the rows just written.
    X_train, y_train = get_data()
    X_test, y_test = get_data(data_dir=args.output)
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes=(8, 16),
                        random_state=1,
                        activation='logistic')
    clf.fit(X_train, y_train)
    print("The output results: ")
    print(clf.predict(X_test))
    print(" ")
    # Best-effort cleanup: narrowed from a bare except so real bugs
    # (e.g. TypeError) are no longer silently swallowed.
    try:
        os.remove(args.output)
    except OSError:
        pass
####################
# Helper Functions
####################


def multiple_string_lines(title, threshold):
    """Break a long chart title into two lines joined by an HTML <br>.

    Titles of at most `threshold` characters are returned unchanged; longer
    ones are split at (roughly) the midpoint word.
    """
    if len(title) <= threshold:
        return title
    words = title.split(' ')
    midpoint = int(len(words) / 2)
    return ' '.join(words[:midpoint]) + '<br>' + ' '.join(words[midpoint:])


####################
# Get Data
####################
df = dp.get_data()
df_long = dp.get_long_data()
period_mean = dp.get_period_mean_data()
period_total_perc = dp.get_period_total_perc_data()
df_incidents = dp.get_formatted_incident_rate_perc_changed_by_airline_data()
df_fatal = dp.get_formatted_fatal_rate_perc_changed_by_airline_data()

####################
# Color Theme for Graphs
####################
period_colors = {'1985-1999': '#33626C', '2000-2014': '#C1D6E2'}
other_colors = ['#5F5B6E', '#312932', '#458CA5', '#F6A941']

####################
# Static Graphs
from sklearn.pipeline import FeatureUnion
from sklearn import model_selection
from confusion_matrix import plot_confusion_matrix
from data_processing import get_data, clean_sentences
from metric_labeling import metric_labeling, train_nnc
from polarity_feature import get_polarity_features
from rest_features import get_features
from sampling import undersample, oversample, split_validation
from tune_params import tune_params
from util import Author
from scipy.sparse import csr_matrix
import pandas as pd

# Load preprocessed data.
data, sentences = get_data(Author.SCHWARTZ)
# Labels are taken from the second column of the loaded frame.
y = data.iloc[:, 1].values
possible_labels = np.unique(y)

# Split training data
# Original row indices ride along with the split so the full review text
# (column 2) can be recovered for each partition below.
indices = range(len(sentences))
data_train, data_test, labels_train, labels_test, i_train, i_test = train_test_split(
    sentences, y, indices, test_size=0.20, random_state=42)
# Re-split each full review into sentences; '#' is appended as a sentinel
# terminator so the regex also captures a final fragment that lacks closing
# punctuation.
full_sentences_train = [
    re.findall('.*?[.!\?#]', data.iloc[i, 2] + "#") for i in i_train
]
full_sentences_test = [
    re.findall('.*?[.!\?#]', data.iloc[i, 2] + "#") for i in i_test
]
# NOTE(review): np, re and train_test_split are used here but their imports
# are not visible in this chunk — presumably earlier in the file; verify.
# The body of the following loop continues beyond this chunk.
for review_sentences in full_sentences_train: