def __init__(self):
    reader = DataReader(1, 5)  # second argument matches ECG5000's 5 classes
    name = "ECG5000" + "/" + "ECG5000"
    # Note: TRAIN and TEST files are deliberately swapped relative to their
    # names -- ECG5000's nominal TEST split is the larger one.
    self.x_train, self.y_train = reader.read_train_data('../UCR_TS_Archive_2015/' + name + "_TEST")
    self.x_test, self.y_test = reader.read_test_data("../UCR_TS_Archive_2015/" + name + "_TRAIN")
    self.x_input, self.y_input = reader.temp_read_test_data("../UCR_TS_Archive_2015/" + name + "_TRAIN")
def __init__(self):
    reader = DataReader(1, 3)  # second argument matches StarLightCurves' 3 classes
    name = "StarLightCurves" + "/" + "StarLightCurves"
    self.x_train, self.y_train = reader.read_train_data('../UCR_TS_Archive_2015/' + name + "_TRAIN")
    self.x_test, self.y_test = reader.read_test_data("../UCR_TS_Archive_2015/" + name + "_TEST")
    self.x_input, self.y_input = reader.temp_read_test_data("../UCR_TS_Archive_2015/" + name + "_TEST")
def __init__(self):
    reader = DataReader(187, 6)  # second argument matches the 6 activity classes
    self.x_train, self.y_train = reader.read_train_human('./train/')
    self.x_test, self.y_test = reader.read_test_human("./test/")
    self.x_input, self.y_input = reader.temp_read_test_human("./test/")
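# The DataReader class itself is never shown in these snippets, and its first
# constructor argument varies across them (1, 152, 187), so its meaning is
# unclear -- possibly a per-step feature dimension or a series length. The
# second argument consistently matches the dataset's class count. A minimal
# sketch of the assumed interface for the UCR readers (inferred from usage,
# not the real implementation):
import numpy as np

class DataReader:
    def __init__(self, dim=1, n_classes=2):
        self.dim = dim              # assumed: per-step feature dimension
        self.n_classes = n_classes  # assumed: number of target classes

    def read_train_data(self, path):
        # UCR_TS_Archive_2015 files are CSV with the class label in column 0.
        raw = np.loadtxt(path, delimiter=',')
        return raw[:, 1:], raw[:, 0].astype(int)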
def pre_load_fixed_data():
    keywords = [
        "music", "food", "sport", "show", "movie",
        "car", "commercial", "party", "war", "hello",
    ]
    data = DataReader()
    return data.read("static/data/tweets.txt", keywords)
def get_libsvm_data(self):
    reader = DataReader()
    self.x_train, self.y_train = reader.temp_read_train_data(
        '../UCR_TS_Archive_2015/wafer/wafer_TRAIN2')
    self.x_test, self.y_test = reader.temp_read_test_data(
        '../UCR_TS_Archive_2015/wafer/wafer_TEST2')
    # Shift the labels down by one, presumably to make them zero-based.
    self.y_train = self.y_train - 1
    self.y_test = self.y_test - 1
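# A quick sanity check of the label shift above (assuming the *_TRAIN2 files
# carry labels {1, 2}; zero-based {0, 1} targets are what softmax/one-hot
# losses expect):
import numpy as np

y = np.array([1, 2, 2, 1])
print(y - 1)  # -> [0 1 1 0]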
def get_libsvm_data(self):
    reader = DataReader()
    #self.x_train, self.y_train = reader.temp_read_train_data('../UCR_TS_Archive_2015/ECG5000/ECG5000_TEST')
    #self.x_test, self.y_test = reader.temp_read_test_data('../UCR_TS_Archive_2015/ECG5000/ECG5000_TRAIN')
    self.x_train, self.y_train = reader.temp_read_train_human(
        '../human/train/')
    self.x_test, self.y_test = reader.temp_read_test_human(
        "../human/test/")
def __init__(self):
    reader = DataReader(152, 2)  # 152 is wafer's series length; 2 its class count
    name = "wafer" + "/" + "wafer"
    self.x_train, self.y_train = reader.read_train_data(
        '../UCR_TS_Archive_2015/' + name + "_TRAIN2")
    self.x_test, self.y_test = reader.read_test_data(
        "../UCR_TS_Archive_2015/" + name + "_TEST2")
    self.x_input, self.y_input = reader.temp_read_test_data(
        "../UCR_TS_Archive_2015/" + name + "_TEST2")
def test_read_function(url):
    """
    Test for the read method of the read_data class with the given url;
    passes if none of the assertions fail.
    """
    web_data = DataReader(url)
    data, columns = web_data.read()
    assert_equals(True, len(data) > 0)
    assert_equals(True, len(columns) > 0)
    assert_equals(252, len(data))
    assert_equals(15, len(columns))
    assert_equals(23, data.loc[0, 'Age (years)'])
    assert_equals('Weight (lbs)', columns[3])
    return data, columns
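# A minimal way to exercise the test above. The URL comes from the main()
# snippet further down; the asserted counts match that bodyfat dataset.
data, columns = test_read_function('http://lib.stat.cmu.edu/datasets/bodyfat')
print(len(data), len(columns))  # expected: 252 15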
def __init__(self, input_num=3):
    self.reader = DataReader()
    self.train = list()
    self.success = []
    self.conf_matr = dict()
    # One 10x10 confusion matrix per value of k.
    for i in range(1, input_num):
        temp = [[0 for _ in range(10)] for _ in range(10)]
        self.conf_matr[i] = temp
    # Run k-NN for every k in [1, input_num).
    for i in range(1, input_num):
        self.knn(i)
    # Convert raw counts into per-class percentages.
    for i in range(1, input_num):
        for j in range(10):
            for k in range(10):
                self.conf_matr[i][j][k] = (
                    (self.conf_matr[i][j][k])
                    / (len(self.reader.test_data[str(j)]))) * 100
    for i in range(1, input_num):
        print()
        print('for k as ', i)
        # 444 is presumably the size of the test set.
        print('accuracy is ', (self.success[i - 1] / 444) * 100)
        print('the confusion matrix is ')
        print()
        for j in range(10):
            print(self.conf_matr[i][j])
        print()
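# The triple loop above normalizes each row of a confusion matrix by the
# per-class test-set size. With numpy the same normalization is one line;
# a sketch with made-up counts (in practice class_sizes would be the known
# per-class test counts):
import numpy as np

counts = np.random.randint(1, 50, size=(10, 10))
class_sizes = counts.sum(axis=1)
percent = counts / class_sizes[:, None] * 100
print(percent.round(2))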
def main():
    sns.set(font_scale=0.7)
    # Process Data
    url = 'http://lib.stat.cmu.edu/datasets/bodyfat'
    web_data = DataReader(url)
    data, columns = web_data.read()
    # Plotting
    all_correlation = correlation_chart(data, columns)
    graphs(data, all_correlation)
    x = data.loc[:, 'Age (years)':'Wrist circumference (cm)']
    y = data["Percent body fat from Siri's (1956) equation"].to_numpy()
    # Linear Regression
    x_train, x_test, y_train, y_test = \
        train_test_split(x, y, test_size=0.4, random_state=1)
    model = DecisionTreeRegressor()
    model.fit(x_train, y_train)
    linear_reg_model = linear_regression_fit(x_train, y_train)
    print('MSE for linear train:',
          mean_squared_error(y_train, linear_reg_model.predict(x_train)))
    print('MSE for linear test:',
          mean_squared_error(y_test, linear_reg_model.predict(x_test)))
    print('MSE for decisiontree train:',
          mean_squared_error(y_train, model.predict(x_train)))
    print('MSE for decisiontree test:',
          mean_squared_error(y_test, model.predict(x_test)))
    # High correlation part
    x_high_correlation = data[high_correlation(all_correlation)].copy()
    x_high_train, x_high_test, y_high_train, y_high_test = \
        train_test_split(x_high_correlation, y, test_size=0.4, random_state=1)
    high_model = DecisionTreeRegressor()
    high_model.fit(x_high_train, y_high_train)
    high_correlation_model = linear_regression_fit(x_high_train, y_high_train)
    print('MSE for high correlation train:',
          mean_squared_error(y_high_train,
                             high_correlation_model.predict(x_high_train)))
    print('MSE for high correlation test:',
          mean_squared_error(y_high_test,
                             high_correlation_model.predict(x_high_test)))
    print('MSE for high correlation decisiontree train:',
          mean_squared_error(y_high_train, high_model.predict(x_high_train)))
    print('MSE for high correlation decisiontree test:',
          mean_squared_error(y_high_test, high_model.predict(x_high_test)))
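# linear_regression_fit() is called above but not shown anywhere in these
# snippets. A minimal sketch of what it presumably wraps (plain sklearn least
# squares; the helper's real body may differ):
from sklearn.linear_model import LinearRegression

def linear_regression_fit(x_train, y_train):
    model = LinearRegression()
    model.fit(x_train, y_train)
    return model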
def classify(self, validation_path):
    data_reader = DataReader(validation_path)
    tweets, labels = data_reader.unprocessed_tweet, data_reader.labels
    confusion = np.zeros((2, 2))
    eq = 0
    for i in range(len(tweets)):
        n_negative = 0
        n_positive = 0
        split_tweet = tweets[i].split()
        for word in split_tweet:
            # Normalize the token: lowercase, then strip isolated
            # punctuation runs, digits, newlines and remaining symbols.
            token = word.lower().strip()
            token = re.sub(r'\s[^\s\w]+\s', ' ', token)
            token = re.sub(r'\d+\s?|\n|[^\s\w]', '', token)
            if token in self.negative_words:
                n_negative += 1
            if token in self.positive_words:
                n_positive += 1
        # Count tweets with no lexicon hits at all.
        if n_negative == n_positive == 0:
            print(split_tweet)
            eq += 1
        prediction = 1
        if n_negative > n_positive:
            prediction = 0
        correct = 1 if labels[i] == 4 else 0
        confusion[prediction][correct] += 1
    print('                     Real class')
    print('                     ', end='')
    print(' '.join('{:>8d}'.format(i) for i in range(2)))
    for i in range(2):
        if i == 0:
            print('Predicted class: {:2d} '.format(i), end='')
        else:
            print('                 {:2d} '.format(i), end='')
        print(' '.join('{:>8.3f}'.format(confusion[i][j]) for j in range(2)))
    for i in range(2):
        recall = confusion[i, i] / sum(confusion[:, i])
        precision = confusion[i, i] / sum(confusion[i, :])
        print('Class %i: Recall=%0.6f, Precision=%0.6f' % (i, recall, precision))
    print('Accuracy=%.06f' % ((confusion[0, 0] + confusion[1, 1]) / (np.sum(confusion))))
    print(eq / np.sum(confusion))
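# A quick illustration of the token normalization above, on a made-up token:
import re

token = '"Great!!"'.lower().strip()
token = re.sub(r'\s[^\s\w]+\s', ' ', token)
token = re.sub(r'\d+\s?|\n|[^\s\w]', '', token)
print(token)  # -> great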
def __init__(self, epochs=2):
    # randomize weight vectors
    self.weights = dict()
    for i in range(10):
        self.weights[i] = list(np.random.random((32, 32)))
        # print(self.weights[i])
    self.reader = DataReader()
    self.n = 0.1  # learning rate
    accs = list()
    self.conf_matrices = {}
    for k in range(epochs):
        self.conf_matrices[k] = [[0 for i in range(10)] for j in range(10)]
    # epochs, # of passes through training data
    for j in range(epochs):
        self.train()
        accs.append(((self.test(j)) / 444) * 100)  # 444: presumably the test-set size
    max_ep = 0
    mx_acc = 0
    # Convert raw confusion counts into per-class percentages.
    for i in range(epochs):
        for j in range(10):
            for k in range(10):
                self.conf_matrices[i][j][k] = (
                    (self.conf_matrices[i][j][k])
                    / (len(self.reader.test_data[str(j)]))) * 100
    for k in range(len(accs)):
        if accs[k] > mx_acc:
            mx_acc = accs[k]
            max_ep = k
    print('max accuracy for test set: ', mx_acc, " for no of epochs = ", max_ep)
    plt.plot([(i + 1) for i in range(epochs)], accs, 'r--')
    plt.show()
    for l in range(epochs):
        print("The accuracy for the epoch ", l + 1, " is ", accs[l])
        print("the conf matrix is")
        cur_arrr = self.conf_matrices[l]
        for m in cur_arrr:
            for n in m:
                print("%.2f" % n, end=" ")
            print()
from write_tfrecords import write_class_data_to_tfrecords
from read_data import DataReader

#write_data_to_tfrecords(path, 'test_new', [0], 3)
data_path = 'D:/data/SHREC15_nonrigid/SHREC15NonRigidTestDB/cleaned/output/'
output_path = 'E:/data/TFRecords/'
raw_label_path = "D:/data/SHREC15_nonrigid/test.cla"
label_path = "D:/data/SHREC15_nonrigid/labels.txt"

reader = DataReader()
class_dict = reader.read_raw_labels(raw_label_path)
class_all = list(class_dict.keys())
train_set = []
test_set = []
# Per class: meshes 4..23 go to training, meshes 0..3 to testing.
for class_name in class_all:
    train_set = train_set + class_dict[class_name][4:24]
    test_set = test_set + class_dict[class_name][0:4]

write_class_data_to_tfrecords(data_path, label_path, output_path,
                              'train_50class_gridinput', train_set,
                              start_level=0, level_num=3)
print("Training DATA finished")
write_class_data_to_tfrecords(data_path, label_path, output_path,
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import pandas as pd
import numpy as np
import sys
sys.path.append("../script")
from convert_input import Convert
from validation import Validation
from read_data import DataReader

reader = DataReader(1, 2)
name = "wafer"
x_train, y_train = reader.read_train_data('../UCR_TS_Archive_2015/' + name + "/" + name + "_TRAIN2")
x_test, y_test = reader.read_test_data("../UCR_TS_Archive_2015/" + name + "/" + name + "_TEST2")
x_input, y_input = reader.temp_read_test_data("../UCR_TS_Archive_2015/" + name + "/" + name + "_TEST2")
# Reorder to time-major layout: (steps, batch, features).
x_train = np.transpose(x_train, [1, 0, 2])
x_test = np.transpose(x_test, [1, 0, 2])

'''
To classify images using a recurrent neural network, we consider every image
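# What the [1, 0, 2] transpose above does, on a toy array: it swaps the batch
# and time axes so the RNN sees a time-major layout (shapes here are made up,
# using wafer's 152-step series length):
import numpy as np

batch = np.zeros((32, 152, 1))           # (batch, steps, features)
time_major = np.transpose(batch, [1, 0, 2])
print(time_major.shape)                  # -> (152, 32, 1)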
def main():
    parser = argparse.ArgumentParser(description='main')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--train', '-t', type=str,
                       help='file from which to train sentiment classification')
    group.add_argument('--load', '-l', type=str,
                       help='file from which to load sentiment classification')
    parser.add_argument('--destination', '-d', type=str,
                        help='file in which to store the sentiment classification')
    parser.add_argument('--classify', '-c', type=str,
                        help='file from which to classify tweets')
    parser.add_argument('--generate', '-g', type=str,
                        help='file from which to read language model')
    parser.add_argument('--validate', '-v', type=str,
                        help='file from which to validate sentiment classification')
    parser.add_argument('--test', '-test', type=str,
                        help='file from which to test sentiment classification')
    arguments = parser.parse_args()

    nb_class = NBClassifier()
    if arguments.train:
        data_reader = DataReader(arguments.train)
        tweets, labels = data_reader.tweets, data_reader.labels
        nb_class.train(tweets, labels)
        if arguments.destination:
            nb_class.write_to_file(arguments.destination)
    if arguments.load:
        nb_class.read_from_file(arguments.load)
        print('Loaded parameters from %s' % arguments.load)
    if arguments.validate and (arguments.load or arguments.train):
        print('Reading data from %s to validate' % arguments.validate)
        data_reader = DataReader(arguments.validate)
        tweets, labels = data_reader.tweets, data_reader.labels
        print('Done reading, starting validation')
        confusion = np.zeros((2, 2))
        for i in range(len(tweets)):
            prediction = nb_class.predict(data_reader.tweets[i])
            prediction = 1 if prediction == 4 else 0
            correct = 1 if labels[i] == 4 else 0
            confusion[prediction][correct] += 1
        print('                     Real class')
        print('                     ', end='')
        print(' '.join('{:>8d}'.format(i) for i in range(2)))
        for i in range(2):
            if i == 0:
                print('Predicted class: {:2d} '.format(i), end='')
            else:
                print('                 {:2d} '.format(i), end='')
            print(' '.join('{:>8.3f}'.format(confusion[i][j]) for j in range(2)))
        for i in range(2):
            recall = confusion[i, i] / sum(confusion[:, i])
            precision = confusion[i, i] / sum(confusion[i, :])
            print('Class %i: Recall=%0.6f, Precision=%0.6f' % (i, recall, precision))
        print('Accuracy=%.06f' % ((confusion[0, 0] + confusion[1, 1]) / (np.sum(confusion))))
    if arguments.classify and (arguments.load or arguments.train):
        trump_data_reader = TrumpDataReader(arguments.classify)
        tweets = trump_data_reader.tweets_training
        pos_index = []
        neg_index = []
        for i in range(len(tweets)):
            prediction = nb_class.predict(tweets[i])
            prediction = 1 if prediction == 4 else 0
            if prediction == 1:
                pos_index.append(i)
            elif prediction == 0:
                neg_index.append(i)
        positive_tweets = np.take(trump_data_reader.tweets_generating, pos_index)
        negative_tweets = np.take(trump_data_reader.tweets_generating, neg_index)
        path = arguments.classify[:arguments.classify.find('.')]
        with codecs.open(path + "_positive.txt", 'w', 'UTF-8') as f:
            for row in positive_tweets:
                f.write(row + "\n")
        with codecs.open(path + "_negative.txt", 'w', 'UTF-8') as f:
            for row in negative_tweets:
                f.write(row + "\n")
    if arguments.generate:
        generator = Generator()
        generator.read_model(arguments.generate)
        print("Tweet 1:")
        generator.generate("TWEET_START_SIGN")
        print("Tweet 2:")
        generator.generate("TWEET_START_SIGN")
        print("Tweet 3:")
        generator.generate("TWEET_START_SIGN")
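# Typical invocations of the CLI above (the file names are made up for
# illustration; only the flags come from the parser definition):
#   python main.py --train tweets_train.csv --destination model.json
#   python main.py --load model.json --validate tweets_dev.csv
#   python main.py --load model.json --classify trump.txt
#   python main.py --generate language_model.json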
def get_libsvm_data(self):
    reader = DataReader()
    self.x_train, self.y_train = reader.temp_read_train_data(
        '../UCR_TS_Archive_2015/StarLightCurves/StarLightCurves_TRAIN')
    self.x_test, self.y_test = reader.temp_read_test_data(
        '../UCR_TS_Archive_2015/StarLightCurves/StarLightCurves_TEST')
from read_data import DataReader
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

a = DataReader()
x_train = list()
y_train = list()
# Flatten the per-class dict {label: samples} into parallel X/y lists.
for i in a.train_data.keys():
    for j in a.train_data[i]:
        x_train.append(j)
        y_train.append(int(i))
        # print(int(i))
x_train_flat = list()
for i in x_train:
    temp = list()
    for j in i:
        temp.append(j)
    x_train_flat.append(temp)
x_test = list()
y_test = list()
for i in a.test_data.keys():
    for j in a.test_data[i]:
        x_test.append(j)
        y_test.append(int(i))
x_test_flat = list()
for i in x_test:
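# The snippet is cut off above, mid-way through flattening x_test. A hedged
# sketch of the likely continuation -- it mirrors the x_train flattening and
# then fits one of the imported classifiers; none of this is from the source:
#
#     temp = list()
#     for j in i:
#         temp.append(j)
#     x_test_flat.append(temp)
#
# knn = KNeighborsClassifier(n_neighbors=3)
# knn.fit(x_train_flat, y_train)
# print(accuracy_score(y_test, knn.predict(x_test_flat)))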
def _get_data(self):
    print("Getting Data..")
    d = DataReader()
    self.train = d.train
    self.test = d.test
    self.ground_truth = d.ground_truth
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import pandas as pd
import numpy as np
import sys
sys.path.append("../script")
from convert_input import Convert
from validation import Validation
from read_data import DataReader

reader = DataReader(1, 6)
x_train, y_train = reader.read_train_human('../human/train/')
x_test, y_test = reader.read_test_human("../human/test/")
x_input, y_input = reader.temp_read_test_human("../human/test/")
# Reorder to time-major layout: (steps, batch, features).
x_train = np.transpose(x_train, [1, 0, 2])
x_test = np.transpose(x_test, [1, 0, 2])

'''
To classify images using a recurrent neural network, we consider every image
row as a sequence of pixels. Because MNIST image shape is 28*28px, we will
then handle 28 sequences of 28 steps for every sample.
def write_mesh_scannet_to_tfrecords(dir_path, label_path, output_path,
                                    record_name, mesh_list, start_level=0,
                                    level_num=3):
    reader = DataReader()
    writer = tf.python_io.TFRecordWriter(output_path + record_name + '.tfrecords')
    count = 0
    one_iter_time = 0
    random.shuffle(mesh_list)
    total_num = len(mesh_list)
    for mesh_name in mesh_list:
        print("Start mesh " + mesh_name)
        time_start = time.time()
        shape_list = []
        maxpool_indices_list = []
        maxpool_offset_list = []
        maxpool_arg_list = []
        conv_indices_list = []
        conv_weight_list = []
        conv_offset_list = []
        conv_shape_list = []
        unpool_indices_list = []
        for j in range(start_level, start_level + level_num):
            para_name = dir_path + mesh_name + '_' + str(j) + '_pad.para'
            para_shape, indices, axis_indices, weights = reader.read_para(
                para_name)
            shape_list = shape_list + para_shape
            indices = decode_intlist_from_bytes(indices)
            axis_indices = decode_intlist_from_bytes(axis_indices)
            weights = decode_doublelist_from_bytes(weights)
            if (j == start_level):
                # Geometry, color and per-vertex labels come from the finest level.
                mesh_path = dir_path + mesh_name + '_' + str(j) + '.obj'
                mesh = openmesh.read_trimesh(mesh_path)
                mesh.update_vertex_normals()
                normal_list = mesh.vertex_normals()
                points = mesh.points()
                z_list = points[:, 2]
                rgb_list, label = reader.read_rgb_label(label_path + mesh_name + '.rgbl')
                #label = [0] * shape_list[0]
                label = flat_list(label)
                if (len(label) != shape_list[0]):
                    print("[Error]Unequal v_num")
                    return
            #print("decode finished")
            conv_indices = Conv_Matrix_arg(para_shape, indices, axis_indices,
                                           weights)
            conv_offset_list.append(len(conv_indices))
            conv_indices = flat_list(conv_indices)
            conv_indices_list = conv_indices_list + conv_indices
            conv_weight_list = conv_weight_list + weights
            if (j > start_level):
                hrch_name = dir_path + mesh_name + '_' + str(j) + '.hrch'
                cvt_nums, cover_vts = reader.read_hrch(hrch_name)
                hrch_axis_name = dir_path + mesh_name + '_' + str(j) + '.pool'
                cover_vts_axis = reader.read_pool(hrch_axis_name)
                maxpool_arg, maxpool_indices = MaxPooling_Matrix_arg(
                    shape_list[(j - start_level - 1) * 4],
                    shape_list[(j - start_level) * 4], para_shape[1],
                    cvt_nums, cover_vts, cover_vts_axis)
                maxpool_arg_list.append(maxpool_arg)
                #flat
                maxpool_offset_list.append(len(maxpool_indices))
                maxpool_indices = flat_list(maxpool_indices)
                maxpool_indices_list = maxpool_indices_list + maxpool_indices
                unpooling_indices = AveUnPooling_Matrix_arg(
                    shape_list[(j - start_level - 1) * 4],
                    shape_list[(j - start_level) * 4], para_shape[1],
                    cvt_nums, cover_vts, cover_vts_axis)
                unpooling_indices = flat_list(unpooling_indices)
                unpool_indices_list = unpool_indices_list + unpooling_indices
        new_feature = {
            'mesh_name': _bytes_feature(mesh_name.encode()),
            'shape': _bytes_feature(IntList_to_Bytes(shape_list)),
            'label': _bytes_feature(IntList_to_Bytes(label)),
            'z': _bytes_feature(Float32List_to_Bytes(z_list)),
            'normal': _bytes_feature(Float32List_to_Bytes(normal_list)),
            'rgb': _bytes_feature(Float32List_to_Bytes(rgb_list)),
            'maxpool/offset': _bytes_feature(IntList_to_Bytes(maxpool_offset_list)),
            'maxpool/arg': _bytes_feature(IntList_to_Bytes(maxpool_arg_list)),
            'maxpool/indices': _bytes_feature(IntList_to_Bytes(maxpool_indices_list)),
            'unpooling/indices': _bytes_feature(IntList_to_Bytes(unpool_indices_list)),
            'conv/offset': _bytes_feature(IntList_to_Bytes(conv_offset_list)),
            'conv/indices': _bytes_feature(IntList_to_Bytes(conv_indices_list)),
            'conv/weights': _bytes_feature(Float32List_to_Bytes(conv_weight_list)),
        }
        #height scaled
        input_feature = np.array([], dtype=np.float32)
        for j in range(start_level, start_level + level_num):
            feature_name = dir_path + mesh_name + '_' + str(j) + '_reweighted.input'
            feature_channel, grid_feature = reader.read_grid_feature(
                feature_name)
            grid_feature = np.reshape(grid_feature, (-1, 4))
            grid_feature = grid_feature[:, 3]
            input_feature = np.concatenate((input_feature, grid_feature),
                                           axis=0)
        new_feature['input_feature'] = _bytes_feature(
            Float32List_to_Bytes(input_feature))
        new_feature['feature_channel'] = _int64_feature(feature_channel)
        example = tf.train.Example(features=tf.train.Features(
            feature=new_feature))
        writer.write(example.SerializeToString())
        time_end = time.time()
        # Running average of per-mesh time, used to estimate time remaining.
        one_iter_time = (
            (time_end - time_start) + one_iter_time * count) / (count + 1)
        time_left = one_iter_time * (total_num - count)
        print("Finish mesh " + mesh_name)
        print(str(100.0 * count / total_num) + "% finished!")
        print("%.2f min left!" % (time_left / 60))
        count = count + 1
    writer.close()
def write_class_data_to_tfrecords(dir_path, label_path, output_path,
                                  record_name, mesh_list, start_level=0,
                                  level_num=3):
    reader = DataReader()
    labels = reader.read_labels(label_path)
    writer = tf.python_io.TFRecordWriter(output_path + record_name + '.tfrecords')
    count = 0
    one_iter_time = 0
    random.shuffle(mesh_list)
    total_num = len(mesh_list)
    for i in mesh_list:
        time_start = time.time()
        label = labels[i]
        shape_list = []
        maxpool_indices_list = []
        maxpool_offset_list = []
        maxpool_arg_list = []
        conv_indices_list = []
        conv_weight_list = []
        conv_offset_list = []
        for j in range(start_level, start_level + level_num):
            para_name = dir_path + 'T' + str(i) + '_' + str(j) + '_pad.para'
            para_shape, indices, axis_indices, weights = reader.read_para(
                para_name)
            #print(shape)
            shape_list = shape_list + para_shape
            #print("shape_list:")
            #print(shape_list)
            indices = decode_intlist_from_bytes(indices)
            axis_indices = decode_intlist_from_bytes(axis_indices)
            weights = decode_doublelist_from_bytes(weights)
            #print("decode finished")
            conv_indices = Conv_Matrix_arg(para_shape, indices, axis_indices,
                                           weights)
            #print("finish build matrix")
            #flat
            conv_offset_list.append(len(conv_indices))
            conv_indices = flat_list(conv_indices)
            conv_indices_list = conv_indices_list + conv_indices
            conv_weight_list = conv_weight_list + weights
            #print(conv_offset_list)
            #print(len(weights))
            if (j > 0):
                hrch_name = dir_path + 'T' + str(i) + '_' + str(j) + '.hrch'
                cvt_nums, cover_vts = reader.read_hrch(hrch_name)
                hrch_axis_name = dir_path + 'T' + str(i) + '_' + str(j) + '.pool'
                cover_vts_axis = reader.read_pool(hrch_axis_name)
                maxpool_arg, maxpool_indices = MaxPooling_Matrix_arg(
                    shape_list[(j - 1) * 4], shape_list[j * 4],
                    para_shape[1], cvt_nums, cover_vts, cover_vts_axis)
                maxpool_arg_list.append(maxpool_arg)
                #flat
                maxpool_offset_list.append(len(maxpool_indices))
                maxpool_indices = flat_list(maxpool_indices)
                maxpool_indices_list = maxpool_indices_list + maxpool_indices
        new_feature = {
            'label': _int64_feature(label),
            'mesh_id': _int64_feature(i),
            'shape': _bytes_feature(IntList_to_Bytes(shape_list)),
            'maxpool/offset': _bytes_feature(IntList_to_Bytes(maxpool_offset_list)),
            'maxpool/arg': _bytes_feature(IntList_to_Bytes(maxpool_arg_list)),
            'maxpool/indices': _bytes_feature(IntList_to_Bytes(maxpool_indices_list)),
            'conv/offset': _bytes_feature(IntList_to_Bytes(conv_offset_list)),
            'conv/indices': _bytes_feature(IntList_to_Bytes(conv_indices_list)),
            'conv/weights': _bytes_feature(Float32List_to_Bytes(conv_weight_list)),
        }
        feature_name = dir_path + 'T' + str(i) + '_0_reweighted.input'
        feature_channel, grid_feature = reader.read_grid_feature(feature_name)
        new_feature['input_feature'] = _bytes_feature(
            Float32List_to_Bytes(grid_feature))
        new_feature['feature_channel'] = _int64_feature(feature_channel)
        example = tf.train.Example(features=tf.train.Features(
            feature=new_feature))
        writer.write(example.SerializeToString())
        time_end = time.time()
        # Running average of per-mesh time, used to estimate time remaining.
        one_iter_time = (
            (time_end - time_start) + one_iter_time * count) / (count + 1)
        time_left = one_iter_time * (total_num - count)
        print("Finish mesh " + str(i))
        print(str(100.0 * count / total_num) + "% finished!")
        print("%.2f min left!" % (time_left / 60))
        count = count + 1
    writer.close()
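# A quick way to verify records written by the functions above, using the same
# TF 1.x API the writers use. The path is a placeholder assembled from the
# output_path and record_name seen in the SHREC15 script; the 'label' key
# comes from new_feature:
import tensorflow as tf

path = 'E:/data/TFRecords/train_50class_gridinput.tfrecords'
for serialized in tf.python_io.tf_record_iterator(path):
    example = tf.train.Example()
    example.ParseFromString(serialized)
    print(example.features.feature['label'].int64_list.value)
    break  # inspect only the first record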