def select_indices():
    print 'reading in features'
    test_features = Input.load_testdata_caffefeatures(padded=True)
    train_features = Input.load_traindata_caffefeatures(padded=True)

    print 'selecting indices'
    # get indices of features that have a non-zero variance in the test data
    selector1 = VarianceThreshold()
    selector1.fit_transform(test_features)
    indices_test = selector1.get_support(indices=True)

    # get indices of features that have a non-zero variance in the train data
    selector2 = VarianceThreshold()
    selector2.fit_transform(train_features)
    indices_train = selector2.get_support(indices=True)

    # only keep indices that have variance in both test and train data
    indices = list(set(indices_test) & set(indices_train))

    # add 1 to all indices
    indices = [x + 1 for x in indices]

    # save indices to csv file (with-block ensures the file is flushed and closed)
    with open('caffefeature_indices_padded.csv', 'wb') as myfile:
        wr = csv.writer(myfile)
        wr.writerow(indices)
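# A hedged helper sketch, not part of the original module: read the 1-based
# indices written by select_indices() back in and use them to subset a feature
# DataFrame. The function name and the DataFrame assumption are illustrative only.
def load_selected_columns(df_features, index_file='caffefeature_indices_padded.csv'):
    with open(index_file) as f:
        one_based = [int(i) for i in next(csv.reader(f))]
    zero_based = [i - 1 for i in one_based]  # undo the +1 shift applied above
    return df_features.iloc[:, zero_based]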
def sort_dataframe(df_data, df_filenames):
    correct_order = Input.load_testdata_filenames()
    current_order = list(df_filenames.values)
    indices = [current_order.index(filename) for filename in correct_order]
    df_data = df_data.reindex(indices)
    df_data = df_data.reset_index()  # reset index --> adds new indices, old indices become column 'index'
    return df_data.drop('index', axis=1)  # remove this new column 'index'
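# Illustrative usage only; df_predictions and df_filenames are assumed names,
# not taken from the original file. The call reorders one-row-per-image
# predictions into the official test-set filename order:
# df_predictions_sorted = sort_dataframe(df_predictions, df_filenames)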
def __init__(self, **kwargs):
    super().__init__(**kwargs)
    from copy import deepcopy
    new_kwargs = deepcopy(kwargs)
    del new_kwargs["name"]
    if "expression" in new_kwargs:
        del new_kwargs["expression"]
    if "central_name_gen" in new_kwargs:
        new_kwargs["central_name_gen"] = kwargs["central_name_gen"]
    self.taps = []
    self.add_input(Input(name="clk", width=1, expression="", **new_kwargs))
    self.input_list = kwargs["input_list"]
    num_resets = 0
    num_sets = 0
    num_toggles = 0
    self.input_name_list = []
    for one_char in kwargs["input_list"]:
        if one_char.lower() == "r":
            one_name = "rst_" + ("%04d" % num_resets)
            one_sig = Input(name=one_name, width=1, expression="", **new_kwargs)
            self.add_input(one_sig)
            self.add_device(one_sig)
            num_resets += 1
            self.input_name_list.append(one_name)
        elif one_char.lower() == "s":
            one_name = "set_" + ("%04d" % num_sets)
            one_sig = Input(name=one_name, width=1, expression="", **new_kwargs)
            self.add_input(one_sig)
            self.add_device(one_sig)
            num_sets += 1
            self.input_name_list.append(one_name)
        elif one_char.lower() == "t":
            one_name = "tog_" + ("%04d" % num_toggles)
            one_sig = Input(name=one_name, width=1, expression="", **new_kwargs)
            self.add_input(one_sig)
            self.add_device(one_sig)
            num_toggles += 1
            self.input_name_list.append(one_name)
    out_sig = Signal(name="q_sig", width=1, expression="", **new_kwargs)
    self.signal_list.append(out_sig)
    self.add_output(Output(name="q", width=1, expression="", **new_kwargs))
def compute_logloss(df_filenames, df_data):
    # STEP 1: clip probabilities away from exactly 0 and 1
    replacer = lambda x: max(float(min(x, 0.999999999999)), 0.0000000000000001)
    df_data = df_data.applymap(replacer)

    # STEP 2: rescale each row so its probabilities sum to 1
    df_subsum = df_data.sum(axis=1)
    df_data = df_data.div(df_subsum, axis=0)

    # STEP 3: logloss
    # load correct validationset labels
    labels = Input.load_validationset_labels()
    df_labels = pd.get_dummies(labels)  # to one-hot-encoding, DataFrame automatically
    df_labels.columns = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']

    # sort data to have same order as labels
    correct_order = Input.load_validationset_filenames()
    current_order = list(df_filenames.values)
    indices = [current_order.index(filename) for filename in correct_order]
    df_data = df_data.reindex(indices)
    df_data = df_data.reset_index()  # reset index --> adds new indices, old indices become column 'index'
    df_data = df_data.drop('index', axis=1)  # remove this new column 'index'

    # select probabilities of correct classes only
    df_sparse_probs = df_data * df_labels
    probs = df_sparse_probs.values
    probs = list(chain.from_iterable(probs))  # flatten list
    probs = filter(lambda x: x != 0, probs)  # remove all zeros

    # apply log to them and take the average
    log_probs = [math.log(p) for p in probs]
    return -(np.mean(log_probs))
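# A hedged cross-check, not part of the original pipeline: assuming the c0..c9
# columns correspond to the sorted class labels and the rows are already in the
# official validation order, sklearn's log_loss should give (nearly) the same
# value as compute_logloss.
# from sklearn.metrics import log_loss
# reference = log_loss(Input.load_validationset_labels(), df_data_sorted.values)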
def __init__(self, **kwargs):
    super().__init__(**kwargs)
    from copy import deepcopy
    new_kwargs = deepcopy(kwargs)
    del new_kwargs["name"]
    del new_kwargs["width"]
    if "expression" in new_kwargs:
        del new_kwargs["expression"]
    if "central_name_gen" in new_kwargs:
        new_kwargs["central_name_gen"] = kwargs["central_name_gen"]
    self.children.append(BasicDelay(**kwargs))
    self.taps = []
    self.add_input(Input(name="clk", width=1, expression="", **new_kwargs))
    self.add_input(Input(name="rst", width=1, expression="", **new_kwargs))
    self.add_input(Input(name="d_in", width=self.children[0].width, expression="", **new_kwargs))
    self.add_output(Output(name="d_out", width=self.children[0].width, expression="", **new_kwargs))
def main():
    print("Searching csv files")
    directory_path = "./todosLosAngulos"
    file_searching_pattern = "**/*.csv"
    paths = Utilities.get_csv_filenames(directory_path, file_searching_pattern)
    db_path = "locomotionAnalysis.db"
    #print("Listing " + str(len(paths)) + " files \n")

    input_list = []
    # Create object Input based on csv files
    for path in paths:
        input_obj = Input(str(path))
        input_obj.compute_metadata()
        input_list.append(input_obj)

    input_obj = input_list[0]
    print(input_obj.get_path())
    input_obj.insert_steps_into_db(db_path)
    print("------------------------------")
def decompress(self, in_file_name, out_file_name=''):
    # assign output filename
    if out_file_name == '':
        out_file_name = in_file_name.split('.')[0] + ".png"
    else:
        out_file_name = out_file_name.split('.')[0] + ".png"
    print('Decompressing "%s" -> "%s"' % (in_file_name, out_file_name))

    print('Reading...')
    stream = Input(in_file_name)
    decoder = Decoder(stream)

    # decode image dimensions
    height, width = decoder.decode_header()
    stream.flush()
    size_header = stream.bytes_read
    print('* Header: %d bytes' % size_header)

    # decode Huffman table
    tree = decoder.decode_tree()
    stream.flush()
    size_tree = stream.bytes_read - size_header
    print('* Tree: %d bytes' % size_tree)

    # decode image pixel data
    image = decoder.decode_pixels(height, width, tree)
    stream.close()
    size_pixels = stream.bytes_read - size_tree - size_header
    print('* Pixels: %d bytes' % size_pixels)

    size_read = stream.bytes_read
    print('Decompressed %d bytes.' % size_read)
    print('Image dimensions: %d x %d px' % (width, height))

    image.save(out_file_name)

    size_raw = raw_size(width, height)
    print('RAW size: %d bytes' % size_raw)
    space_expand = 100 * (float(size_raw) / size_read - 1)
    print('Memory expanded by %0.2f%%.' % space_expand)
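# Illustrative usage only; the codec class name and file names below are
# assumptions, not taken from this file:
# codec = HuffmanCodec()
# codec.decompress('image.bin', 'image_restored')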
import pandas as pd
import time
from sklearn.ensemble import RandomForestClassifier
from IO import Input
from IO import Output

start_time = time.time()

# load train data
df_trainset_caf = Input.load_trainset_caffefeatures()
df_trainset_lab = Input.load_trainset_labels()

# Load test data
df_validationset_caf = Input.load_validationset_caffefeatures()

print("--- load data: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

x_train = df_trainset_caf
y_train = df_trainset_lab
x_test = df_validationset_caf

# Train model
rf = RandomForestClassifier(n_estimators=500)
rf.fit(x_train, y_train)

print("--- train model: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

# Predict
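# The script stops at the prediction step; the lines below are a hedged sketch
# of how it could continue on the validation split. sklearn's log_loss is used
# only as an illustrative metric, assuming the validation features and labels
# share the same row order; the project's own evaluation helpers are not shown here.
# from sklearn.metrics import log_loss
# probs = rf.predict_proba(x_test)
# print("validation log loss: %.4f" % log_loss(Input.load_validationset_labels(), probs))
# print("--- predict: %s seconds ---" % round((time.time() - start_time), 2))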
from IO import IO
from Camera import Camera
from IO import Input
from IO import OutputClock
from Image import Image
import code

print("Beholder v0.0.1")

# use lowercase instance names so the IO and Camera classes are not shadowed
io = IO()
camera = Camera()


def takePhoto():
    img = Image(camera.takePhoto())
    img.save()


sumitomoInput = Input(24, "Sumitomo", takePhoto)
clockTestOutput = OutputClock(25, "Fake Signal", 0.1)


def repl(IO, Camera, takePhoto):
    code.interact(local=locals())


repl(io, camera, takePhoto)
from sklearn.feature_selection import chi2
from IO import Input
import numpy as np
import csv

print 'loading data'
X = Input.load_trainset_caffefeatures()
Y = Input.load_trainset_labels()

print 'compute chi2 values'
chi, p = chi2(X, Y)
chi = [np.inf if np.isnan(x) else x for x in chi]  # make all nans into infs

count_inf = (np.isinf(chi)).sum()
print 'number of infinities: ' + str(count_inf) + ' of ' + str(len(chi))

print 'sort features on relevance'
indices = np.argsort(chi)

print 'save feature indices to csv'
with open('feature_importance_trainset_chi2.csv', 'wb') as myfile:
    wr = csv.writer(myfile)
    wr.writerow(indices)
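# Hedged sketch (not in the original script) of how the saved ranking could be
# used: keep only the top 10% most relevant features. np.argsort is ascending,
# so the highest chi2 scores sit at the end of `indices`. This assumes X is a
# pandas DataFrame, as elsewhere in the project.
# n_keep = int(0.10 * len(indices))
# top_indices = indices[-n_keep:]
# X_top = X.iloc[:, top_indices]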
import numpy as np
import pandas as pd
#from Output import *
import pickle
#import xgboost as xgb
from IO import Input
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

print('loading data')

#Load data
x_traindata = pd.read_csv('HOG_features_train_8_16_1.csv', sep=',', header=None).values
x_testdata = pd.read_csv('HOG_features_test_8_16_1.csv', sep=',', header=None).values

#load classification
y_traindata = np.asarray(Input.load_traindata_labels())

print('training classifier')

#Train classifier
clf = OneVsRestClassifier(SVC(kernel='poly', probability=True))
clf.fit(x_traindata, y_traindata)

# now you can save it to a file
with open('classifierpolytraindata_HOG_8_16_1.pkl', 'wb') as f:
    pickle.dump(clf, f)

## and later you can load it
with open('classifierpolytraindata_HOG_8_16_1.pkl', 'rb') as f:
    clf = pickle.load(f)

#Make predictions
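# Hedged sketch of the missing prediction step; how the probabilities are
# written out depends on the (commented-out) Output helper, so only
# predict_proba on the reloaded classifier is shown here.
# predictions = clf.predict_proba(x_testdata)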
'''Simple test file to test whether loading caffefeatures works properly.
Selecting percentiles, selecting rows and giving error messages.
@author: Diede Kemper'''

from IO import Input

features = Input.load_validationset_caffefeatures()
print features.shape
print 'should be: 8061x3983'

features = Input.load_traindata_caffefeatures(userows=range(3000, 5500))
print features.shape
print 'should be: 2500x3983'

features = Input.load_validationset_caffefeatures(featureSelectionMethod='chi2', Percentile=100)
print features.shape
print 'should be: 8061x3983'

features = Input.load_validationset_caffefeatures(featureSelectionMethod='hoi', Percentile=90)
print features.shape
print 'should print error message'

features = Input.load_validationset_caffefeatures(featureSelectionMethod='chi2', Percentile=210)
print features.shape
print 'should print error message'

features = Input.load_traindata_caffefeatures(featureSelectionMethod='chi2', Percentile=5)
print features.shape
import pandas as pd
import time
from sklearn.ensemble import RandomForestClassifier
from IO import Input
from IO import Output

start_time = time.time()

# load train data
df_traindata_caf = Input.load_traindata_caffefeatures()
df_traindata_lab = Input.load_traindata_labels()

# Load test data
df_testdata_caf = Input.load_testdata_caffefeatures()

print("--- load data: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

x_train = df_traindata_caf
y_train = df_traindata_lab
x_test = df_testdata_caf

# Train model
rf = RandomForestClassifier(n_estimators=500)
rf.fit(x_train, y_train)

print("--- train model: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

# Predict
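# Hedged sketch of the missing prediction/output step; the submission column
# names and file name below are assumptions, not taken from the original file.
# probs = rf.predict_proba(x_test)
# df_sub = pd.DataFrame(probs, columns=['c0', 'c1', 'c2', 'c3', 'c4',
#                                       'c5', 'c6', 'c7', 'c8', 'c9'])
# df_sub.insert(0, 'img', Input.load_testdata_filenames())
# df_sub.to_csv('submission_rf_caffefeatures.csv', index=False)
# print("--- predict: %s seconds ---" % round((time.time() - start_time), 2))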
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import SVC
from sklearn.svm import NuSVC
from sklearn.svm import LinearSVC
import numpy as np
import pandas as pd
from IO import Input
from IO import Output
import pickle

'''
Helper function to use with the grouping of the dataframe,
turns 3 rows of coordinates into a single row
'''
def transformXY(coords):
    return pd.Series(np.asarray(coords).ravel())

#Load the file names of the various datasets
trainset_filenames = Input.load_trainset_filenames()
validationset_filenames = Input.load_validationset_filenames()
traindata_filenames = Input.load_traindata_filenames()
testset_filenames = Input.load_testdata_filenames()

#Load the features
feat = pd.read_csv('skinTrainFeatures.csv', index_col=0)

#Select the features for each dataset
x_trainset = feat.ix[trainset_filenames]
x_validationset = feat.ix[validationset_filenames]
x_testset = feat.ix[testset_filenames]
x_traindata = feat.ix[traindata_filenames]

#Load the labels for each dataset
y_trainset = np.asarray(Input.load_trainset_labels())
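# Hedged sketch of a possible next step (the original file is cut off here):
# fit a one-vs-one linear SVM on the trainset split and score the validation
# split. The validation-label loader is the same Input helper used above;
# everything else is illustrative.
# y_validationset = np.asarray(Input.load_validationset_labels())
# clf = OneVsOneClassifier(LinearSVC())
# clf.fit(x_trainset, y_trainset)
# print(clf.score(x_validationset, y_validationset))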
import pandas as pd
import sys
from IO import Input

print 'reading in features'
df = pd.read_csv('features_test_padded.csv', header=None)
print 'Old dataframe'
print df.head()

#
# TESTDATA
#

#get filenames
testdata_filenames = Input.load_testdata_filenames()
caffefeatures_filenames = list(df[0].values)

# check whether there are files without caffefeatures
missing_filenames = list(set(testdata_filenames) - set(caffefeatures_filenames))

if not missing_filenames:  #if there are no missing files
    print 'All testdata files have caffefeatures.'
else:
    print str(len(missing_filenames)) + ' testdata files do not have caffefeatures'
    sys.exit("Program execution is stopped, because not all testdata files have caffefeatures. First solve this bug!")

# sort features on testdata filenames
import pandas as pd
import sys
from IO import Input

print 'reading in features'
df = pd.read_csv('features_train_padded.csv', header=None)
print 'Old dataframe'
print df.head()

#
# TRAINDATA
#

#get filenames
traindata_filenames = Input.load_traindata_filenames()
caffefeatures_filenames = list(df[0].values)

# check whether there are files without caffefeatures
missing_filenames = list(set(traindata_filenames) - set(caffefeatures_filenames))

if not missing_filenames:  #if there are no missing files
    print 'All traindata files have caffefeatures.'
else:
    print str(len(missing_filenames)) + ' traindata files do not have caffefeatures'
    sys.exit("Program execution is stopped, because not all traindata files have caffefeatures. First solve this bug!")

# sort features on traindata filenames
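# Hedged sketch of the sorting step the script stops at; the approach, output
# file name, and variable names are assumptions, not the original code: index
# the features by filename, reindex in the official traindata order, then move
# the filename back into the first column before writing.
# df_sorted = df.set_index(0).reindex(traindata_filenames).reset_index()
# df_sorted.to_csv('features_train_padded_sorted.csv', header=False, index=False)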