def __init__(self, data, unit=1, error=None, mask=None,
             dimensions=None, longname=u"Sampled Field",
             shortname=u"\\Psi", attributes=None, rescale=False):
    DataContainer.__init__(self, longname, shortname, attributes)
    self.data = data
    self.mask = mask
    # normalise the unit: accept strings (with '^' as power operator),
    # unicode, Quantity instances or plain numbers
    try:
        if isinstance(unit, (str, unicode)):
            unit = unit.replace('^', '**')
        if isinstance(unit, unicode):
            unit = unit.encode('utf-8')
        self.unit = Quantity(unit)
    except:
        try:
            self.unit = Quantity("1" + unit)
        except:
            self.unit = unit
    self.error = error
    if dimensions is not None:
        self.dimensions = dimensions
    else:
        # default dimensions: one generated index per data axis
        N = len(data.shape) - 1
        self.dimensions = [generateIndex(N - i, n)
                           for i, n in enumerate(data.shape)]
    if rescale:
        self.rescale()
        for dim in self._dimensions:
            dim.rescale()
    assert self.isValid()

def __SetNewData(self, data_container, case_index):
    array, label, feature_name, case_name = data_container.GetData()

    new_array = array[case_index, :]
    new_label = label[case_index]
    new_case_name = [case_name[i] for i in case_index]
    new_data_container = DataContainer(array=new_array, label=new_label,
                                       case_name=new_case_name,
                                       feature_name=feature_name)
    new_data_container.UpdateFrameByData()
    return new_data_container

def run(self):
    data, focal, pp = self.read_pkl_file()
    tfl_man = TFL_manager(pp, focal)
    prev_container = self.init_prev_container(
        self.frames[0], focal, pp, tfl_man,
        np.array(data['points_' + str(self.frames[0][31:33])][0]))

    for i in range(1, len(self.frames)):
        curr_frame = self.frames[i]
        prev_frame = self.frames[i - 1]
        curr_img_path = curr_frame
        EM = self.read_EM(prev_frame[31:33], curr_frame[31:33], data)
        curr_container = DataContainer(
            curr_img_path,
            np.array(data['points_' + str(curr_frame[31:33])][0]), EM)

        candidates, auxiliary = tfl_man.light_attention(curr_container)
        # print(candidates, "\n", auxiliary)
        tfl_man.tfl_detection(curr_container, candidates, auxiliary)
        curr_container = tfl_man.calc_distance(curr_container, prev_container)
        print(prev_container, "\n", curr_container)
        # visualize(prev_container, curr_container, focal, pp)
        prev_container = curr_container

def readline(self, key='', name=''):
    """Scans the file for single lines containing key at the start and name
    somewhere in the line. The lines are stored in separate DataContainers in a list

        key     []
        name    []

    Parameters:
        key - (str, optional) first tag in line
        name - (str, optional) word in the line

    Returns:
        list containing DataContainers with read data for each line found
    """
    self.filePointer.seek(0)        # restarts reading the file
    start = [key, name]
    containerList = []

    # start reading
    for line in self.filePointer:
        line = self.__removeComment(line)
        if start[0] in line[:len(start[0])] and start[1] in line:
            dataStructures = [line.split()]
            dataStructures = nestedListToDictionary(dataStructures)
            dataStructures = DataContainer(dataStructures)
            containerList.append(dataStructures)
    return containerList

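# Hedged usage sketch (not from the original source): assuming a reader
# instance that has already been opened on an input file, readline() could be
# used to pick out single-line entries and inspect them through the v()
# accessor used elsewhere in this code base, e.g.
#
#   for dc in reader.readline(key='module'):
#       print(dc.v('module'))
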
def __loadConfig(self):
    import config

    # collect all public attributes of config.py into a dictionary
    configvars = [var for var in dir(config) if not var.startswith('__')]
    d = {}
    for var in configvars:
        d[var] = getattr(config, var)   # equivalent to the original exec-based lookup
    return DataContainer(d)

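# Hedged illustration (not from the original source): __loadConfig mirrors
# every public attribute of config.py into the returned DataContainer, so a
# hypothetical config.py containing
#
#   MAXITERATIONS = 1000
#   OUTPUTDIR = 'output'
#
# yields a container holding the keys 'MAXITERATIONS' and 'OUTPUTDIR' with
# those values.
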
def loadData(self, name="train"):
    fin = self.__gzOpen(self.filename, "r")
    data = []
    classes = []
    hasclass = False

    # get the different sections of the ARFF file
    for line in fin:
        sline = line.strip().lower()
        if sline.startswith("%") or len(sline) == 0:
            continue
        if sline.startswith("@data"):
            break
        if sline.startswith("@attribute"):
            value = sline.split()
            if value[1].startswith("class"):
                hasclass = True
            else:
                data.append([])

    # read in the data stored in the ARFF file
    for line in fin:
        sline = line.strip()
        if sline.startswith("%") or len(sline) == 0:
            continue
        values = sline.split(",")
        if hasclass:
            classes.append(float(values[-1]))
            values = values[:-1]
        for i in xrange(len(values)):
            data[i].append(float(values[i]))

    # cleaning up and return
    fin.close()
    dim = len(data)
    size = len(data[0])
    dataMatrix = DataMatrix(size, dim)
    tempVector = DataVector(dim)
    valuesVector = DataVector(size)
    for rowIndex in xrange(size):
        for colIndex in xrange(dim):
            tempVector[colIndex] = data[colIndex][rowIndex]
        dataMatrix.setRow(rowIndex, tempVector)
        valuesVector[rowIndex] = classes[rowIndex]
    return DataContainer(points=dataMatrix, values=valuesVector,
                         name=name, filename=self.filename)

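# Hedged illustration (not from the original source): a minimal ARFF file this
# parser accepts. Every value is cast to float, an attribute whose name starts
# with 'class' marks the presence of targets, and the class values are read
# from the last field of each data row. Attribute names here are made up.
#
#   @RELATION example
#   @ATTRIBUTE x0 NUMERIC
#   @ATTRIBUTE x1 NUMERIC
#   @ATTRIBUTE class NUMERIC
#   @DATA
#   0.1,0.2,1
#   0.4,0.5,0
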
def __init__(self, cwd, inputFilePath):
    """create utilities required by the program, i.e. a registry checker,
    reader and module list
    """
    self.cwd = cwd
    self.inputFilePath = inputFilePath
    self._registryChecker = RegistryChecker()
    self._inputReader = Reader()
    self.moduleList = ModuleList()
    self.__prepModuleResult = DataContainer()
    return

def processConfig(confData, mandatory, optionsrec, databaserec):
    ## TODO: DETERMINE THE KEYS PRESENT IN THE FIELDS
    out = dict()
    fieldnames = list()
    for a in parse("$..fields").find(confData):
        for k in a.value:
            fieldnames.append(k)
            if k not in out:
                out[k] = None
    # for k in allfields:
    #     print k
    # exit()

    res = confData["resources"]
    outfile = None
    if "WRITEFILE" in optionsrec:
        outfile = codecs.open(optionsrec["WRITEFILE"], "w", "utf-8")
    # if "CALLGRAPHFILE" in optionsrec:
    #     callgraphfile = codecs.open(optionsrec["WRITEFILE"], "w", "utf-8")

    obj = DataContainer(res, out, fieldnames, mandatory, optionsrec, databaserec)
    obj.process()

def __loadOutput(self):
    """Read output requirements under 'requirements' keyword

    Returns:
        DataContainer containing output requirements
    """
    # read data from config and input file (NB. input file data may overwrite config data)
    outputData = self._inputReader.read('requirements')

    # check if a block of output requirements was found
    # if nothing is specified, do require output
    # if more than one block is found, take the last one
    if len(outputData) > 0:
        outputData = outputData[-1]
    else:
        outputData = DataContainer()
    return outputData

corr_res = np.zeros((pnl_range.shape[0], inl_range.shape[0]))
for i, population_noise_level in enumerate(pnl_range):
    # for j, inhibition_noise_level in enumerate(inl_range):
    # mu['noise_level'] = population_noise_level
    mu['var_inh_noise_level'] = population_noise_level
    which_var = 'var_inh_noise_infl'
    which_values = inl_range
    rn = 'inl_inf_' + str(population_noise_level)
    print 'running simulation with %s = %0.5f' % ('var_inh_noise_level', population_noise_level)

    # Create an instance of callback class
    nr_simulations = which_values.shape[0]
    dc = DataContainer(file_name + '.hdf5')
    da = DataAnalyzer(dc)

    if simulate:
        dc.setup_for_simulation(nr_timepoints=nr_timepoints,
                                nr_simulations=nr_simulations,
                                nr_variables=nr_variables)
        # running these in parallel
        # Creates jobserver with automatically detected number of workers
        job_server = pp.Server(ppservers=())
        # Execute the same task with different amount of active workers and measure the time
        for index in xrange(nr_simulations):
            mu[which_var] = which_values[index]
            job_server.submit(run_sim, (mu, nr_timepoints, func, npS),
                              callback=dc.save_to_array)
        # wait for jobs in all groups to finish
        job_server.wait()
        job_server.destroy()

    time.sleep(1)
    while not rospy.is_shutdown():
        try:
            tf_.waitForTransform("/nav", "/base_link", data.scan_ts,
                                 rospy.Duration.from_sec(0.5))
            (trans, rot) = tf_.lookupTransform("/nav", "/base_link", tt)
            # data.dcm = tf.transformations.quaternion_matrix(rot)
            data.euler = tf.transformations.euler_from_quaternion(rot)
            # print "trans:", trans
            # print "q:", rot
            # print "matrix", tf.transformations.quaternion_matrix(rot)
        except:
            glogger.error("can't listen tf!")
        # time.sleep(1)
    glogger.info("tf broadcaster quit.")


if __name__ == '__main__':
    global plotter, dt, data, icp

    data = DataContainer()
    # icp = SM_ICP()
    icp = CSM_ICP()
    dt = 0
    glogger = get_glogger(0, __file__)

    rospy.init_node('slam_py', anonymous=True)
    tt = rospy.Time.now()

    sub1 = threading.Thread(target=listener, args=())
    sub1.start()
    # sub2 = threading.Thread(target=tf_broadcaster, args=())
    # sub2.start()

    plotter = Plotter()
    plotter.show()

        testing_index_list = [temp for temp in list(range(data.shape[0]))
                              if temp not in self._training_index]
        self._training_index.sort()
        testing_index_list.sort()

        train_data_container = self.__SetNewData(data_container, self._training_index)
        test_data_container = self.__SetNewData(data_container, testing_index_list)

        # if store_folder:
        #     train_data_container.Save(os.path.join(store_folder, 'train_numeric_feature.csv'))
        #     df_training = pd.DataFrame(self._training_index)
        #     df_training.to_csv(os.path.join(store_folder, 'training_index.csv'), sep=',', quotechar='"')
        #     test_data_container.Save(os.path.join(store_folder, 'test_numeric_feature.csv'))
        #     df_testing = pd.DataFrame(testing_index_list)
        #     df_testing.to_csv(os.path.join(store_folder, 'testing_index.csv'), sep=',', quotechar='"')

        return train_data_container, test_data_container


if __name__ == '__main__':
    data = DataContainer()
    data.Load(r'..\..\Example\numeric_feature.csv')
    data_separator = DataSeparate()
    data_separator.Run(data, store_folder=r'..\..\Example')

    return int(selection)


end_test = False
while not end_test:
    selection_valid = False
    dataChoice = getValidSelection(
        1, 2,
        "Which data set would you like to use? \n [1] Data set 1 (A-Z) \n [2] Data set 2 (Greek symbols) \n :")
    if dataChoice == 1:
        if not 'd1' in locals():
            d1 = DataContainer("data-set-1", './Assig1-Dataset/train_1.csv',
                               './Assig1-Dataset/test_with_label_1.csv',
                               './Assig1-Dataset/val_1.csv',
                               './Assig1-Dataset/info_1.csv')
        dataSet = d1
    else:
        if not 'd2' in locals():
            d2 = DataContainer("data-set-2", './Assig1-Dataset/train_2.csv',
                               './Assig1-Dataset/test_with_label_2.csv',
                               './Assig1-Dataset/val_2.csv',
                               './Assig1-Dataset/info_2.csv')
        dataSet = d2

    opType = getValidSelection(
        1, 9,
        "Which test would you like to do? \n [1] GNB \n [2] BASE-DT \n"
        " [3] BEST-DT (Configure test params in TreeTestParams.py) \n [4] PER \n [5] BASE-MLP \n"
        " [6] BEST-MLP (configure test params in MLPTestParams.py) \n"
        " [7] All base tests (GNB, BASE-DT, PER, BASE-MLP) \n [8] ALL tests \n [9] Plot data \n:")
    if opType == 9:
        PlotDataContainer(dataSet)

def read(self, chapter='', name=''):
    """General reader. Reads data in file between 'chapter' 'name' and the next
    'chapter' tag (first 'chapter' is inclusive).

    This has the following structure:
        chaptername name                (string)
        key value                       (scalar, string or vector)
        " "     key subkey value
        " "     key " " " "

    Parameters:
        chapter - (str, optional) chapter name
                  Default: ''; read full file
        name - (str, optional) name of 'chapter' to be read

    Returns:
        list containing DataContainers with read data for each chapter block read
    """
    self.filePointer.seek(0)        # restarts reading the file
    startChapter = [chapter, name]
    endChapter = chapter
    inChapter = False
    inIndent = False
    containerList = []
    dataStructures = []

    # start reading
    for line in self.filePointer:
        line = self.__removeComment(line)
        linesplit = ((line.replace('.', ' ')).replace('\t', ' ')).split(' ') + ['']

        if endChapter in linesplit and inChapter:
            # stop reading. Convert and clean result
            inChapter = not inChapter
            if inIndent:
                # add sublist if this has not been done yet
                dataStructures[-1].append(sublist)
            # convert raw data to a dictionary and load into a DataContainer
            dataStructures = nestedListToDictionary(dataStructures)
            try:
                dataStructures = DataContainer(dataStructures)
            except:
                raise KnownError('Incomplete entry in the input file. Please check if there is an unfinished entry with keyword "module".')
            containerList.append(dataStructures)

        if startChapter[0] in linesplit and startChapter[1] in linesplit and not inChapter:
            # start reading
            inChapter = not inChapter
            inIndent = False
            dataStructures = []

        if inChapter:
            # in the right chapter
            if not line == "":
                # if line starts with a space/tab: this line belongs to a block
                if line[:1] == '\t' or line[:1] == " ":
                    # start a new block if no block was started yet
                    if not inIndent:
                        sublist = dataStructures[-1][1:]
                        inIndent = True
                    # if isinstance(sublist[0], list): sublist.append(line.split())
                    # else: sublist = [sublist]
                    sublist.append(line.split())
                    del dataStructures[-1][1:]
                elif (not (line[:1] == '\t' or line[:1] == " ")) and inIndent:
                    inIndent = False
                    dataStructures[-1].append(sublist)
                    dataStructures.append(line.split())
                else:
                    dataStructures.append(line.split())

    # repeat the block controlling reading stop
    # stop reading. Convert and clean result
    if inChapter:
        if inIndent:
            # add sublist if this has not been done yet
            dataStructures[-1].append(sublist)
        # convert raw data to a dictionary and load into a DataContainer
        dataStructures = nestedListToDictionary(dataStructures)
        dataStructures = DataContainer(dataStructures)
        containerList.append(dataStructures)

    return containerList

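# Hedged illustration (not from the original source) of a block this reader
# parses; the chapter tag 'module' matches its use elsewhere in this code
# base, while the key names are made up. Unindented lines start a new
# key-value entry, indented lines are collected as a sub-block of the
# preceding key.
#
#   module Hydro
#   iterations 100
#   grid
#       xgrid equidistant 100
#
#   containers = reader.read('module')   # 'reader' is a hypothetical instance
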
def loadData(self, name="train", delimiter=',', target_col=-1):
    fin = self.__gzOpen(self.filename, "r")
    reader = csv.reader(fin, delimiter=delimiter)
    data = []
    classes = []
    hasclass = False
    target_col = -1     # NB: this overrides the target_col argument; the target is always the last column

    first_line = reader.next()

    # training set has to contain targets
    if name == 'train':
        if len(first_line) <= target_col:
            raise Exception('Target column does not match total column number.')
        for i in xrange(len(first_line) - 1):
            data.append([])
        hasclass = True

    # test set may contain target values
    if name == 'test':
        if len(first_line) > target_col:
            for i in xrange(len(first_line) - 1):
                data.append([])
            hasclass = True
        else:
            for i in xrange(len(first_line)):
                data.append([])
            hasclass = False

    # skip header if available
    if first_line[0].isalpha():
        pass
    else:
        line = first_line
        if hasclass:
            classes.append(float(line[target_col]))
            line.remove(line[target_col])
        for i in xrange(len(line)):
            data[i].append(float(line[i]))

    for line in reader:
        if hasclass:
            classes.append(float(line[target_col]))
            line.remove(line[target_col])
        for i in xrange(len(line)):
            data[i].append(float(line[i]))

    # cleaning up and return
    fin.close()
    dim = len(data)
    size = len(data[0])
    dataMatrix = DataMatrix(size, dim)
    tempVector = DataVector(dim)
    valuesVector = DataVector(size)
    for rowIndex in xrange(size):
        for colIndex in xrange(dim):
            tempVector[colIndex] = data[colIndex][rowIndex]
        dataMatrix.setRow(rowIndex, tempVector)
        valuesVector[rowIndex] = classes[rowIndex]
    return DataContainer(dataMatrix, valuesVector, name, self.filename)

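# Hedged illustration (not from the original source): the CSV layout this
# loader expects. A header row is only skipped when its first cell is purely
# alphabetic, and the target is always read from the last column since
# target_col is reset to -1 above. Column names are made up.
#
#   width,height,label
#   0.1,0.2,1.0
#   0.4,0.5,0.0
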
def parse_content(content, name):
    content_type, content_string = content.split(',')
    dc = DataContainer(base64.b64decode(content_string))
    global uploaded_files
    uploaded_files[name] = dc
    return html.Div(html.Div('File uploaded'))

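# Hedged sketch (not from the original source) of how parse_content might be
# wired to a dcc.Upload component in a standalone Dash app; the component ids
# 'upload-data' and 'upload-status' are assumptions for illustration only.
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State

app = dash.Dash(__name__)
app.layout = html.Div([
    dcc.Upload(id='upload-data', children=html.Div('Drag and drop or select a file')),
    html.Div(id='upload-status'),
])


@app.callback(Output('upload-status', 'children'),
              Input('upload-data', 'contents'),
              State('upload-data', 'filename'))
def on_upload(contents, filename):
    # dcc.Upload delivers contents as 'data:<mime>;base64,<payload>', which is
    # the 'type,payload' shape that parse_content splits on ','
    if contents is None:
        return html.Div('No file uploaded yet')
    return parse_content(contents, filename)
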
# Pierre Jacquier
# Albert Faucher
# Group 70
# COMP 551 MP3
# March 18 2019

from DataContainer import DataContainer
from DigitDeepConvNN import DigitDeepConvNN
from helpers import *
import matplotlib.pyplot as plt

# auto fallback to cpu
device = getDevice()

# load training data from files
train_data = DataContainer("./input/train_images.pkl", "./input/train_labels.csv")

# create model and load it on cuda core
model = DigitDeepConvNN().to(device)
model.init_optimizer()

# get training data and val data
imgs_train, y_train = train_data.get_datas(0, 35000)
imgs_val, y_val = train_data.get_datas(35000, 5000)
x_train = preprocess(imgs_train, find_digit=True, flag=df.CROP_TIGHT, print_first=True)
x_val = preprocess(imgs_val, find_digit=True, flag=df.CROP_TIGHT)

# train model

from DataContainer import DataContainer, DataContainerTypes
from NB import NaivesBayes
from persoMath import divide

testName = "NB-BOW-FV"

trainingData = DataContainer("name", './covid_training.tsv')
bayesModel = NaivesBayes(name=testName, filtering=True)
bayesModel.train(trainingData.parsedData)

testingData = DataContainer("name", './covid_test_public.tsv')
predictions = bayesModel.predict(testingData.parsedData)

matches = [0, 0]
class_yes_results = {'TP': 0, 'FP': 0, 'FN': 0}
class_no_results = {'TP': 0, 'FP': 0, 'FN': 0}
for index, prediction in enumerate(predictions):
    matches[prediction == testingData.parsedData[index][DataContainerTypes.CLASS]] += 1
    if prediction == testingData.parsedData[index][DataContainerTypes.CLASS]:
        if prediction == 'yes':
            class_yes_results['TP'] += 1
        else:
            class_no_results['TP'] += 1
    else:
        if prediction == 'yes':
            class_yes_results['FP'] += 1
            class_no_results['FN'] += 1
        else:
            class_yes_results['FN'] += 1
            class_no_results['FP'] += 1

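# Hedged follow-up sketch (not from the original source): turn the tallied
# counters into overall accuracy and per-class precision/recall/F1. Guarded
# float division is used here instead of persoMath.divide, whose exact
# behaviour is not shown above.
def precision_recall_f1(results):
    tp, fp, fn = results['TP'], results['FP'], results['FN']
    precision = float(tp) / (tp + fp) if (tp + fp) else 0.0
    recall = float(tp) / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return precision, recall, f1


# matches[1] counts correct predictions, matches[0] the incorrect ones
accuracy = float(matches[1]) / (matches[0] + matches[1]) if (matches[0] + matches[1]) else 0.0
print("accuracy:", accuracy)
print("class 'yes' (precision, recall, F1):", precision_recall_f1(class_yes_results))
print("class 'no'  (precision, recall, F1):", precision_recall_f1(class_no_results))
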
def __loadModule(self, outputReq):
    """Read data of modules from input file

    For all found modules, it reads the data from input, loads its registry
    record and instantiates it. Registry records containing placeholder '@'
    will be refactored here before instantiating a module.

    Parameters:
        outputReq - (DataContainer) with output requirements as read from input
    """
    # read data from config and input file (NB. input file data may overwrite config data)
    configData = self.__loadConfig()
    inputData = self._inputReader.read('module')

    for dataContainer in inputData:     # for each tag 'module' in input:
        # iterate over all module types specified
        moduleList = toList(dataContainer.v('module'))
        for moduleName in moduleList:
            # make a new copy of the data container so that each module has a unique data container
            data = DataContainer()
            data.merge(configData)
            data.merge(dataContainer)   # input data may overwrite config data
            data.addData('module', moduleName)

            # load their registry
            registerData = self._registryChecker.readRegistryEntry(moduleName)
            self._registryChecker.refactorRegistry(data, registerData, output=outputReq)

            # set different data set for output module
            if registerData.v('outputModule') == 'True':
                data = DataContainer()  # do not include config data for output module; start a new one
                data.merge(dataContainer)
                data.addData('module', moduleName)
                data.addData('inputFile', self.inputFilePath)   # output needs the input file, add this to its data

            # make the module
            self.moduleList.addModule(data, registerData, outputReq)
    return

# Group 70
# COMP 551 MP3
# March 18 2019

from DataContainer import DataContainer
from SimpleNN import SimpleNN
import torch
from helpers import *
import numpy as np
from timeit import default_timer as timer

# auto fallback to cpu
device = getDevice()

# load training data from files
train_data = DataContainer("./input/train_images.pkl", "./input/train_labels.csv")

# create model and load it on cuda core
model = SimpleNN(d_in=4096, h=200, d_out=10).to(device)
model.init_optimizer()

imgs, labels = train_data.get_datas(0, 35000)

# convert labels to neural network format (1 output neuron per label)
label_array = labels_to_array(labels, 10)

# flatten and normalize image since we have a fully connected model
imgs_flatten = flatten_imgs(imgs)

t = timer()
model.train_all_batches(x=imgs_flatten,

def init_prev_container(self, first_frame, focal, pp, tfl_man, tfl):
    first_img_path = first_frame
    first = DataContainer(first_img_path, tfl)
    candidates, auxiliary = tfl_man.light_attention(first)
    tfl_man.tfl_detection(first, candidates, auxiliary)
    return first

# Michael Segev
# Pierre Jacquier
# Albert Faucher
# Group 70
# COMP 551 MP3
# March 18 2019

from DataContainer import DataContainer
from ConvNN import ConvNN
from helpers import *

# auto fallback to cpu
device = getDevice()

# load training data from files
train_data = DataContainer("./input/train_images.pkl", "./input/train_labels.csv")

# create model and load it on cuda core
model = ConvNN().to(device)
model.init_optimizer()

# get training data and val data
imgs_train, y_train = train_data.get_datas(0, 35000)
x_train = preprocess(imgs_train)
imgs_val, y_val = train_data.get_datas(35000, 5000)
x_val = preprocess(imgs_val)

# train model
model.train_all_batches(x=x_train, y=y_train, batch_size=64,

# Michael Segev
# Pierre Jacquier
# Albert Faucher
# Group 70
# COMP 551 MP3
# March 18 2019

from DataContainer import DataContainer
import pandas as pd
import pickle
from helpers import *

# load training data from files
train_data = DataContainer("./input/train_images.pkl", "./input/train_labels.csv")

# get training data and val data
imgs, _ = train_data.get_datas(0, 40000)

for i in range(len(imgs)):
    imgs[i] = df.flagCropTight(imgs[i])

with open("./input/train_images_crop_tight.pkl", "wb") as f:
    pickle.dump(imgs, f)