def dump_data(self, hdf5_folder, filename, timezone, subjects=None):
    """Read every subject group from one HDF5 file and append its records
    to the per-subject lists in `subjects`."""
    if subjects is None:
        subjects = {}
    mprintln('Dumping the data from: ' + filename)
    # List the subject groups stored in the file.
    out_h5ls = check_output([hdf5_folder + 'h5ls', filename + '/subjects'])
    # cache_filename = 'obj.save'
    # if os.path.isfile(cache_filename):
    #     f = file(cache_filename, 'rb')
    #     loaded_data = cPickle.load(f)
    #     f.close()
    #     return loaded_data
    for n in subjects:
        print n + " - " + str(len(subjects[n]))
    for n in out_h5ls.split('\n'):
        # h5ls prints one object per line; the first token is the group name.
        subject = n.split(' ')[0]
        if subject:
            out_h5dump = check_output([hdf5_folder + 'h5dump',
                                       '--group=subjects/' + subject,
                                       filename])
            if subject not in subjects:
                subjects[subject] = []
            subjects[subject] += self.extract_data(out_h5dump, filename)
    # f = file(cache_filename, 'wb')
    # cPickle.dump(subjects, f, protocol=cPickle.HIGHEST_PROTOCOL)
    # f.close()
    for n in subjects:
        print n + " - " + str(len(subjects[n]))
    return subjects
def save_to_csv(self, fname, subjects):
    """Write every record of every subject to a single CSV file."""
    mprintln('Saving the data to: ' + fname)
    with open(fname, 'wb') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        for subject in subjects:
            # `subjects` maps a subject name to its list of records.
            for record in subjects[subject]:
                writer.writerow(record)
def process_all(self, hdf5_folder, columns, antennas, timezone, input_files, bin_time):
    """Dump every input file, then split each subject's records into
    bins of `bin_time` and collect them into a single result dictionary."""
    mprintln('Processing the data...')
    mprint(str(len(input_files)) + ' file(s) found')
    result = {}
    subjects_data = {}
    for n in range(len(input_files)):
        subjects_data = self.dump_data(hdf5_folder, input_files[n],
                                       timezone, subjects_data)
    for subject in subjects_data:
        while True:
            binned_data = self.get_binned_record(subjects_data[subject], bin_time)
            if binned_data:
                # remove the consumed records from the front of the list
                subjects_data[subject] = subjects_data[subject][len(binned_data):]
                # add the binned data to the result dictionary
                result = self.add_record_to_dictionary(result, timezone, subject,
                                                       binned_data, columns, antennas)
            else:
                break
    return result
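
# --- Illustration only (not part of the class) ---------------------------
# `get_binned_record` is defined elsewhere; the sketch below only
# illustrates the consume-from-front binning pattern used in process_all:
# take one bin off the head of the record list, drop the consumed records,
# and repeat until no bin can be formed. The record layout
# (timestamp_seconds, antenna) and the helper names are assumptions made
# for illustration, not taken from this module.

def _take_one_bin(records, bin_time):
    # Stand-in binning rule: return the leading records whose timestamps
    # fall within `bin_time` seconds of the first remaining record.
    if not records:
        return []
    start = records[0][0]
    bin_records = []
    for record in records:
        if record[0] - start < bin_time:
            bin_records.append(record)
        else:
            break
    return bin_records

def _bin_subject(records, bin_time):
    # Mirrors the while-loop in process_all for a single subject.
    bins = []
    while True:
        binned = _take_one_bin(records, bin_time)
        if not binned:
            break
        records = records[len(binned):]   # drop the consumed records
        bins.append(binned)
    return bins

# Example (60-second bins):
# _bin_subject([(0, 'A1'), (10, 'A2'), (75, 'A1')], 60)
#   -> [[(0, 'A1'), (10, 'A2')], [(75, 'A1')]]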