def main():
    """Import raw `magno_*.mat` saccade data into a FlydraDB.

    Walks every subdirectory (group) of --saccade_data, and for each
    `magno_<sample>.mat` file creates/updates the sample in the output
    FlydraDB, attaches it to the group, and stores the experimental data
    table plus 'species'/'background' attributes.

    NOTE(review): this function is immediately redefined below in the same
    module, so this version is dead code unless the second definition is
    removed or renamed.
    """
    parser = OptionParser(usage=description)
    parser.add_option("--saccade_data", help="Main data directory",
                      default='saccade_data')
    parser.add_option("--db", help='Location of output Flydra db.')
    (options, args) = parser.parse_args()  # @UnusedVariable

    if not options.db:
        raise Exception('Please define FlydraDB directory using `--db`.')

    verbose = True

    flydra_db = FlydraDB(options.db, create=True)
    try:
        matlab_dir = options.saccade_data
        for group in os.listdir(matlab_dir):
            group_dir = os.path.join(matlab_dir, group)
            if not os.path.isdir(group_dir):
                continue

            if verbose:
                print("Opening {0}".format(group))

            # Renamed loop variable: the original shadowed the builtin `file`.
            mat_files = [fname for fname in os.listdir(group_dir)
                         if fname.startswith('magno_') and fname.endswith('.mat')]
            for fname in mat_files:
                # Sample id is the text between the first '_' and the first '.'.
                sample = fname[fname.index('_') + 1:fname.index('.')]

                if verbose:
                    print(" - Considering sample {0}".format(repr(sample)))

                if not flydra_db.has_sample(sample):
                    flydra_db.add_sample(sample)
                flydra_db.add_sample_to_group(sample, group)

                filename = os.path.join(group_dir, fname)
                exp_data, attributes = read_raw_data(filename)

                consider_importing_processed(flydra_db, sample, exp_data,
                                             attributes)

                flydra_db.set_attr(sample, 'species', attributes['species'])
                flydra_db.set_attr(sample, 'background',
                                   attributes['background'])

                flydra_db.set_table(sample, EXP_DATA_TABLE, exp_data)
    finally:
        # Ensure the database is closed even if an import step fails.
        flydra_db.close()
def main():
    """Import processed saccade data (`processed/<conf>/saccades.mat`) into a FlydraDB.

    For each group directory under --saccade_data, looks for a `processed`
    subdirectory; each configuration folder containing `saccades.mat` is
    loaded, split per sample, and stored as a versioned SACCADES_TABLE.

    Raises:
        Exception: if --db is not given.
    """
    parser = OptionParser(usage=description)
    parser.add_option("--saccade_data", help="Main data directory",
                      default='saccade_data')
    parser.add_option("--db", help="FlydraDB directory")
    parser.add_option("--verbose", help='Verbose output',
                      default=False, action="store_true")
    (options, args) = parser.parse_args()  # @UnusedVariable

    if not options.db:
        raise Exception('Please define FlydraDB directory using `--db`.')

    def printv(s):
        # Print only when --verbose was passed.
        if options.verbose:
            print(s)

    flydra_db = FlydraDB(options.db, create=True)
    try:
        matlab_dir = options.saccade_data
        for group in os.listdir(matlab_dir):
            group_dir = os.path.join(matlab_dir, group)
            if not os.path.isdir(group_dir):
                continue

            printv("Opening {0}".format(group))

            processed_dir = os.path.join(group_dir, 'processed')
            if not os.path.exists(processed_dir):
                printv("No processed data found for group %r." % group)
                continue

            for conf in os.listdir(processed_dir):
                # Each configuration directory may contain a saccades.mat.
                saccades_file = os.path.join(processed_dir, conf,
                                             'saccades.mat')
                if os.path.exists(saccades_file):
                    printv('Loading from file %r.' % saccades_file)

                    saccades = saccades_read_mat(saccades_file)
                    samples = numpy.unique(saccades['sample'])
                    for sample in samples:
                        if not flydra_db.has_sample(sample):
                            flydra_db.add_sample(sample)
                        flydra_db.add_sample_to_group(sample, group)

                        # Select only this sample's saccades.
                        sample_saccades = \
                            saccades[saccades[:]['sample'] == sample]
                        flydra_db.set_table(sample=sample,
                                            table=SACCADES_TABLE,
                                            version=conf,
                                            data=sample_saccades)
    finally:
        # Close the DB even if an import step fails.
        flydra_db.close()
class SamplesDB:
    """Index of saccade data laid out on disk.

    Expected layout (under the base directory):
        <group>/data_<sample>.mat      raw experimental data (Matlab)
        <group>/data_<sample>.pickle   raw experimental data (pickle)
        <group>/processed/<conf>/saccades.mat   processed saccades

    Results can optionally be cached in a `shelve` file and/or a FlydraDB
    living inside the base directory.
    """

    def __init__(self, data, verbose=False):
        ''' data: base directory '''
        if not os.path.exists(data) or not os.path.isdir(data):
            raise Exception('Could not open directory %s' % data)
        self.data = data

        # self.use_cache = True
        self.use_cache = False
        self.use_flydra_db = True

        if self.use_cache:
            self.open_shelve()
        if self.use_flydra_db:
            self.open_flydra_db()

        # group name -> Group record
        self.groups = {}
        # sample id -> .mat file path
        self.sample2expmat = {}
        # sample id -> .pickle file path
        self.sample2exppickle = {}
        # all configurations seen across groups
        self.configurations = set()
        # sample id -> group name
        self.sample2group = {}

        for group in os.listdir(data):
            group_dir = os.path.join(data, group)
            if not os.path.isdir(group_dir):
                continue

            group_record = Group()

            # Raw data stored as data_<id>.mat
            for file in [f for f in os.listdir(group_dir)
                         if f.startswith('data_') and f.endswith('.mat')]:
                id = file[5:-4]
                group_record.samples.add(id)
                self.sample2expmat[id] = os.path.join(group_dir, file)
                self.sample2group[id] = group

            # Raw data stored as data_<id>.pickle
            for file in [f for f in os.listdir(group_dir)
                         if f.startswith('data_') and f.endswith('.pickle')]:
                id = file[5:-7]
                group_record.samples.add(id)
                self.sample2exppickle[id] = os.path.join(group_dir, file)
                self.sample2group[id] = group

            group_record.has_experimental_data = \
                len(group_record.samples) > 0

            processed_dir = os.path.join(group_dir, 'processed')
            if not os.path.exists(processed_dir):
                if verbose:
                    print("No processed data found for %s." % group)
            else:
                for conf in os.listdir(processed_dir):
                    saccades = os.path.join(processed_dir, conf,
                                            'saccades.mat')
                    if os.path.exists(saccades):
                        group_record.configurations[conf] = saccades
                        # add to general list
                        self.configurations.add(conf)

                # If we don't have experimental data, get the list of
                # samples from the (last seen) processed saccades file.
                if group_record.configurations and \
                        not group_record.has_experimental_data:
                    saccades = saccades_read_mat(saccades)
                    group_record.samples = \
                        set(numpy.unique(saccades['sample']))
                    for sample in group_record.samples:
                        self.sample2group[sample] = group

            if len(group_record.samples) > 0:
                self.groups[group] = group_record

            # NOTE(review): looks like leftover debug output; kept for
            # behavioral parity (formatted for py2/py3 compatibility).
            print("has it %s %s" % (group,
                                    group_record.has_experimental_data))

    def open_shelve(self):
        """Open (or create) the shelve cache file in the base directory."""
        shelve_fname = os.path.join(self.data, 'shelve')
        self.shelve = shelve.open(shelve_fname,
                                  protocol=pickle.HIGHEST_PROTOCOL)

    def open_flydra_db(self):
        """Open (or create) the FlydraDB cache in the base directory."""
        self.flydra_db = FlydraDB(os.path.join(self.data, 'sac_flydra_db'))

    def list_groups(self):
        """ Returns a list of the groups. """
        return natsorted(list(self.groups.keys()))

    def list_all_samples(self):
        """ Returns a list of all samples for all groups. """
        return natsorted(list(self.sample2group.keys()))

    def list_samples(self, group):
        """ Lists the samples in the given group. """
        return natsorted(list(self.groups[group].samples))

    def list_all_configurations(self):
        """ Lists all the configurations present in the data. """
        return natsorted(self.configurations)

    def list_configurations(self, group):
        """ Lists the configurations for the given group. """
        return natsorted(list(self.groups[group].configurations.keys()))

    def get_group_for_sample(self, sample):
        """ Returns the group associated to the sample. """
        return self.sample2group[sample]

    def get_saccades_for_group(self, group, configuration):
        """ Returns the saccades for the given group and configuration.

            Results are cached in the FlydraDB and/or the shelve when
            those caches are enabled.
        """
        if self.use_flydra_db:
            table = 'groupsaccades_%s' % configuration
            if self.flydra_db.has_sample(group) and \
                    self.flydra_db.has_table(group, table):
                t = self.flydra_db.get_table(group, table)
                # value = t.copy()
                value = t
                # self.flydra_db.release_table(t)
                return value

        if self.use_cache:
            key = str(('get_saccades_for_group', group, configuration))
            if key in self.shelve:
                return self.shelve[key]

        filename = self.groups[group].configurations[configuration]
        saccades = saccades_read_mat(filename)

        if self.use_flydra_db:
            if not self.flydra_db.has_sample(group):
                self.flydra_db.add_sample(group)
            self.flydra_db.set_table(group, table, saccades)

        if self.use_cache:
            self.shelve[key] = saccades

        return saccades

    def group_has_experimental_data(self, group):
        """ Returns true if this group has the raw orientation data.
            (mamarama has only saccades data.) """
        return self.groups[group].has_experimental_data

    def has_experimental_data(self, sample):
        """ Returns true if this sample has the raw orientation data. """
        return sample in self.sample2expmat or \
            sample in self.sample2exppickle

    def get_saccades_for_sample(self, sample, configuration):
        """ Returns the saccades for the given sample and configuration.

            Raises:
                Exception: if no saccades exist for the sample.
        """
        if self.use_flydra_db:
            table = 'saccades_%s' % configuration
            if self.flydra_db.has_sample(sample) and \
                    self.flydra_db.has_table(sample, table):
                t = self.flydra_db.get_table(sample, table)
                # value = t.copy()
                value = t
                # self.flydra_db.release_table(t)
                return value

        if self.use_cache:
            key = str(('get_saccades_for_sample', sample, configuration))
            if key in self.shelve:
                return self.shelve[key]

        group = self.get_group_for_sample(sample)
        group_saccades = self.get_saccades_for_group(group, configuration)

        # NOTE(review): looks like leftover debug output; kept for parity.
        print(group_saccades[0].dtype)

        mine = group_saccades[:]['sample'] == sample
        saccades = group_saccades[mine]
        if len(saccades) == 0:
            raise Exception('No saccades found for %s' % sample)

        if self.use_flydra_db:
            if not self.flydra_db.has_sample(sample):
                self.flydra_db.add_sample(sample)
            self.flydra_db.set_table(sample, table, saccades)

        if self.use_cache:
            self.shelve[key] = saccades

        return saccades

    def get_experimental_data(self, sample):
        """ Returns the raw experimental data for the sample, loaded
            either from the .mat or the .pickle file.

            Raises:
                Exception: if no raw data exists for the sample.
        """
        # BUG FIX: the original consulted self.shelve unconditionally
        # (the `if self.use_cache:` guard was commented out), which
        # raises AttributeError when use_cache is False because the
        # shelve was never opened.
        if self.use_cache:
            if sample in self.shelve:
                return self.shelve[sample]

        if sample in self.sample2expmat:
            data = scipy.io.loadmat(self.sample2expmat[sample],
                                    squeeze_me=True)
            data = data['data']
            # convert from record array to dict
            assert isinstance(data, numpy.ndarray)
            data = dict(map(lambda field: (field, data[field]),
                            data.dtype.fields))
            # convert 0-d unicode arrays to plain strings
            for k in list(data.keys()):
                if data[k].dtype.char == 'U':
                    data[k] = str(data[k])

            # make sure everything is a 1d array
            def as1d(x):
                if x.dtype == 'object':
                    x = x.tolist()
                return x.reshape(len(x))
            data['exp_orientation'] = as1d(data['exp_orientation'])
            data['exp_timestamps'] = as1d(data['exp_timestamps'])

        elif sample in self.sample2exppickle:
            with open(self.sample2exppickle[sample], 'rb') as f:
                data = cPickle.load(f)
        else:
            raise Exception('no data for sample %s found' % sample)

        if self.use_cache:
            self.shelve[sample] = data

        return data

    def __getstate__(self):
        # do not pickle the shelve or the FlydraDB handle
        all = dict(self.__dict__)
        all['shelve'] = None
        all['flydra_db'] = None
        return all
def main_filter(args):
    """Filter smoothed Flydra tracks from .kh5 files into a FlydraDB.

    For each good (file, obj_ids, stimulus) triple found under the given
    paths, extracts smoothed tracks, filters the rows, and stores them as a
    versioned 'rows' table together with stimulus attributes and
    provenance metadata (time, host, user, library versions).

    Args:
        args: command-line arguments (paths + options).

    Raises:
        UserError: if no files or directories were specified.
    """
    parser = LenientOptionParser()
    parser.add_option("--db", default='flydra_db', help="FlydraDB directory")
    parser.add_option("--min_frames_per_track", default=400,
                      help="Minimum number of frames per track [= %default]")
    parser.add_option("--confirm_problems",
                      help="Stop interactively on problems with log files'\
'(e.g.: cannot find valid obj_ids) [default: %default]",
                      default=False, action="store_true")
    parser.add_option("--dynamic_model_name",
                      help="Smoothing dynamical model [default: %default]",
                      default="mamarama, units: mm")
    parser.add_option("--debug_output", help="Creates debug figures.",
                      default=False, action="store_true")
    parser.add_option("--nocache", help="Ignores already computed results.",
                      default=False, action="store_true")
    parser.add_option("--smoothing", help="Uses Kalman-smoothed data.",
                      default=False, action="store_true")
    (options, args) = parser.parse_args(args)

    table_name = 'rows'  # TODO: use constant
    table_version = "smooth" if options.smoothing else "kf"

    if not args:
        raise UserError('No files or directories specified.')

    if not os.path.exists(options.db):
        os.makedirs(options.db)

    db = FlydraDB(options.db)
    try:
        good_files = get_good_files(
            where=args, pattern="*.kh5",
            confirm_problems=options.confirm_problems)

        if len(good_files) == 0:
            logger.error("No good files to process")
            sys.exit(1)

        n = len(good_files)
        for i in range(n):
            (filename, obj_ids, stim_fname) = good_files[i]
            logger.info('Sample %s/%s: %s' % (i + 1, n, filename))

            # only maintain basename
            stim = os.path.splitext(os.path.basename(stim_fname))[0]
            sample_id = os.path.splitext(os.path.basename(filename))[0]

            logger.info("File %d/%d %s %s %s " %
                        (i, n, str(filename), str(obj_ids), stim_fname))

            if (db.has_sample(sample_id)
                    and db.has_table(sample_id, table_name, table_version)
                    and not options.nocache):
                logger.info('Sample %r already computed; skipping.'
                            ' (use --nocache to ignore)' % sample_id)
                continue

            all_data = []
            for obj_id, rows in get_good_smoothed_tracks(  # @UnusedVariable
                    filename=filename,
                    obj_ids=obj_ids,
                    min_frames_per_track=options.min_frames_per_track,
                    dynamic_model_name=options.dynamic_model_name,
                    use_smoothing=options.smoothing):
                filtered = filter_rows(rows, options)
                all_data.append(filtered)

            if not all_data:
                logger.info('Not enough data found for %r; skipping.'
                            % sample_id)
                continue

            if not db.has_sample(sample_id):
                db.add_sample(sample_id)
            db.set_attr(sample_id, 'stim_fname', stim_fname)
            db.set_attr(sample_id, 'stimulus', stim)
            # BUG FIX: the original leaked the file handle
            # (open(stim_fname).read()).
            with open(stim_fname) as stim_file:
                stim_xml = stim_file.read()
            db.set_attr(sample_id, 'stimulus_xml', stim_xml)

            geometry = get_posts_info(stim_xml)
            # NOTE(review): looks like leftover debug output; kept for parity.
            print(geometry)
            db.set_attr(sample_id, 'posts', geometry['posts'])
            if 'arena' in geometry:
                db.set_attr(sample_id, 'arena', geometry['arena'])

            db.add_sample_to_group(sample_id, stim)
            if stim != 'nopost':
                db.add_sample_to_group(sample_id, 'posts')

            rows = numpy.concatenate(all_data)
            db.set_table(sample=sample_id,
                         table=table_name,
                         data=rows,
                         version=table_version)

            # Provenance metadata for this filtering run.
            db.set_attr(sample_id, 'filter_time',
                        datetime.now().strftime("%Y%m%d_%H%M%S"))
            db.set_attr(sample_id, 'filter_host', platform.node())
            db.set_attr(sample_id, 'filter_user', get_user())
            db.set_attr(sample_id, 'filter_python_version',
                        platform.python_version())
            db.set_attr(sample_id, 'filter_numpy_version',
                        numpy.version.version)
    finally:
        # Close the DB even if processing a sample fails.
        db.close()