def main():
    """Import raw ``magno_*.mat`` sample files into a FlydraDB.

    Every sub-directory of ``--saccade_data`` is treated as a group.  For
    each ``magno_<sample>.mat`` file in a group the sample is created in the
    database if needed, added to the group, and its raw data, ``species`` and
    ``background`` attributes are stored.

    Raises:
        Exception: if ``--db`` or ``--saccade_data`` is not given.
    """
    parser = OptionParser(usage=description)
    # NOTE(review): the original call was cut off after ``help=``; if it used
    # to carry a ``default=``, restore it here.
    parser.add_option("--saccade_data", help="Main data directory")
    parser.add_option("--db", help='Location of output Flydra db.')
    (options, args) = parser.parse_args() #@UnusedVariable

    if not options.db:
        raise Exception('Please define FlydraDB directory using `--db`.')
    if not options.saccade_data:
        # Without this, os.listdir() below would be called with None.
        raise Exception('Please define data directory using `--saccade_data`.')

    verbose = True
    prefix = 'magno_'
    suffix = '.mat'

    flydra_db = FlydraDB(options.db, create=True)
    matlab_dir = options.saccade_data

    for group in os.listdir(matlab_dir):
        group_dir = os.path.join(matlab_dir, group)
        if not os.path.isdir(group_dir):
            # Stray files next to the group directories are ignored.
            continue

        if verbose:
            print("Opening {0}".format(group))

        for fname in os.listdir(group_dir):
            if not (fname.startswith(prefix) and fname.endswith(suffix)):
                continue

            # The sample name runs from the prefix up to the FIRST dot.
            sample = fname[len(prefix):fname.index('.')]
            if verbose:
                print("  - Considering sample {0!r}".format(sample))

            if not flydra_db.has_sample(sample):
                flydra_db.add_sample(sample)
            flydra_db.add_sample_to_group(sample, group)

            filename = os.path.join(group_dir, fname)
            exp_data, attributes = read_raw_data(filename)

            consider_importing_processed(flydra_db, sample, exp_data, attributes)

            flydra_db.set_attr(sample, 'species', attributes['species'])
            flydra_db.set_attr(sample, 'background', attributes['background'])
            flydra_db.set_table(sample, EXP_DATA_TABLE, exp_data)
def main():
    """Import processed saccade tables into a FlydraDB.

    Every sub-directory of ``--saccade_data`` is treated as a group.  For
    each ``processed/<conf>/saccades.mat`` found in a group, the saccades are
    split by their ``sample`` field and stored per sample in table
    ``SACCADES_TABLE`` with ``<conf>`` as the table version.

    Raises:
        Exception: if ``--db`` or ``--saccade_data`` is not given.
    """
    parser = OptionParser(usage=description)
    # NOTE(review): the original call was cut off after ``help=``; if it used
    # to carry a ``default=``, restore it here.
    parser.add_option("--saccade_data", help="Main data directory")
    parser.add_option("--db", help="FlydraDB directory")
    parser.add_option("--verbose", help='Verbose output',
                      default=False, action="store_true")
    (options, args) = parser.parse_args() #@UnusedVariable

    if not options.db:
        raise Exception('Please define FlydraDB directory using `--db`.')
    if not options.saccade_data:
        # Without this, os.listdir() below would be called with None.
        raise Exception('Please define data directory using `--saccade_data`.')

    def printv(s):
        """Print ``s`` only when --verbose was given."""
        if options.verbose:
            print(s)

    flydra_db = FlydraDB(options.db, create=True)
    matlab_dir = options.saccade_data

    for group in os.listdir(matlab_dir):
        group_dir = os.path.join(matlab_dir, group)
        if not os.path.isdir(group_dir):
            # Stray files next to the group directories are ignored.
            continue

        printv("Opening {0}".format(group))

        processed_dir = os.path.join(group_dir, 'processed')
        if not os.path.exists(processed_dir):
            printv("No processed data found for group %r." % group)
            # Nothing to list for this group; move on to the next one.
            continue

        for conf in os.listdir(processed_dir):
            # Only configurations that provide a saccades.mat are imported.
            saccades_file = os.path.join(processed_dir, conf, 'saccades.mat')
            if not os.path.exists(saccades_file):
                continue

            printv('Loading from file %r.' % saccades_file)
            saccades = saccades_read_mat(saccades_file)

            for sample in numpy.unique(saccades['sample']):
                if not flydra_db.has_sample(sample):
                    flydra_db.add_sample(sample)
                flydra_db.add_sample_to_group(sample, group)

                # Rows of the group-wide table that belong to this sample.
                sample_saccades = saccades[saccades[:]['sample'] == sample]
                flydra_db.set_table(sample=sample, table=SACCADES_TABLE,
                                    version=conf, data=sample_saccades)
#            else:
#                prefix = 'data_'
#        suffix = '.mat'
#        for file in [file for file in os.listdir(group_dir) 
#            if (file.startswith(prefix)) and file.endswith(suffix)]:
#            sample = file[len(prefix):file.index('.')]
#            if verbose:
#                print("  - Considering sample {0}".format(sample.__repr__()))
#            if not flydra_db.has_sample(sample):
#                flydra_db.add_sample(sample)
#            filename = os.path.join(group_dir, file)
#        else:
#            for conf in os.listdir(processed_dir):                
#                saccades = os.path.join(processed_dir, conf, 'saccades.mat')
#                if os.path.exists(saccades): 
#                    group_record.configurations[conf] = saccades
#                    # add to general list
#                    self.configurations.add(conf)
##                    else:
##                        conf_dir = os.path.join(processed_dir, conf)
##                        for file in [file for file in os.listdir(conf_dir) 
##                            if file.startswith('processed_data_') and file.endswith('.mat')]: 
##                                  id = file[5:-7]
#            # if we don't have exp data, get list of samples from
#            # processed data
#            if group_record.configurations and \
#                not group_record.has_experimental_data:
#                saccades = saccades_read_mat(saccades)
#                group_record.samples = set(numpy.unique(saccades['sample']))
#                for sample in group_record.samples:
#                    self.sample2group[sample] = group
#        if len(group_record.samples)> 0:
#            self.groups[group] = group_record
#            print "has it", group, group_record.has_experimental_data
# Example #3
class SamplesDB:
    """ Index of the groups, samples and configurations under a directory.

        Layout read by the constructor:

        * every sub-directory of the base directory is a group;
        * ``data_<id>.mat`` and ``data_<id>.pickle`` files in a group are
          samples with experimental data;
        * ``processed/<conf>/saccades.mat`` is the saccades table of the
          group for configuration ``<conf>``.
    """

    def __init__(self, data, verbose=False):
        """ Scans the base directory ``data`` and builds the indices.

            data: base directory
            verbose: if true, prints progress messages while scanning.

            Raises Exception if ``data`` is not an existing directory.
        """
        if not os.path.isdir(data):
            raise Exception('Could not open directory %s' % data)

        self.data = data

        # self.use_cache = True
        self.use_cache = False
        self.use_flydra_db = True

        if self.use_cache:
            self.open_shelve()
        if self.use_flydra_db:
            self.open_flydra_db()

        self.groups = {}
        # maps id to .mat file
        self.sample2expmat = {}
        # maps id to .pickle file
        self.sample2exppickle = {}
        # list of all configurations
        self.configurations = set()
        # maps sample -> group
        self.sample2group = {}

        for group in os.listdir(data):
            group_dir = os.path.join(data, group)
            if not os.path.isdir(group_dir):
                continue

            group_record = Group()

            for fname in os.listdir(group_dir):
                if not fname.startswith('data_'):
                    continue
                if fname.endswith('.mat'):
                    sample_id = fname[len('data_'):-len('.mat')]
                    self.sample2expmat[sample_id] = \
                        os.path.join(group_dir, fname)
                elif fname.endswith('.pickle'):
                    sample_id = fname[len('data_'):-len('.pickle')]
                    self.sample2exppickle[sample_id] = \
                        os.path.join(group_dir, fname)
                else:
                    continue
                # NOTE(review): restored statement; assumes Group.samples is
                # a set, as the assignment further below suggests.
                group_record.samples.add(sample_id)
                self.sample2group[sample_id] = group

            group_record.has_experimental_data = len(group_record.samples) > 0

            processed_dir = os.path.join(group_dir, 'processed')
            if not os.path.exists(processed_dir):
                if verbose:
                    print("No processed data found for %s." % group)
            else:
                # Remember a saccades file that really exists: the path
                # built in the last iteration may point to nothing.
                found_saccades = None
                for conf in os.listdir(processed_dir):
                    candidate = os.path.join(processed_dir, conf,
                                             'saccades.mat')
                    if os.path.exists(candidate):
                        group_record.configurations[conf] = candidate
                        # add to general list
                        self.configurations.add(conf)
                        found_saccades = candidate

                # if we don't have exp data, get list of samples from
                # processed data
                if found_saccades is not None and \
                    not group_record.has_experimental_data:
                    saccades = saccades_read_mat(found_saccades)
                    group_record.samples = \
                        set(numpy.unique(saccades['sample']))
                    for sample in group_record.samples:
                        self.sample2group[sample] = group

            if len(group_record.samples) > 0:
                self.groups[group] = group_record
                if verbose:
                    print("has it %s %s" %
                          (group, group_record.has_experimental_data))

    def open_shelve(self):
        """ Opens the shelve cache stored in the base directory. """
        import shelve
        shelve_fname = os.path.join(self.data, 'shelve')
        self.shelve = shelve.open(shelve_fname,
                                  protocol=pickle.HIGHEST_PROTOCOL)

    def open_flydra_db(self):
        """ Opens the FlydraDB stored in the base directory. """
        self.flydra_db = FlydraDB(os.path.join(self.data, 'sac_flydra_db'))

    def list_groups(self):
        """ Returns a list of the groups. """
        return natsorted(list(self.groups.keys()))

    def list_all_samples(self):
        """ Returns a list of all samples for all groups. """
        return natsorted(list(self.sample2group.keys()))

    def list_samples(self, group):
        """ Lists the samples in the given group. """
        return natsorted(list(self.groups[group].samples))

    def list_all_configurations(self):
        """ Lists all the configurations present in the data. """
        return natsorted(self.configurations)

    def list_configurations(self, group):
        """ Lists the configurations for the given group. """
        return natsorted(list(self.groups[group].configurations.keys()))

    def get_group_for_sample(self, sample):
        """ Returns the group the given sample belongs to. """
        return self.sample2group[sample]

    def get_saccades_for_group(self, group, configuration):
        """ Returns the saccades for the given group and configuration.

            Looks in the FlydraDB first, then in the shelve cache, and
            finally reads the saccades file; a freshly read table is
            written back to whichever of the two stores is enabled.
        """
        table = 'groupsaccades_%s' % configuration
        key = str(('get_saccades_for_group', group, configuration))

        if self.use_flydra_db:
            if self.flydra_db.has_sample(group) and \
               self.flydra_db.has_table(group, table):
                return self.flydra_db.get_table(group, table)

        if self.use_cache:
            if key in self.shelve:
                return self.shelve[key]

        filename = self.groups[group].configurations[configuration]
        saccades = saccades_read_mat(filename)

        if self.use_flydra_db:
            # The group itself is stored as a sample of the database.
            if not self.flydra_db.has_sample(group):
                self.flydra_db.add_sample(group)
            self.flydra_db.set_table(group, table, saccades)

        if self.use_cache:
            self.shelve[key] = saccades

        return saccades

    def group_has_experimental_data(self, group):
        """ Returns true if this group has the raw orientation data.
            (mamarama has only saccades data. ) """
        return self.groups[group].has_experimental_data

    def has_experimental_data(self, sample):
        """ Returns true if this sample has the raw orientation data. """
        return sample in self.sample2expmat or sample in self.sample2exppickle

    def get_saccades_for_sample(self, sample, configuration):
        """ Returns the saccades for the given sample and configuration.

            Looks in the FlydraDB first, then in the shelve cache, and
            finally selects the sample's rows from the saccades of its
            group; the result is written back to the enabled stores.

            Raises Exception if the group has no saccades for the sample.
        """
        table = 'saccades_%s' % configuration
        key = str(('get_saccades_for_sample', sample, configuration))

        if self.use_flydra_db:
            if self.flydra_db.has_sample(sample) and \
               self.flydra_db.has_table(sample, table):
                return self.flydra_db.get_table(sample, table)

        if self.use_cache:
            if key in self.shelve:
                return self.shelve[key]

        group = self.get_group_for_sample(sample)
        group_saccades = self.get_saccades_for_group(group, configuration)

        mine = group_saccades[:]['sample'] == sample
        saccades = group_saccades[mine]

        if len(saccades) == 0:
            raise Exception('No saccades found for %s' % sample)

        if self.use_flydra_db:
            if not self.flydra_db.has_sample(sample):
                self.flydra_db.add_sample(sample)
            self.flydra_db.set_table(sample, table, saccades)

        if self.use_cache:
            self.shelve[key] = saccades

        return saccades

    def get_experimental_data(self, sample):
        """ Returns the experimental data of the sample as a dictionary.

            The data is read from the shelve cache if enabled, otherwise
            from the sample's .mat file or, failing that, its .pickle file.

            Raises Exception if the sample has neither file.
        """
        if self.use_cache:
            if sample in self.shelve:
                return self.shelve[sample]

        if sample in self.sample2expmat:
            import scipy.io
            mat = scipy.io.loadmat(self.sample2expmat[sample],
                                   squeeze_me=True)
            record = mat['data']
            # convert from array to hash
            assert isinstance(record, numpy.ndarray)
            data = dict((field, record[field])
                        for field in record.dtype.fields)
            # convert from array to string
            for k in list(data.keys()):
                if data[k].dtype.char == 'U':
                    data[k] = str(data[k])

            # make sure everything is 1d array
            def as1d(x):
                if x.dtype == 'object':
                    x = x.tolist()
                return x.reshape(len(x))

            data['exp_orientation'] = as1d(data['exp_orientation'])
            data['exp_timestamps'] = as1d(data['exp_timestamps'])

        elif sample in self.sample2exppickle:
            # NOTE(review): unpickling executes code embedded in the file;
            # only use this on data directories you trust.
            with open(self.sample2exppickle[sample], 'rb') as f:
                data = cPickle.load(f)

        else:
            raise Exception('no data for sample %s found' % sample)

        if self.use_cache:
            self.shelve[sample] = data

        return data

    def __getstate__(self):
        """ Returns the state to pickle, without the open database handles. """
        # do not pickle the shelve
        state = dict(self.__dict__)
        state['shelve'] = None
        state['flydra_db'] = None
        return state
def main_filter(args):
    parser = LenientOptionParser()
    parser.add_option("--db", default='flydra_db', help="FlydraDB directory")

    parser.add_option("--min_frames_per_track", default=400,
        help="Minimum number of frames per track [= %default]")

                      help="Stop interactively on problems with log files'\
                      '(e.g.: cannot find valid obj_ids) [default: %default]",
                      default=False, action="store_true")

                      help="Smoothing dynamical model [default: %default]",
                      default="mamarama, units: mm")
    parser.add_option("--debug_output", help="Creates debug figures.",
                      default=False, action="store_true")

    parser.add_option("--nocache", help="Ignores already computed results.",
                      default=False, action="store_true")

    parser.add_option("--smoothing", help="Uses Kalman-smoothed data.",
                      default=False, action="store_true")

    (options, args) = parser.parse_args(args)
    table_name = 'rows' # TODO: use constant
    table_version = "smooth" if options.smoothing else "kf"
    if not args:
        raise UserError('No files or directories specified.')
    if not os.path.exists(options.db):

    db = FlydraDB(options.db)

    good_files = get_good_files(where=args, pattern="*.kh5",

    if len(good_files) == 0:
        logger.error("No good files to process")

    n = len(good_files)
    for i in range(n):
        (filename, obj_ids, stim_fname) = good_files[i]
       'Sample %s/%s: %s' % (i + 1, n, filename))
        # only maintain basename
        stim = os.path.splitext(os.path.basename(stim_fname))[0]
        sample_id = os.path.splitext(os.path.basename(filename))[0]
           "File %d/%d %s %s %s " % 
                    (i, n, str(filename), str(obj_ids), stim_fname))
        if (db.has_sample(sample_id) 
            and db.has_table(sample_id, table_name, table_version)
            and not options.nocache):
  'Sample %r already computed; skipping.'
                        ' (use --nocache to ignore)' % sample_id)
        all_data = [] 

        for obj_id, rows in get_good_smoothed_tracks(#@UnusedVariable

            filtered = filter_rows(rows, options)
        if not all_data:
  'Not enough data found for %r; skipping.' % sample_id)

        if not db.has_sample(sample_id):
        db.set_attr(sample_id, 'stim_fname', stim_fname)
        db.set_attr(sample_id, 'stimulus', stim)
        stim_xml = open(stim_fname).read()
        db.set_attr(sample_id, 'stimulus_xml', stim_xml)
        geometry = get_posts_info(stim_xml)
        db.set_attr(sample_id, 'posts', geometry['posts'])
        if 'arena' in geometry:
            db.set_attr(sample_id, 'arena', geometry['arena'])

        db.add_sample_to_group(sample_id, stim)
        if stim != 'nopost':
            db.add_sample_to_group(sample_id, 'posts')

        rows = numpy.concatenate(all_data)
        db.set_attr(sample_id, 'filter_time',"%Y%m%d_%H%M%S"))
        db.set_attr(sample_id, 'filter_host', platform.node())
        db.set_attr(sample_id, 'filter_user', get_user())
        db.set_attr(sample_id, 'filter_python_version', platform.python_version())
        db.set_attr(sample_id, 'filter_numpy_version', numpy.version.version)