def prepare_data(path, arena, smooth, medfilt, only_laser, gts):

    LASER_THORAX_MAP = {True:THORAX,False:HEAD}

    #PROCESS SCORE FILES:
    pooldf = pd.DataFrame()
    for csvfile in sorted(glob.glob(path + "/*.csv")):

        #don't waste time smoothing files not in out genotype list
        _,_,_,_genotype,_laser,_ = flymad_analysis.extract_metadata_from_filename(csvfile)
        if _laser != only_laser:
            print "\tskipping laser", _laser, "!=", only_laser
            continue

        if _genotype not in gts:
            print "\tskipping genotype", _genotype, "!=", gts
            continue

        csvfilefn = os.path.basename(csvfile)
        cache_args = csvfilefn, arena, smoothstr
        cache_fname = csvfile+'.madplot-cache'

        results = madplot.load_bagfile_cache(cache_args, cache_fname)
        if results is None:
            results = flymad_analysis.load_and_smooth_csv(csvfile, arena, smooth)
            if results is not None:
                #update the cache
                madplot.save_bagfile_cache(results, cache_args, cache_fname)
            else:
                print "skipping", csvfile
                continue

        df,dt,experimentID,date,time,genotype,laser,repID = results

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print "\tmissing data", csvfilefn, duration
            continue

        print "\t%ss experiment" % duration

        #we use zx to rotate by pi
        df['zx'][df['zx'] > 0] = math.pi

        #ROTATE by pi if orientation is east
        df['orientation'] = df['theta'] + df['zx']

        #ROTATE by pi if orientation is north/south (plusminus 0.25pi) and hemisphere does not match scoring:
        smask = df[df['as'] == 1]
        smask = smask[smask['orientation'] < 0.75*(math.pi)]
        smask = smask[smask['orientation'] > 0.25*(math.pi)]
        amask = df[df['as'] == 0]
        amask1 = amask[amask['orientation'] > -0.5*(math.pi)]
        amask1 = amask1[amask1['orientation'] < -0.25*(math.pi)]
        amask2 = amask[amask['orientation'] > 1.25*(math.pi)]
        amask2 = amask2[amask2['orientation'] < 1.5*(math.pi)]
        df['as'] = 0
        df['as'][smask.index] = math.pi
        df['as'][amask1.index] = math.pi
        df['as'][amask2.index] = math.pi
        df['orientation'] = df['orientation'] - df['as']
        df['orientation'] = df['orientation'].astype(float)

        df['orientation'][np.isfinite(df['orientation'])] = np.unwrap(df['orientation'][np.isfinite(df['orientation'])]) 
        #MAXIMUM SPEED = 300:
        df['v'][df['v'] >= 300] = np.nan

        #CALCULATE FORWARD VELOCITY
        df['Vtheta'] = np.arctan2(df['vy'], df['vx'])
        df['Vfwd'] = (np.cos(df['orientation'] - df['Vtheta'])) * df['v']
        df['Afwd'] = np.gradient(df['Vfwd'].values) / dt
        df['dorientation'] = np.gradient(df['orientation'].values) / dt

        try:
            df = flymad_analysis.align_t_by_laser_on(
                    df, min_experiment_duration=EXPERIMENT_DURATION,
                    align_first_only=False,
                    t_range=(-1,6),
                    min_num_ranges=5)
        except flymad_analysis.AlignError, err:
            print "\talign error %s (%s)" % (csvfilefn, err)
            continue

        #median filter
        if medfilt:
            df['Vfwd'] = scipy.signal.medfilt(df['Vfwd'].values, medfilt)

        df['obj_id'] = flymad_analysis.create_object_id(date,time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser
        df['RepID'] = repID

        pooldf = pd.concat([pooldf, df]) 
Example #2
0
def prepare_data(path, resample_bin, gts):

    LASER_THORAX_MAP = {True: THORAX, False: HEAD}

    #PROCESS SCORE FILES:
    pooldf = pd.DataFrame()
    for df, metadata in flymad_analysis.load_courtship_csv(path):
        csvfilefn, experimentID, date, time, genotype, laser, repID = metadata

        dlaser = np.gradient(df['laser_state'].values)
        num_on_periods = (dlaser == 0.5).sum()
        if num_on_periods != 12:
            print "\tskipping file %s (%d laser on periods)" % (
                csvfilefn, num_on_periods / 2)
            continue

        if genotype not in gts:
            print "\tskipping genotype", genotype
            continue

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print "\tmissing data", csvfilefn
            continue
        print "\t%ss experiment" % duration

        #make new columns that indicates HEAD/THORAX targeting
        thorax = True
        laser_state = False

        trg = []
        for i0, i1 in madplot.pairwise(df.iterrows()):
            t0idx, t0row = i0
            t1idx, t1row = i1
            if t1row['laser_state'] >= 0.5 and t0row['laser_state'] == 0:
                thorax ^= True
                laser_state = True
            elif t0row['laser_state'] >= 0.5 and t1row['laser_state'] == 0:
                laser_state = False
            trg.append(OFF if not laser_state else LASER_THORAX_MAP[thorax])
        trg.append(OFF)
        df['ttm'] = trg

        #resample into 5S bins
        df = df.resample(resample_bin, fill_method='ffill')
        #trim dataframe
        df = df.head(
            flymad_analysis.get_num_rows(EXPERIMENT_DURATION, resample_bin))
        tb = flymad_analysis.get_resampled_timebase(EXPERIMENT_DURATION,
                                                    resample_bin)

        #fix cols due to resampling
        df['laser_state'][df['laser_state'] > 0] = 1
        df['zx_binary'] = (df['zx'] > 0).values.astype(float)
        df['ttm'][df['ttm'] < 0] = HEAD
        df['ttm'][df['ttm'] > 0] = THORAX

        dlaser = np.gradient((df['laser_state'].values > 0).astype(int)) > 0
        t0idx = np.argmax(dlaser)
        t0 = tb[t0idx - 1]
        df['t'] = tb - t0

        #groupby on float times is slow. make a special align column
        df['t_align'] = np.array(range(0, len(df))) - t0idx

        df['obj_id'] = flymad_analysis.create_object_id(date, time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser
        df['RepID'] = repID

        pooldf = pd.concat([pooldf, df])

    data = {}
    for gt in gts:
        gtdf = pooldf[pooldf['Genotype'] == gt]

        lgs = gtdf['lasergroup'].unique()
        if len(lgs) != 1:
            raise Exception("only one lasergroup handled for gt %s: not %s" %
                            (gt, lgs))

        grouped = gtdf.groupby(['t'], as_index=False)
        data[gt] = dict(mean=grouped.mean().astype(float),
                        std=grouped.std().astype(float),
                        n=grouped.count().astype(float),
                        first=grouped.first(),
                        df=gtdf)

    return data
def prepare_data(path, arena, smooth, medfilt, only_laser, gts):

    LASER_THORAX_MAP = {True: THORAX, False: HEAD}

    #PROCESS SCORE FILES:
    pooldf = pd.DataFrame()
    for csvfile in sorted(glob.glob(path + "/*.csv")):

        #don't waste time smoothing files not in out genotype list
        _, _, _, _genotype, _laser, _ = flymad_analysis.extract_metadata_from_filename(
            csvfile)
        if _laser != only_laser:
            print "\tskipping laser", _laser, "!=", only_laser
            continue

        if _genotype not in gts:
            print "\tskipping genotype", _genotype, "!=", gts
            continue

        csvfilefn = os.path.basename(csvfile)
        cache_args = csvfilefn, arena, smoothstr
        cache_fname = csvfile + '.madplot-cache'

        results = madplot.load_bagfile_cache(cache_args, cache_fname)
        if results is None:
            results = flymad_analysis.load_and_smooth_csv(
                csvfile, arena, smooth)
            if results is not None:
                #update the cache
                madplot.save_bagfile_cache(results, cache_args, cache_fname)
            else:
                print "skipping", csvfile
                continue

        df, dt, experimentID, date, time, genotype, laser, repID = results

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print "\tmissing data", csvfilefn
            continue

        print "\t%ss experiment" % duration

        #we use zx to rotate by pi
        df['zx'][df['zx'] > 0] = math.pi

        #ROTATE by pi if orientation is east
        df['orientation'] = df['theta'] + df['zx']

        #ROTATE by pi if orientation is north/south (plusminus 0.25pi) and hemisphere does not match scoring:
        smask = df[df['as'] == 1]
        smask = smask[smask['orientation'] < 0.75 * (math.pi)]
        smask = smask[smask['orientation'] > 0.25 * (math.pi)]
        amask = df[df['as'] == 0]
        amask1 = amask[amask['orientation'] > -0.5 * (math.pi)]
        amask1 = amask1[amask1['orientation'] < -0.25 * (math.pi)]
        amask2 = amask[amask['orientation'] > 1.25 * (math.pi)]
        amask2 = amask2[amask2['orientation'] < 1.5 * (math.pi)]
        df['as'] = 0
        df['as'][smask.index] = math.pi
        df['as'][amask1.index] = math.pi
        df['as'][amask2.index] = math.pi
        df['orientation'] = df['orientation'] - df['as']
        df['orientation'] = df['orientation'].astype(float)

        df['orientation'][np.isfinite(df['orientation'])] = np.unwrap(
            df['orientation'][np.isfinite(df['orientation'])])
        #MAXIMUM SPEED = 300:
        df['v'][df['v'] >= 300] = np.nan

        #CALCULATE FORWARD VELOCITY
        df['Vtheta'] = np.arctan2(df['vy'], df['vx'])
        df['Vfwd'] = (np.cos(df['orientation'] - df['Vtheta'])) * df['v']
        df['Afwd'] = np.gradient(df['Vfwd'].values) / dt
        df['dorientation'] = np.gradient(df['orientation'].values) / dt

        try:
            df = flymad_analysis.align_t_by_laser_on(
                df,
                min_experiment_duration=EXPERIMENT_DURATION,
                align_first_only=False,
                t_range=(-1, 9),
                min_num_ranges=5)
        except flymad_analysis.AlignError, err:
            print "\talign error %s (%s)" % (csvfilefn, err)
            continue

        #median filter
        if medfilt:
            df['Vfwd'] = scipy.signal.medfilt(df['Vfwd'].values, medfilt)

        df['obj_id'] = flymad_analysis.create_object_id(date, time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser
        df['RepID'] = repID

        pooldf = pd.concat([pooldf, df])
Example #4
0
def prepare_data(path, resample_bin, gts):

    LASER_THORAX_MAP = {True:THORAX,False:HEAD}

    #PROCESS SCORE FILES:
    pooldf = pd.DataFrame()
    for df,metadata in flymad_analysis.load_courtship_csv(path):
        csvfilefn,experimentID,date,time,genotype,laser,repID = metadata

        dlaser = np.gradient(df['laser_state'].values)
        num_on_periods = (dlaser == 0.5).sum()
        if num_on_periods != 12:
            print "\tskipping file %s (%d laser on periods)" % (csvfilefn, num_on_periods/2)
            continue

        if genotype not in gts:
            print "\tskipping genotype", genotype
            continue

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print "\tmissing data", csvfilefn
            continue
        print "\t%ss experiment" % duration

        #make new columns that indicates HEAD/THORAX targeting
        thorax = True
        laser_state = False

        trg = []
        for i0,i1 in madplot.pairwise(df.iterrows()):
            t0idx,t0row = i0
            t1idx,t1row = i1
            if t1row['laser_state'] >= 0.5 and t0row['laser_state'] == 0:
                thorax ^= True
                laser_state = True
            elif t0row['laser_state'] >= 0.5 and t1row['laser_state'] == 0:
                laser_state = False
            trg.append(OFF if not laser_state else LASER_THORAX_MAP[thorax])
        trg.append(OFF)
        df['ttm'] = trg

        #resample into 5S bins
        df = df.resample(resample_bin, fill_method='ffill')
        #trim dataframe
        df = df.head(flymad_analysis.get_num_rows(EXPERIMENT_DURATION, resample_bin))
        tb = flymad_analysis.get_resampled_timebase(EXPERIMENT_DURATION, resample_bin)

        #fix cols due to resampling
        df['laser_state'][df['laser_state'] > 0] = 1
        df['zx_binary'] = (df['zx'] > 0).values.astype(float)
        df['ttm'][df['ttm'] < 0] = HEAD
        df['ttm'][df['ttm'] > 0] = THORAX

        dlaser = np.gradient( (df['laser_state'].values > 0).astype(int) ) > 0
        t0idx = np.argmax(dlaser)
        t0 = tb[t0idx-1]
        df['t'] = tb - t0

        #groupby on float times is slow. make a special align column
        df['t_align'] = np.array(range(0,len(df))) - t0idx

        df['obj_id'] = flymad_analysis.create_object_id(date,time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser
        df['RepID'] = repID

        pooldf = pd.concat([pooldf, df])

    data = {}
    for gt in gts:
        gtdf = pooldf[pooldf['Genotype'] == gt]

        lgs = gtdf['lasergroup'].unique()
        if len(lgs) != 1:
            raise Exception("only one lasergroup handled for gt %s: not %s" % (
                             gt, lgs))

        grouped = gtdf.groupby(['t'], as_index=False)
        data[gt] = dict(mean=grouped.mean().astype(float),
                        std=grouped.std().astype(float),
                        n=grouped.count().astype(float),
                        first=grouped.first(),
                        df=gtdf)

    return data
Example #5
0
def prepare_data(path, arena, smoothstr, smooth, medfilt, gts):

    pooldf = DataFrame()
    for csvfile in sorted(glob.glob(path + "/*.csv")):
        cache_args = os.path.basename(csvfile), arena, smoothstr
        cache_fname = csvfile+'.madplot-cache'

        results = madplot.load_bagfile_cache(cache_args, cache_fname)
        if results is None:
            results = flymad_analysis.load_and_smooth_csv(csvfile, arena, smooth)
            if results is not None:
                #update the cache
                madplot.save_bagfile_cache(results, cache_args, cache_fname)
            else:
                print "skipping", csvfile
                continue

        df,dt,experimentID,date,time,genotype,laser,repID = results

        #we plot head v thorax v nolaser (so for the same of plotting, consider
        #these the genotypes
        genotype = genotype + '-' + laser

        if genotype not in gts:
            print "\tskipping genotype", genotype
            continue

        if 0:
            fig = plt.figure()
            fig.suptitle(os.path.basename(csvfile))
            ax = fig.add_subplot(1,1,1)
            df['experiment'] = 1
            df['tobj_id'] = 1
            madplot.plot_tracked_trajectory(ax, df, arena,
                        debug_plot=False,
                        color='k',
            )
            ax.add_patch(arena.get_patch(color='k', alpha=0.1))

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print "\tmissing data", csvfilefn
            continue

        print "\t%ss experiment" % duration

        #MAXIMUM SPEED = 300:
        df['v'][df['v'] >= 300] = np.nan
        df['v'] = df['v'].fillna(method='ffill')

        try:
            df = flymad_analysis.align_t_by_laser_on(
                    df, min_experiment_duration=EXPERIMENT_DURATION,
                    align_first_only=True,
                    exact_num_ranges=1)
        except flymad_analysis.AlignError, err:
            print "\talign error %s (%s)" % (csvfile, err)
            continue

        #median filter
        if medfilt:
            df['v'] = scipy.signal.medfilt(df['v'].values, medfilt)

        df['obj_id'] = flymad_analysis.create_object_id(date,time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser

        pooldf = pd.concat([pooldf, df])
Example #6
0
def prepare_data(path, arena, smoothstr, smooth, medfilt, gts):

    pooldf = DataFrame()
    for csvfile in sorted(glob.glob(path + "/*.csv")):
        cache_args = os.path.basename(csvfile), arena, smoothstr
        cache_fname = csvfile + '.madplot-cache'

        results = madplot.load_bagfile_cache(cache_args, cache_fname)
        if results is None:
            results = flymad_analysis.load_and_smooth_csv(
                csvfile, arena, smooth)
            if results is not None:
                #update the cache
                madplot.save_bagfile_cache(results, cache_args, cache_fname)
            else:
                print "skipping", csvfile
                continue

        df, dt, experimentID, date, time, genotype, laser, repID = results

        #we plot head v thorax v nolaser (so for the same of plotting, consider
        #these the genotypes
        genotype = genotype + '-' + laser

        if genotype not in gts:
            print "\tskipping genotype", genotype
            continue

        if 0:
            fig = plt.figure()
            fig.suptitle(os.path.basename(csvfile))
            ax = fig.add_subplot(1, 1, 1)
            df['experiment'] = 1
            df['tobj_id'] = 1
            madplot.plot_tracked_trajectory(
                ax,
                df,
                arena,
                debug_plot=False,
                color='k',
            )
            ax.add_patch(arena.get_patch(color='k', alpha=0.1))

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print "\tmissing data", csvfilefn
            continue

        print "\t%ss experiment" % duration

        #MAXIMUM SPEED = 300:
        df['v'][df['v'] >= 300] = np.nan
        df['v'] = df['v'].fillna(method='ffill')

        try:
            df = flymad_analysis.align_t_by_laser_on(
                df,
                min_experiment_duration=EXPERIMENT_DURATION,
                align_first_only=True,
                exact_num_ranges=1)
        except flymad_analysis.AlignError, err:
            print "\talign error %s (%s)" % (csvfile, err)
            continue

        #median filter
        if medfilt:
            df['v'] = scipy.signal.medfilt(df['v'].values, medfilt)

        df['obj_id'] = flymad_analysis.create_object_id(date, time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser

        pooldf = pd.concat([pooldf, df])