예제 #1
0
def test_six_letter_sp_name(long_name, six_name):
    """Check that ``long_name`` is abbreviated to the expected ``six_name``."""
    abbreviated = six_letter_sp_name(long_name)
    assert abbreviated == six_name
예제 #2
0
def fish_weekly_corr(rootdir, fish_tracks_ds, feature, link_method):
    """Plot clustered between-individual correlations of a feature per species.

    For every species in ``fish_tracks_ds`` the ``feature`` values are pivoted
    into a table with one row per 'ts' and one column per 'FishID', and the
    pairwise correlations between the individuals are computed. The
    correlation matrix is reordered by hierarchical clustering and saved as a
    heatmap (ticks labelled with the sex of each fish) in ``rootdir``.
    Finally the below-diagonal correlation coefficients of all species are
    summarised in one box/strip plot, which is also saved in ``rootdir``.

    :param rootdir: directory in which the figures are saved
    :param fish_tracks_ds: DataFrame with the columns 'species', 'FishID',
        'ts', 'sex' and ``feature``
    :param feature: name of the column that is correlated between individuals
    :param link_method: linkage method handed to ``sch.linkage``
    :return: DataFrame with one column per species (named by its six letter
        name) holding the correlation coefficients found strictly below the
        diagonal of that species' correlation matrix
    """
    corr_frames = []

    for species_i in fish_tracks_ds['species'].unique():
        print(species_i)
        is_species = fish_tracks_ds.species == species_i
        species_six = six_letter_sp_name(species_i)[0]

        # one row per time stamp, one column per fish
        fish_feature = fish_tracks_ds.loc[
            is_species, ['FishID', 'ts', feature]].pivot(columns='FishID',
                                                         values=feature,
                                                         index='ts')
        individ_corr = fish_feature.corr()

        # keep each pair once: mask out the diagonal and everything above it
        mask = np.ones(individ_corr.shape, dtype='bool')
        mask[np.triu_indices(len(individ_corr))] = False
        corr_frames.append(
            pd.DataFrame(individ_corr.values[mask], columns=[species_six]))

        # cluster the individuals on their correlation profiles and reorder
        # rows and columns of the matrix so cluster members sit together
        d = sch.distance.pdist(individ_corr.values)
        linkage = sch.linkage(d, method=link_method)
        clusters = sch.fcluster(linkage, 0.5 * d.max(), 'distance')
        cols = individ_corr.columns[np.argsort(clusters)].tolist()
        individ_corr = individ_corr[cols].reindex(cols)

        # Look the sex up by FishID: the matrix columns follow the (sorted)
        # pivot order, which need not match the row order of fish_tracks_ds,
        # so positional indexing could label a column with another fish's sex.
        fish_sex = fish_tracks_ds.loc[
            is_species,
            ['FishID', 'sex']].drop_duplicates(subset='FishID').set_index(
                'FishID')['sex']
        fish_sex_clus = fish_sex.reindex(cols).tolist()

        f, ax = plt.subplots(figsize=(7, 5))
        sns.heatmap(data=individ_corr,
                    vmin=-1,
                    vmax=1,
                    xticklabels=fish_sex_clus,
                    yticklabels=fish_sex_clus,
                    cmap='seismic',
                    ax=ax)
        plt.tight_layout()
        plt.savefig(
            os.path.join(
                rootdir, "{0}_corr_by_30min_{1}_{2}_{3}.png".format(
                    species_i.replace(' ', '-'), feature, dt.date.today(),
                    link_method)))
        plt.close()

    # species can have different numbers of pairs; shorter columns are NaN padded
    corr_vals = pd.concat(corr_frames, axis=1)
    corr_vals_long = pd.melt(corr_vals,
                             var_name='species_six',
                             value_name='corr_coef')

    f, ax = plt.subplots(figsize=(3, 5))
    sns.boxplot(data=corr_vals_long,
                x='corr_coef',
                y='species_six',
                ax=ax,
                fliersize=0)
    sns.stripplot(data=corr_vals_long,
                  x='corr_coef',
                  y='species_six',
                  color=".2",
                  ax=ax,
                  size=3)
    ax.set(xlabel='Correlation', ylabel='Species')
    ax.set(xlim=(-1, 1))
    ax.axvline(0, ls='--', color='k')
    plt.tight_layout()
    plt.savefig(
        os.path.join(
            rootdir,
            "fish_corr_coefs_{0}_{1}.png".format(feature, dt.date.today())))
    plt.close()

    return corr_vals
예제 #3
0
# Ask the user for the top directory through a Tk dialog (the root window is
# withdrawn before the dialog opens); the feature vector files are loaded from it
root = Tk()
root.withdraw()
root.update()
rootdir = askdirectory(parent=root)
root.destroy()

feature_v = load_feature_vectors(rootdir, "*als_fv2.csv")

# add the species and species_six columns, filled row by row from the fish ID
feature_v['species'] = 'undefined'
feature_v['species_six'] = 'undefined'
# NOTE(review): `id` shadows the builtin, and `.loc[id_n, ...]` addresses rows
# by label, so this assumes feature_v has the default 0..n-1 index -- confirm
for id_n, id in enumerate(feature_v.fish_ID):
    sp = extract_meta(id)['species']
    feature_v.loc[id_n, 'species'] = sp
    feature_v.loc[id_n, 'species_six'] = six_letter_sp_name(sp)

# rename the misspelled species name (replaces matching cells in every column)
feature_v = feature_v.replace('Aalcal', 'Altcal')
feature_v = feature_v.replace('Aaltolamprologus-calvus',
                              'Altolamprologus-calvus')

species = feature_v['species'].unique()

tribe_col = tribe_cols()
# NOTE(review): absolute path on one user's machine -- not portable
metrics_path = '/Users/annikanichols/Desktop/cichlid_species_database.xlsx'
sp_metrics = add_metrics(feature_v.species_six.unique(), metrics_path)

# placeholder value for the tribe column, set before the per-species loop below
feature_v['tribe'] = 'undefined'
for species_n in feature_v.species_six.unique():
    if sp_metrics.loc[sp_metrics['species_abbreviation'] == species_n,
예제 #4
0
def plot_spd_30min_combined_daily(fish_tracks_ds_i, feature, ymax, span_max,
                                  ylabeling, change_times_datetime_i, rootdir,
                                  sp_metrics, tribe_col):
    """Plot a ridge plot of the average daily profile of a feature per species.

    For each species the ``feature`` values of all individuals are averaged
    per time of day (the 'HH:MM' part of 'ts') and drawn as one ridge on a
    shared 24 h axis, coloured by the tribe of the species. Species are
    ordered by tribe. The figure is saved in ``rootdir``.

    :param fish_tracks_ds_i: down sampled fish_tracks DataFrame with the
        columns 'species', 'tribe', 'FishID', 'ts' and ``feature``
    :param feature: name of the column to plot
    :param ymax: upper y limit of every ridge
    :param span_max: height of the background shading
    :param ylabeling: y label, shown on the last ridge only
    :param change_times_datetime_i: sequence of at least five datetimes that
        delimit the shaded periods (indices 0-4 are used)
    :param rootdir: directory in which the figure is saved
    :param sp_metrics: not used by this function
    :param tribe_col: mapping from tribe to the colour of the ridge
    :return: None
    inspiration from https://matplotlib.org/matplotblog/posts/create-ridgeplots-in-matplotlib/
    """
    n_species = len(fish_tracks_ds_i['species'].unique())
    date_form = DateFormatter('%H:%M:%S')

    # constants that do not depend on the species
    ts_format = '%Y-%m-%d %H:%M:%S'
    day_start = dt.datetime.strptime("1970-1-2 00:00:00", ts_format)
    day_end = dt.datetime.strptime("1970-1-3 00:00:00", ts_format)
    # strptime('%H:%M') yields a time on 1900-01-01; this shift moves it onto
    # 1970-01-02, the day on which the shading and the x limits are defined
    epoch_shift = timedelta(days=(365.25 * 70), hours=12)
    # background shading as (start, end, colour, extra fill_between kwargs)
    shading = [
        (day_start, change_times_datetime_i[0], 'lightblue', {'zorder': 1}),
        (change_times_datetime_i[0], change_times_datetime_i[1], 'wheat', {}),
        (change_times_datetime_i[2], change_times_datetime_i[3], 'wheat', {}),
        (change_times_datetime_i[3], change_times_datetime_i[4], 'lightblue',
         {}),
    ]

    gs = grid_spec.GridSpec(n_species, 1)
    fig = plt.figure(figsize=(4, 14))

    # order species by tribe
    species_sort = fish_tracks_ds_i.loc[:, ['species', 'tribe']].drop_duplicates(
    ).sort_values('tribe').species.to_list()

    for species_n, species_name in enumerate(species_sort):
        is_species = fish_tracks_ds_i.species == species_name
        tribe = fish_tracks_ds_i.loc[is_species].tribe.unique()[0]

        # feature values of each individual of this species, one column per fish
        sp_spd = fish_tracks_ds_i.loc[is_species,
                                      [feature, 'FishID', 'ts']].pivot(
                                          columns='FishID',
                                          values=feature,
                                          index='ts')

        # get time of day so that the same tod for each fish can be averaged
        sp_spd['time_of_day'] = [str(ts)[11:16] for ts in sp_spd.index]
        daily_feature = sp_spd.groupby('time_of_day').mean().mean(axis=1)

        # create time vector in datetime format
        date_time_obj = [
            dt.datetime.strptime(tod, '%H:%M') + epoch_shift
            for tod in daily_feature.index
        ]

        # add first point at end so that there is plotting until midnight
        daily_feature = pd.concat([
            daily_feature,
            pd.Series(data=daily_feature.iloc[0], index=['24:00'])
        ])
        date_time_obj.append(date_time_obj[-1] + timedelta(hours=0.5))

        # creating new axes object
        ax = fig.add_subplot(gs[species_n:species_n + 1, 0:])

        for start, end, colour, extra in shading:
            ax.fill_between([start, end], [span_max, span_max],
                            0,
                            color=colour,
                            alpha=0.5,
                            linewidth=0,
                            **extra)

        # plotting the distribution
        ax.plot(date_time_obj, daily_feature, lw=1, color='w')
        ax.fill_between(date_time_obj,
                        daily_feature,
                        0,
                        color=tribe_col[tribe],
                        zorder=2)

        # setting uniform x and y lims
        ax.set_xlim(min(date_time_obj), day_end)
        ax.set_ylim(0, ymax)

        # make background transparent
        ax.patch.set_alpha(0)

        if species_n == n_species - 1:
            ax.set_xlabel("Time", fontsize=10, fontweight="bold")
            ax.xaxis.set_major_locator(MultipleLocator(20))
            ax.xaxis.set_major_formatter(date_form)
            ax.yaxis.tick_right()
            ax.yaxis.set_label_position("right")
            ax.set_ylabel(ylabeling)
        else:
            # remove axis ticks and labels
            ax.set_xticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_yticklabels([])
            ax.set_ylabel('')

        for s in ["top", "right", "left", "bottom"]:
            ax.spines[s].set_visible(False)

        short_name = six_letter_sp_name(species_name)
        ax.text(1,
                0,
                short_name[0],
                fontweight="bold",
                fontsize=10,
                ha="right",
                rotation=-45)

    # negative spacing makes the ridges overlap; applies to all axes of the grid
    gs.update(hspace=-0.1)
    plt.savefig(
        os.path.join(
            rootdir, "{0}_30min_combined_species_daily_{1}.png".format(
                feature, dt.date.today())))
    plt.close('all')
    return
예제 #5
0
def plot_spd_30min_combined(fish_tracks_ds_i, feature, ymax, span_max,
                            ylabeling, change_times_datetime_i, rootdir):
    """Plot a ridge plot of the across-fish average of a feature per species.

    For each species the ``feature`` values are averaged over its individuals
    at every time stamp and drawn as one ridge over the whole recording, with
    the periods delimited by ``change_times_datetime_i`` shaded for every
    recorded day. The figure is saved in ``rootdir``.

    :param fish_tracks_ds_i: down sampled fish_tracks DataFrame with the
        columns 'species', 'FishID', 'ts' and ``feature``; 'ts' holds strings
        formatted as '%Y-%m-%d %H:%M:%S'
    :param feature: name of the column to plot
    :param ymax: upper y limit of every ridge
    :param span_max: height of the background shading
    :param ylabeling: y label, shown on the last ridge only
    :param change_times_datetime_i: sequence of at least five datetimes that
        delimit the shaded periods of the first day (indices 0-4 are used)
    :param rootdir: directory in which the figure is saved
    :return: aves_feature: DataFrame (time x species) of the species averages,
        cut or NaN padded to 303 time points and indexed by the time vector
        of the last species;
        date_time_obj: list of datetimes of the last species;
        sp_feature_combined: DataFrame (time x fish) of all individuals
    inspiration from https://matplotlib.org/matplotblog/posts/create-ridgeplots-in-matplotlib/
    """
    # number of time points kept per species in the returned averages
    n_bins = 303
    species = fish_tracks_ds_i['species'].unique()

    # plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9
    cmap = plt.get_cmap('turbo')
    colour_array = np.arange(0, 1, 1 / len(species))

    date_form = DateFormatter('%H:%M:%S')
    ts_format = '%Y-%m-%d %H:%M:%S'
    first_midnight = dt.datetime.strptime("1970-1-2 00:00:00", ts_format)
    x_max = dt.datetime.strptime("1970-1-8 08:30:00", ts_format)
    # background shading of the first day as (start, end, colour, extra
    # fill_between kwargs); shifted by whole days for the following days
    shading = [
        (first_midnight, change_times_datetime_i[0], 'lightblue', {
            'zorder': 1
        }),
        (change_times_datetime_i[0], change_times_datetime_i[1], 'wheat', {}),
        (change_times_datetime_i[2], change_times_datetime_i[3], 'wheat', {}),
        (change_times_datetime_i[3], change_times_datetime_i[4], 'lightblue',
         {}),
    ]

    gs = grid_spec.GridSpec(len(species), 1)
    fig = plt.figure(figsize=(16, 9))
    # rows are filled from the start; whatever is not filled stays NaN
    # (np.nan, because the np.NaN alias was removed in NumPy 2.0)
    averages = np.full([len(species), n_bins], np.nan)

    sp_feature_combined = None
    for species_n, species_name in enumerate(species):
        # feature values of each individual of this species, one column per fish
        sp_feature = fish_tracks_ds_i.loc[
            fish_tracks_ds_i.species == species_name,
            [feature, 'FishID', 'ts']].pivot(columns='FishID',
                                             values=feature,
                                             index='ts')
        if sp_feature_combined is None:
            sp_feature_combined = sp_feature
        else:
            sp_feature_combined = pd.concat(
                [sp_feature_combined, sp_feature], axis=1)

        # average over the individuals, cut to n_bins time points
        average = sp_feature.mean(axis=1)
        n_valid = min(len(average), n_bins)
        averages[species_n, :n_valid] = average.iloc[:n_valid].to_numpy()

        # create time vector in datetime format
        date_time_obj = [
            dt.datetime.strptime(ts, ts_format) for ts in sp_feature.index
        ]

        # creating new axes object
        ax = fig.add_subplot(gs[species_n:species_n + 1, 0:])

        days_to_plot = (date_time_obj[-1] - date_time_obj[0]).days + 1
        for day_n in range(days_to_plot):
            day_shift = timedelta(days=day_n)
            for start, end, colour, extra in shading:
                ax.fill_between([start + day_shift, end + day_shift],
                                [span_max, span_max],
                                0,
                                color=colour,
                                alpha=0.5,
                                linewidth=0,
                                **extra)

        # plotting the distribution
        ax.plot(date_time_obj, average, lw=1, color='w')
        ax.fill_between(date_time_obj,
                        average,
                        0,
                        color=cmap(colour_array[species_n]),
                        zorder=2)

        # setting uniform x and y lims
        ax.set_xlim(min(date_time_obj), x_max)
        ax.set_ylim(0, ymax)

        # make background transparent
        ax.patch.set_alpha(0)

        if species_n == len(species) - 1:
            ax.set_xlabel("Time", fontsize=10, fontweight="bold")
            ax.xaxis.set_major_locator(MultipleLocator(20))
            ax.xaxis.set_major_formatter(date_form)
            ax.yaxis.tick_right()
            ax.yaxis.set_label_position("right")
            ax.set_ylabel(ylabeling)
        else:
            # remove axis ticks and labels
            ax.set_xticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_yticklabels([])
            ax.set_ylabel('')

        for s in ["top", "right", "left", "bottom"]:
            ax.spines[s].set_visible(False)

        short_name = six_letter_sp_name(species_name)
        ax.text(0.9,
                0,
                short_name[0],
                fontweight="bold",
                fontsize=10,
                ha="right",
                rotation=-45)

    # negative spacing makes the ridges overlap; applies to all axes of the grid
    gs.update(hspace=-0.1)
    plt.savefig(
        os.path.join(
            rootdir, "{0}_30min_combined_species_{1}.png".format(
                feature, dt.date.today())))
    plt.close('all')
    # NOTE(review): the index is taken from the last species only and must
    # provide at least n_bins time points, otherwise the constructor raises
    aves_feature = pd.DataFrame(averages.T,
                                columns=species,
                                index=date_time_obj[0:averages.shape[1]])
    return aves_feature, date_time_obj, sp_feature_combined
예제 #6
0
def plot_combined_v_position(rootdir, fish_tracks_ds, fish_diel_patterns):
    """ Box and strip plots of the mean vertical position of each fish, grouped by species.

    Two figures are saved in ``rootdir``: one split by 'daynight' and one
    split by 'daytime'. In both the species are ordered by the median
    'day_night_dif' of the species in ``fish_diel_patterns``.

    :param rootdir: directory in which the figures are saved
    :param fish_tracks_ds: DataFrame with the columns 'daynight', 'daytime',
        'FishID', 'species' and 'vertical_pos'
    :param fish_diel_patterns: DataFrame with the columns 'species_six' and
        'day_night_dif'
    :return: None
    """
    # numeric_only=True: averaging text columns raises a TypeError in
    # pandas >= 2.0 (older versions silently dropped them)
    fish_tracks_dn = fish_tracks_ds.groupby(
        ['daynight', 'FishID',
         'species']).mean(numeric_only=True).reset_index()
    fish_tracks_dn['species_six'] = six_letter_sp_name(fish_tracks_dn.species)

    sorted_index = fish_diel_patterns.groupby('species_six').median(
        numeric_only=True).sort_values(by='day_night_dif').index

    sns.catplot(x='species_six',
                y='vertical_pos',
                kind="box",
                height=6,
                aspect=4,
                legend=False,
                hue='daynight',
                boxprops=dict(alpha=0.7),
                fliersize=1,
                order=sorted_index,
                data=fish_tracks_dn,
                palette="bwr_r")

    # plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9
    cmap = plt.get_cmap('bwr_r')
    # the strip plot is not given an ax, so it is drawn on the current axes,
    # i.e. on top of the box plot created just above
    grped_bplot = sns.stripplot(x='species_six',
                                y='vertical_pos',
                                hue='daynight',
                                data=fish_tracks_dn,
                                order=sorted_index,
                                palette=[cmap(0), cmap(1000)],
                                size=3,
                                dodge=True)
    grped_bplot.set_xticklabels(labels=sorted_index, rotation=90)
    grped_bplot.set(ylabel='Vertical position', xlabel='Species')
    grped_bplot.set(ylim=[0, 1])
    plt.tight_layout()
    plt.savefig(
        os.path.join(
            rootdir, "species_vertical_pos_30min_box_strip_{0}.png".format(
                dt.date.today())))

    fish_tracks_dt = fish_tracks_ds.groupby(
        ['daytime', 'FishID',
         'species']).mean(numeric_only=True).reset_index()
    fish_tracks_dt['species_six'] = six_letter_sp_name(fish_tracks_dt.species)
    cmap_f = plt.get_cmap('flare')
    daytime_palette = [cmap_f(20), cmap(0), cmap(1000)]
    sns.catplot(x='species_six',
                y='vertical_pos',
                kind="box",
                height=6,
                aspect=4,
                legend=False,
                hue='daytime',
                boxprops=dict(alpha=0.5),
                fliersize=1,
                order=sorted_index,
                data=fish_tracks_dt,
                palette=daytime_palette)

    grped_bplot = sns.stripplot(x='species_six',
                                y='vertical_pos',
                                hue='daytime',
                                data=fish_tracks_dt,
                                order=sorted_index,
                                palette=daytime_palette,
                                size=3,
                                dodge=True)
    grped_bplot.set_xticklabels(labels=sorted_index, rotation=90)
    grped_bplot.set(ylabel='Vertical position', xlabel='Species')
    grped_bplot.set(ylim=[0, 1])
    plt.tight_layout()
    plt.savefig(
        os.path.join(
            rootdir,
            "species_vertical_pos_30min_box_strip_day-night-cres_{0}.png".
            format(dt.date.today())))
예제 #7
0
def crepuscular_peaks(feature, fish_tracks_ds, species, fish_diel_patterns):
    """ Find peaks of ``feature`` in the dawn and dusk windows of every day for each fish and average them per fish.

    The data of each fish are cut into consecutive epochs of 48 bins. Within every complete epoch
    ``find_peaks`` is run once with a height window that only admits peaks in bins 12-16 (dawn) and once with
    one that only admits peaks in bins 36-40 (dusk); the first peak found is kept. The peak amplitude is the
    peak height minus a baseline which depends on the diel pattern of the fish: nocturnal = mean of the night
    bins, diurnal = mean of the day bins, undefined = mean of the day and night bins.
    Epochs without a peak are handled by ``replace_crep_peaks`` (defined elsewhere).

    :param feature: name of the column to analyse; for 'speed_mm' the height borders are scaled by 200 and the
    required peak prominence is 7 instead of 0.15
    :param fish_tracks_ds: df with: 'species', 'ts', 'FishID' and ``feature``
    :param species: iterable of the species names to analyse
    :param fish_diel_patterns: df with: 'FishID', 'species_diel_pattern'
    :return: df with one row per fish and twilight period: 'FishID', 'peak_amplitude', 'peak', 'twilight',
    'species', 'species_six'. Returns None (after printing a message) if a fish has a diel pattern other than
    'nocturnal', 'diurnal' or 'undefined'.
    """

    fishes = fish_tracks_ds.FishID.unique()

    # define borders: per-bin (min, max) height bounds handed to find_peaks. Everywhere but in the twilight
    # window the lower bound (1.05) lies above the upper bound (1), so no peak can qualify there.
    # presumably the 48 bins are the 30 min bins of one day (hour * 2) -- TODO confirm
    border_top = np.ones(48)
    border_bottom = np.ones(48) * 1.05
    dawn_border_bottom = copy.copy(border_bottom)
    dawn_border_bottom[6 * 2:(8 * 2) + 1] = 0
    dusk_border_bottom = copy.copy(border_bottom)
    dusk_border_bottom[18 * 2:(20 * 2) + 1] = 0

    # 0/1 masks of the bins used for the day and the night baseline
    border = np.zeros(48)
    day_border = copy.copy(border)
    day_border[8 * 2:18 * 2] = 1
    night_border = copy.copy(border)
    night_border[0:6 * 2] = 1
    night_border[20 * 2:24 * 2] = 1

    peak_prom = 0.15
    if feature == 'speed_mm':
        border_top = border_top * 200
        dawn_border_bottom = dawn_border_bottom * 200
        dusk_border_bottom = dusk_border_bottom * 200
        peak_prom = 7

    first_all = True
    for species_name in species:
        # one row per time stamp, one column per fish of this species
        fish_feature = fish_tracks_ds.loc[
            fish_tracks_ds.species == species_name,
            ['ts', 'FishID', feature]].pivot(columns='FishID',
                                             values=feature,
                                             index='ts')
        first = True
        for i in np.arange(0, len(fish_feature.columns)):
            # start index of every epoch: 0, 48, ..., 336 (8 values, i.e. 7 epochs)
            # NOTE(review): epoques[j + 1] below raises an IndexError if the data span more than 7 epochs
            epoques = np.arange(0, 48 * 7.5, 48).astype(int)

            # create dummies: 4 rows (see "fish_peaks data" below) x number of complete epochs
            fish_peaks_dawn = np.zeros([
                4,
                int(
                    np.floor(fish_feature.iloc[:, i].reset_index().shape[0] /
                             48))
            ])
            fish_peaks_dusk = np.zeros([
                4,
                int(
                    np.floor(fish_feature.iloc[:, i].reset_index().shape[0] /
                             48))
            ])

            for j in np.arange(0, int(np.ceil(fish_feature.shape[0] / 48))):
                x = fish_feature.iloc[epoques[j]:epoques[j + 1], i]
                # incomplete epochs are skipped
                if x.size == 48:
                    dawn_peak, dawn_peak_prop = find_peaks(
                        x,
                        distance=4,
                        prominence=peak_prom,
                        height=(dawn_border_bottom[0:x.shape[0]],
                                border_top[0:x.shape[0]]))

                    dusk_peak, dusk_peak_prop = find_peaks(
                        x,
                        distance=4,
                        prominence=peak_prom,
                        height=(dusk_border_bottom[0:x.shape[0]],
                                border_top[0:x.shape[0]]))

                    # fish_peaks data: position of peak within 24h, position of peak within week, raw peak height,
                    # raw peak height - baseline
                    # only the first peak of an epoch is kept; without a peak the entries stay 0
                    if dawn_peak.size != 0:
                        fish_peaks_dawn[0, j] = dawn_peak[0]
                        fish_peaks_dawn[1, j] = dawn_peak[0] + epoques[j]
                        fish_peaks_dawn[2, j] = np.round(
                            dawn_peak_prop['peak_heights'][0], 2)

                    if dusk_peak.size != 0:
                        fish_peaks_dusk[0, j] = dusk_peak[0]
                        fish_peaks_dusk[1, j] = dusk_peak[0] + epoques[j]
                        fish_peaks_dusk[2, j] = np.round(
                            dusk_peak_prop['peak_heights'][0], 2)

                    # candidate baselines of this epoch
                    day_mean = np.round(x[day_border.astype(int) == 1].mean(),
                                        2)
                    night_mean = np.round(
                        x[night_border.astype(int) == 1].mean(), 2)
                    daynight_mean = np.round(
                        x[(night_border + day_border).astype(int) == 1].mean(),
                        2)

                    # how the baseline is chosen is dependent on the diel pattern of the fish (predefined)
                    pattern = fish_diel_patterns.loc[
                        fish_diel_patterns.FishID == fish_feature.columns[i],
                        'species_diel_pattern'].values[0]
                    if pattern == 'nocturnal':
                        fish_peaks_dawn[3,
                                        j] = fish_peaks_dawn[2, j] - night_mean
                        fish_peaks_dusk[3,
                                        j] = fish_peaks_dusk[2, j] - night_mean
                    elif pattern == 'diurnal':
                        fish_peaks_dawn[3,
                                        j] = fish_peaks_dawn[2, j] - day_mean
                        fish_peaks_dusk[3,
                                        j] = fish_peaks_dusk[2, j] - day_mean
                    elif pattern == 'undefined':
                        fish_peaks_dawn[3,
                                        j] = fish_peaks_dawn[2,
                                                             j] - daynight_mean
                        fish_peaks_dusk[3,
                                        j] = fish_peaks_dusk[2,
                                                             j] - daynight_mean
                    else:
                        # an unknown pattern aborts the whole function, which then returns None
                        print("pattern not known, stopping function on {}".
                              format(fish_feature.columns[i]))
                        return

            # replace_crep_peaks and make_fish_peaks_df are defined elsewhere; presumably the first fills in the
            # epochs without a peak and the second converts the array to a df -- TODO confirm
            fish_peaks_dawn = replace_crep_peaks(fish_peaks_dawn, fish_feature,
                                                 i, epoques)
            fish_peaks_dusk = replace_crep_peaks(fish_peaks_dusk, fish_feature,
                                                 i, epoques)

            fish_peaks_df_dawn = make_fish_peaks_df(fish_peaks_dawn,
                                                    fish_feature.columns[i])
            fish_peaks_df_dusk = make_fish_peaks_df(fish_peaks_dusk,
                                                    fish_feature.columns[i])

            fish_peaks_df_dawn['twilight'] = 'dawn'
            fish_peaks_df_dusk['twilight'] = 'dusk'
            fish_peaks_df = pd.concat([fish_peaks_df_dawn, fish_peaks_df_dusk],
                                      axis=0)

            # stack the peaks of all fish of this species
            if first:
                species_peaks_df = fish_peaks_df
                first = False
            else:
                species_peaks_df = pd.concat([species_peaks_df, fish_peaks_df],
                                             axis=0)
        # NOTE(review): if a species has no fish, species_peaks_df is either undefined here or still holds the
        # data of the previous species
        species_peaks_df['species'] = species_name

        # stack the peaks of all species
        if first_all:
            all_peaks_df = species_peaks_df
            first_all = False
        else:
            all_peaks_df = pd.concat([all_peaks_df, species_peaks_df], axis=0)

    all_peaks_df = all_peaks_df.reset_index(drop=True)
    # flag (1/0) of whether the row has a non-zero peak location
    all_peaks_df['peak'] = (all_peaks_df.peak_loc != 0) * 1

    # average for each fish for dawn and dusk for 'peak_amplitude', peaks/(peaks+nonpeaks)
    periods = ['dawn', 'dusk']
    first_all = True
    for species_name in species:
        first = True
        for period in periods:
            feature_i = all_peaks_df[(all_peaks_df['species'] == species_name)
                                     & (all_peaks_df['twilight'] == period)][[
                                         'peak_amplitude', 'FishID',
                                         'crep_num', 'peak'
                                     ]]
            sp_average_peak_amp = feature_i.groupby(
                'FishID').mean().peak_amplitude.reset_index()
            sp_average_peak = feature_i.groupby(
                'FishID').mean().peak.reset_index()
            # both frames carry a 'FishID' column; the duplicate is dropped further down
            sp_average_peak_data = pd.concat(
                [sp_average_peak_amp, sp_average_peak], axis=1)
            sp_average_peak_data['twilight'] = period
            if first:
                sp_feature_combined = sp_average_peak_data
                first = False
            else:
                sp_feature_combined = pd.concat(
                    [sp_feature_combined, sp_average_peak_data], axis=0)
        sp_feature_combined['species'] = species_name

        if first_all:
            all_feature_combined = sp_feature_combined
            first_all = False
        else:
            all_feature_combined = pd.concat(
                [all_feature_combined, sp_feature_combined], axis=0)
    all_feature_combined = all_feature_combined.reset_index(drop=True)
    # keep only the first of the columns that share a name (the duplicated 'FishID')
    all_feature_combined = all_feature_combined.loc[:, ~all_feature_combined.
                                                    columns.duplicated()]

    # add the six letter species name of every fish, derived from its FishID
    all_feature_combined['species_six'] = 'blank'
    for fish in fishes:
        all_feature_combined.loc[all_feature_combined['FishID'] == fish,
                                 'species_six'] = six_letter_sp_name(
                                     extract_meta(fish)['species'])[0]

    return all_feature_combined
예제 #8
0
    rootdir = askdirectory(parent=root)
    root.destroy()

    fish_tracks_bin = load_ds_als_files(rootdir, "*als_30m.csv")
    fish_tracks_bin = fish_tracks_bin.reset_index(drop=True)
    fish_tracks_bin['time_of_day_dt'] = fish_tracks_bin.ts.apply(lambda row: int(str(row)[11:16][:-3]) * 60 + int(str(row)[11:16][-2:]))
    fish_tracks_bin.loc[fish_tracks_bin.species == 'Aaltolamprologus calvus', 'species'] = 'Altolamprologus calvus'
    fish_tracks_bin.FishID = fish_tracks_bin.FishID.str.replace('Aaltolamprologus', 'Altolamprologus')

    # get each fish ID and all species
    fish_IDs = fish_tracks_bin['FishID'].unique()
    species = fish_tracks_bin['species'].unique()

    # reorganising
    # species_short = shorten_sp_name(species)
    species_sixes = six_letter_sp_name(species)

    tribe_col = tribe_cols()

    metrics_path = '/Users/annikanichols/Desktop/cichlid_species_database.xlsx'
    sp_metrics = add_metrics(species_sixes, metrics_path)

    # add species six name and tribe
    fish_tracks_bin['species_six'] = fish_tracks_bin.apply(lambda row: six_letter_sp_name(row.species)[0], axis=1)
    fish_tracks_bin = pd.merge(fish_tracks_bin, sp_metrics.loc[:, ['species_six', 'tribe']], how='left')

    # get timings
    fps, tv_ns, tv_sec, tv_24h_sec, num_days, tv_s_type, change_times_s, change_times_ns, change_times_h, day_ns, day_s,\
        change_times_d, change_times_m = load_timings(fish_tracks_bin[fish_tracks_bin.FishID == fish_IDs[0]].shape[0])
    change_times_unit = [7*2, 7.5*2, 18.5*2, 19*2]
    change_times_datetime = [dt.datetime.strptime("1970-1-2 07:00:00", '%Y-%m-%d %H:%M:%S'),