Beispiel #1
0
    def accuracy(self, plot=True):
        """Score, per layer, how often non-accidental distances exceed metric ones.

        The frame returned by ``self.dissimilarity()`` is pivoted so that each
        (layer, geon, fno, dimension) combination has one column per ``kind``.
        A combination counts as correct when its 'non-accidental' distance is
        larger than its 'metric' distance.

        Args:
            plot: When true and ``self.task == 'run'``, the result is also
                passed to ``self.plot_single(df, 'acc')``.

        Returns:
            pandas.DataFrame. With ``self.bootstrap`` set: for each layer, one
            row per value returned by ``stats.bootstrap_resample``, with the
            columns kind, layer, accuracy (the layer mean), iter, bootstrap.
            Otherwise: one row per layer holding the per-layer means of the
            numeric columns, plus kind='nap', iter=0 and bootstrap=NaN.
        """
        nap = self.dissimilarity()

        acc = nap.pivot_table(values='dist',
                              index=['layer', 'geon', 'fno', 'dimension'],
                              columns='kind').reset_index()
        acc['accuracy'] = acc['non-accidental'] > acc['metric']

        if self.bootstrap:
            dfs = []
            for layer in acc['layer'].unique():
                sel = acc.loc[acc['layer'] == layer, 'accuracy']
                pct = stats.bootstrap_resample(sel, ci=None, func=np.mean)
                n_iter = len(pct)
                # The scalar layer mean is broadcast over all bootstrap rows.
                d = OrderedDict([('kind', ['nap'] * n_iter),
                                 ('layer', [layer] * n_iter),
                                 ('accuracy', sel.mean()),
                                 ('iter', range(n_iter)),
                                 ('bootstrap', pct)])
                dfs.append(pandas.DataFrame.from_dict(d))
            df = pandas.concat(dfs)
        else:
            # numeric_only=True skips non-numeric index columns (e.g. string
            # labels); without it pandas >= 2.0 raises TypeError instead of
            # silently dropping them as older versions did.
            df = acc.groupby('layer').mean(numeric_only=True).reset_index()
            df['kind'] = 'nap'
            df['iter'] = 0
            df['bootstrap'] = np.nan

        if self.task == 'run' and plot:
            self.plot_single(df, 'acc')
        return df
Beispiel #2
0
 def _corr_all_orig(self, pref):
     """Correlate every model's last-layer dissimilarity with each dimension.

     For each ``dim`` in ``self.myexp.dims`` the stored dissimilarity data is
     loaded (a 3-D array is averaged over its first axis). Each
     ``(depth, model_name)`` in ``self.myexp.models`` is then activated via
     ``self.myexp.set_model`` and the last entry of its
     ``dissimilarity()`` result is correlated with the dimension data.

     Args:
         pref: Prefix forwarded to ``self.save`` when storing the result.

     Returns:
         pandas.DataFrame with columns kind, depth, models, layer,
         correlation, iter, bootstrap. Without bootstrapping there is one
         row per (dim, model) with iter=0 and bootstrap=NaN; with it, one
         row per value returned by ``stats.bootstrap_resample``.
     """
     rows = []
     for dim in self.myexp.dims:
         dim_data = load(pref='dis', exp=self.myexp.exp, suffix=dim)[dim]
         if dim_data.ndim == 3:
             dim_data = np.mean(dim_data, axis=0)
         for depth, model_name in self.myexp.models:
             self.myexp.set_model(model_name)
             dis = self.myexp.dissimilarity()
             # Key views cannot be indexed on Python 3 (``keys()[-1]`` raises
             # TypeError), so materialise them before taking the last layer.
             layer = list(dis.keys())[-1]
             dis = dis[layer]
             corr = stats.corr(dis, dim_data, sel='upper')
             if self.myexp.bootstrap:
                 print('bootstrapping stats...')
                 # NOTE(review): this reads ``self.dims`` while the loop
                 # iterates ``self.myexp.dims`` -- confirm which is intended.
                 bf = stats.bootstrap_resample(
                     dis,
                     dim_data,
                     func=stats.corr,
                     ci=None,
                     seed=0,
                     sel='upper',
                     struct=self.dims[dim].ravel())
                 for i, b in enumerate(bf):
                     rows.append(
                         [dim, depth, model_name, layer, corr, i, b])
             else:
                 rows.append(
                     [dim, depth, model_name, layer, corr, 0, np.nan])
     df = pandas.DataFrame(rows,
                           columns=[
                               'kind', 'depth', 'models', 'layer',
                               'correlation', 'iter', 'bootstrap'
                           ])
     self.save(df, pref=pref)
     return df
Beispiel #3
0
    def accuracy(self, plot=True):
        """Compute per-layer accuracy of non-accidental vs. metric distances.

        ``self.dissimilarity()`` is reshaped with ``pivot_table`` so that the
        'non-accidental' and 'metric' distances of each
        (layer, geon, fno, dimension) combination sit side by side; the
        boolean 'accuracy' column marks rows where the former is larger.

        Args:
            plot: If true and ``self.task == 'run'``, also call
                ``self.plot_single(df, 'acc')`` on the result.

        Returns:
            pandas.DataFrame. When ``self.bootstrap`` is set, each layer
            contributes one row per value returned by
            ``stats.bootstrap_resample`` (columns: kind, layer, accuracy,
            iter, bootstrap). Otherwise each layer contributes a single row
            of numeric-column means with kind='nap', iter=0, bootstrap=NaN.
        """
        nap = self.dissimilarity()

        acc = nap.pivot_table(values='dist',
                              index=['layer', 'geon', 'fno', 'dimension'],
                              columns='kind').reset_index()
        acc['accuracy'] = acc['non-accidental'] > acc['metric']

        if self.bootstrap:
            per_layer = []
            for layer in acc['layer'].unique():
                sel = acc[acc['layer'] == layer]['accuracy']
                pct = stats.bootstrap_resample(sel, ci=None, func=np.mean)
                count = len(pct)
                # 'accuracy' is a scalar and is repeated on every row.
                d = OrderedDict([('kind', ['nap'] * count),
                                 ('layer', [layer] * count),
                                 ('accuracy', sel.mean()),
                                 ('iter', range(count)),
                                 ('bootstrap', pct)])
                per_layer.append(pandas.DataFrame.from_dict(d))
            df = pandas.concat(per_layer)
        else:
            # Restrict the mean to numeric columns: pandas >= 2.0 raises
            # TypeError on non-numeric columns, whereas earlier versions
            # dropped them silently.
            df = acc.groupby('layer').mean(numeric_only=True).reset_index()
            df['kind'] = 'nap'
            df['iter'] = 0
            df['bootstrap'] = np.nan

        if self.task == 'run' and plot:
            self.plot_single(df, 'acc')
        return df
Beispiel #4
0
    def compare(self, pref, ylim=[-0.1, 1]):
        """Plot the comparison data stored under ``pref`` across models.

        Prints a banner containing ``pref``, loads the data with
        ``self.get_data_all(pref, kind="compare")`` and dispatches on
        ``pref``:

        * ``"dis_group_diff"``: the 'preference' column is renamed to
          'preference for perceived shape' and plotted under the label
          'diff'.
        * ``"pred_corr"``: the 'kind' column is overwritten with 'shape'
          and 'correlation' is plotted with ``col="dataset"``.
        * anything else: one plot per entry of ``self.myexp.dims``, limited
          to rows whose 'kind' equals that entry. For the 'fonts'
          experiment the 'dissimilarity' column is renamed to
          'clustering accuracy' and plotted; otherwise 'accuracy' is
          plotted.

        When ``self.myexp.bootstrap`` is set, the result of
        ``self.bootstrap_ttest_grouped(df)`` is written to
        ``self.myexp.html`` if that is not None.

        Args:
            pref: Name of the stored comparison to load and plot.
            ylim: Forwarded unchanged to ``self.plot_all``.

        Returns:
            None.
        """
        print()
        print("{:=^50}".format(" " + pref + " "))
        df = self.get_data_all(pref, kind="compare")
        behav = self.myexp.behav() if hasattr(self.myexp, "behav") else None

        if behav is not None:
            rels = {"shape": stats.bootstrap_resample(behav.dissimilarity, func=np.mean)}
        else:
            rels = None

        if pref == "dis_group_diff":
            values = "preference for perceived shape"
            df = df.rename(columns={"preference": values})
            self.plot_all(df, values, "diff", pref=pref, ceiling=None, color=self.myexp.colors["shape"], ylim=ylim)
        elif pref == "pred_corr":
            values = "correlation"
            df["kind"] = "shape"
            # No reliability ceiling is computed for this comparison, so the
            # behavioral data does not need to be loaded or reshaped here.
            self.plot_all(
                df,
                values,
                "consistency",
                col="dataset",
                pref=pref,
                ceiling=None,
                color=self.myexp.colors["shape"],
                ylim=ylim,
            )
        else:
            if self.myexp.exp == "fonts":
                values = "clustering accuracy"
                df = df.rename(columns={"dissimilarity": values})
            else:
                values = "accuracy"
            for dim in self.myexp.dims:
                # ``rels`` only ever holds a 'shape' entry; other dimensions
                # get no ceiling rather than raising KeyError.
                ceiling = None if rels is None else rels.get(dim)
                self.plot_all(
                    df[df.kind == dim], values, dim, pref=pref, ceiling=ceiling, color=self.myexp.colors[dim], ylim=ylim
                )

        if self.myexp.bootstrap:
            bf = self.bootstrap_ttest_grouped(df)
            if self.myexp.html is not None:
                self.myexp.html.writetable(bf, caption="bootstrapped t-test (one-tailed, rel. samples)")
Beispiel #5
0
    def corr(self):
        """Correlate each layer's dissimilarity data with every dimension.

        For each ``dim`` in ``self.dims`` the stored dissimilarity data is
        loaded with ``load(pref='dis', exp=self.exp, suffix=dim)``. If nothing
        is stored, the model is temporarily switched to ``dim`` to compute it
        via ``self.dissimilarity()`` and then switched back. A 3-D array is
        averaged over its first axis. When ``self.filter`` is set, both the
        dimension data and the layer data are restricted to ``self.sel`` on
        both axes and no ``struct`` is passed to the bootstrap.

        The result is stored with ``self.save(df, pref='corr')`` and, when
        ``self.task == 'run'``, plotted with ``self.plot_single``.

        Returns:
            pandas.DataFrame with columns kind, models, layer, correlation,
            iter, bootstrap. Without bootstrapping there is one row per
            (dim, layer) with iter=0 and bootstrap=NaN; with it, one row per
            value returned by ``stats.bootstrap_resample``.

        Raises:
            Exception: If the dimension data can neither be loaded nor
                computed.
        """
        dis = self.dissimilarity()
        rows = []
        nname = models.NICE_NAMES[self.model_name].lower()
        for dim in self.dims:
            dim_data = load(pref='dis', exp=self.exp, suffix=dim)
            if dim_data is None:
                name = self.model_name
                self.set_model(dim)
                try:
                    dim_data = self.dissimilarity()
                finally:
                    # Always switch back, even if computing the dimension
                    # data fails, so the instance is not left on ``dim``.
                    self.set_model(name)
                if dim_data is None:
                    raise Exception('dimension data %s cannot be obtained' %
                                    dim)

            dim_data = dim_data[dim]
            if dim_data.ndim == 3:
                dim_data = np.mean(dim_data, axis=0)
            struct = self.dims[dim] if self.exp in ['fonts', 'stefania'
                                                    ] else None
            if self.filter:
                dim_data = dim_data[self.sel][:, self.sel]
                struct = None
            for layer, data in dis.items():
                d = data[self.sel][:, self.sel] if self.filter else data
                corr = stats.corr(d, dim_data, sel='upper')
                if self.bootstrap:
                    print('bootstrapping stats...')
                    bf = stats.bootstrap_resample(d,
                                                  dim_data,
                                                  func=stats.corr,
                                                  ci=None,
                                                  seed=0,
                                                  sel='upper',
                                                  struct=struct)
                    for i, b in enumerate(bf):
                        rows.append([dim, nname, layer, corr, i, b])
                else:
                    rows.append([dim, nname, layer, corr, 0, np.nan])
        df = pandas.DataFrame(rows,
                              columns=[
                                  'kind', 'models', 'layer', 'correlation',
                                  'iter', 'bootstrap'
                              ])
        self.save(df, pref='corr')
        if self.task == 'run':
            self.plot_single(df, 'corr')
        return df
Beispiel #6
0
    def corr(self):
        """Correlate the dissimilarity data of every layer with each dimension.

        Each ``dim`` in ``self.dims`` is loaded with
        ``load(pref="dis", exp=self.exp, suffix=dim)``. When that yields None,
        the model is temporarily set to ``dim``, ``self.dissimilarity()`` is
        computed, and the previous model is restored. 3-D dimension data is
        averaged over its first axis. With ``self.filter`` set, dimension and
        layer data are both indexed by ``self.sel`` on both axes and the
        bootstrap receives ``struct=None``.

        The frame is saved with ``self.save(df, pref="corr")`` and plotted via
        ``self.plot_single`` when ``self.task == "run"``.

        Returns:
            pandas.DataFrame with columns kind, models, layer, correlation,
            iter, bootstrap. One row per (dim, layer) with iter=0 and
            bootstrap=NaN when not bootstrapping; otherwise one row per value
            returned by ``stats.bootstrap_resample``.

        Raises:
            Exception: If the dimension data can neither be loaded nor
                computed.
        """
        dis = self.dissimilarity()
        records = []
        nname = models.NICE_NAMES[self.model_name].lower()
        for dim in self.dims:
            dim_data = load(pref="dis", exp=self.exp, suffix=dim)
            if dim_data is None:
                name = self.model_name
                self.set_model(dim)
                try:
                    dim_data = self.dissimilarity()
                finally:
                    # Restore the original model even when the computation
                    # raises; otherwise the instance stays switched to ``dim``.
                    self.set_model(name)
                if dim_data is None:
                    raise Exception("dimension data %s cannot be obtained" % dim)

            dim_data = dim_data[dim]
            if dim_data.ndim == 3:
                dim_data = np.mean(dim_data, axis=0)
            struct = self.dims[dim] if self.exp in ["fonts", "stefania"] else None
            if self.filter:
                dim_data = dim_data[self.sel][:, self.sel]
                struct = None
            for layer, data in dis.items():
                d = data[self.sel][:, self.sel] if self.filter else data
                corr = stats.corr(d, dim_data, sel="upper")
                if self.bootstrap:
                    print("bootstrapping stats...")
                    bf = stats.bootstrap_resample(
                        d, dim_data, func=stats.corr, ci=None, seed=0, sel="upper", struct=struct
                    )
                    for i, b in enumerate(bf):
                        records.append([dim, nname, layer, corr, i, b])
                else:
                    records.append([dim, nname, layer, corr, 0, np.nan])
        df = pandas.DataFrame(records, columns=["kind", "models", "layer", "correlation", "iter", "bootstrap"])
        self.save(df, pref="corr")
        if self.task == "run":
            self.plot_single(df, "corr")
        return df
Beispiel #7
0
 def _corr_all_orig(self, pref):
     """Correlate the last-layer dissimilarity of every model with each dimension.

     Dimension data is loaded per ``dim`` in ``self.myexp.dims``; 3-D data is
     averaged over its first axis. Every ``(depth, model_name)`` pair in
     ``self.myexp.models`` is activated with ``self.myexp.set_model`` and the
     last entry of its ``dissimilarity()`` result is correlated with the
     dimension data.

     Args:
         pref: Prefix forwarded to ``self.save`` when storing the result.

     Returns:
         pandas.DataFrame with columns kind, depth, models, layer,
         correlation, iter, bootstrap. One row per (dim, model) with iter=0
         and bootstrap=NaN when not bootstrapping; otherwise one row per
         value returned by ``stats.bootstrap_resample``.
     """
     records = []
     for dim in self.myexp.dims:
         dim_data = load(pref="dis", exp=self.myexp.exp, suffix=dim)[dim]
         if dim_data.ndim == 3:
             dim_data = np.mean(dim_data, axis=0)
         for depth, model_name in self.myexp.models:
             self.myexp.set_model(model_name)
             dis = self.myexp.dissimilarity()
             # ``keys()[-1]`` raises TypeError on Python 3 because key views
             # are not subscriptable; build a list to pick the last layer.
             layer = list(dis.keys())[-1]
             dis = dis[layer]
             corr = stats.corr(dis, dim_data, sel="upper")
             if self.myexp.bootstrap:
                 print("bootstrapping stats...")
                 # NOTE(review): ``self.dims`` is read here although the loop
                 # iterates ``self.myexp.dims`` -- confirm which is intended.
                 bf = stats.bootstrap_resample(
                     dis, dim_data, func=stats.corr, ci=None, seed=0, sel="upper", struct=self.dims[dim].ravel()
                 )
                 for i, b in enumerate(bf):
                     records.append([dim, depth, model_name, layer, corr, i, b])
             else:
                 records.append([dim, depth, model_name, layer, corr, 0, np.nan])
     df = pandas.DataFrame(records, columns=["kind", "depth", "models", "layer", "correlation", "iter", "bootstrap"])
     self.save(df, pref=pref)
     return df
Beispiel #8
0
 def bootstrap_resample(r):
     """Resample ``r`` via ``stats.bootstrap_resample`` unless resampling is off.

     ``n_boot`` and ``ci`` are not parameters; they are resolved from the
     surrounding scope. When ``n_boot`` is None or 0 a ``(nan, nan)`` pair is
     returned instead of calling into ``stats``.
     """
     # Guard clause: nothing to resample when no iterations are requested.
     if n_boot is None or n_boot == 0:
         return (np.nan, np.nan)
     return stats.bootstrap_resample(r, ci=ci, niter=n_boot)
Beispiel #9
0
    def compare(self, pref, ylim=[-.1, 1]):
        """Plot the comparison data stored under ``pref`` across models.

        A banner containing ``pref`` is printed, the data is loaded with
        ``self.get_data_all(pref, kind='compare')`` and then handled
        according to ``pref``:

        * ``'dis_group_diff'``: the 'preference' column is renamed to
          'preference for perceived shape' and plotted under the label
          'diff'.
        * ``'pred_corr'``: the 'kind' column is overwritten with 'shape'
          and 'correlation' is plotted with ``col='dataset'``.
        * anything else: one plot per entry of ``self.myexp.dims``, limited
          to rows whose 'kind' equals that entry. For the 'fonts'
          experiment the 'dissimilarity' column is renamed to
          'clustering accuracy' and plotted; otherwise 'accuracy' is
          plotted.

        When ``self.myexp.bootstrap`` is set, the result of
        ``self.bootstrap_ttest_grouped(df)`` is written to
        ``self.myexp.html`` if that is not None.

        Args:
            pref: Name of the stored comparison to load and plot.
            ylim: Forwarded unchanged to ``self.plot_all``.

        Returns:
            None.
        """
        print()
        print('{:=^50}'.format(' ' + pref + ' '))
        df = self.get_data_all(pref, kind='compare')
        behav = self.myexp.behav() if hasattr(self.myexp, 'behav') else None

        if behav is not None:
            rels = {
                'shape':
                stats.bootstrap_resample(behav.dissimilarity, func=np.mean)
            }
        else:
            rels = None

        shape_color = self.myexp.colors['shape']
        if pref == 'dis_group_diff':
            values = 'preference for perceived shape'
            df = df.rename(columns={'preference': values})
            self.plot_all(df,
                          values,
                          'diff',
                          pref=pref,
                          ceiling=None,
                          color=shape_color,
                          ylim=ylim)
        elif pref == 'pred_corr':
            values = 'correlation'
            df['kind'] = 'shape'
            # No reliability ceiling is computed for this comparison, so the
            # behavioral data does not need to be loaded or reshaped here.
            self.plot_all(df,
                          values,
                          'consistency',
                          col='dataset',
                          pref=pref,
                          ceiling=None,
                          color=shape_color,
                          ylim=ylim)
        else:
            if self.myexp.exp == 'fonts':
                values = 'clustering accuracy'
                df = df.rename(columns={'dissimilarity': values})
            else:
                values = 'accuracy'
            for dim in self.myexp.dims:
                # ``rels`` only ever holds a 'shape' entry; other dimensions
                # get no ceiling rather than raising KeyError.
                ceiling = None if rels is None else rels.get(dim)
                self.plot_all(df[df.kind == dim],
                              values,
                              dim,
                              pref=pref,
                              ceiling=ceiling,
                              color=self.myexp.colors[dim],
                              ylim=ylim)

        if self.myexp.bootstrap:
            bf = self.bootstrap_ttest_grouped(df)
            if self.myexp.html is not None:
                self.myexp.html.writetable(
                    bf,
                    caption='bootstrapped t-test (one-tailed, rel. samples)'
                )
Beispiel #10
0
 def bootstrap_resample(r):
     """Return bootstrap results for ``r``, or a NaN pair when disabled.

     ``n_boot`` and ``ci`` come from the surrounding scope rather than from
     the argument list. Resampling counts as disabled when ``n_boot`` is
     None or 0; otherwise the call is delegated to
     ``stats.bootstrap_resample`` with ``ci=ci`` and ``niter=n_boot``.
     """
     disabled = n_boot is None or n_boot == 0
     return (np.nan, np.nan) if disabled else stats.bootstrap_resample(r, ci=ci, niter=n_boot)