Example #1
0
    def homo(self, _type='pearson', _sim='latent'):
        """ Homophily test -- table output

            Fills this experiment's entries of the per-model result tables
            shared through `self.gramexp.tables` (created on first use).
            On the last experiment (`self._it == self.expe_size - 1`) every
            table is reduced to "mean +/- std" over its last axis, printed,
            and written with `self.write_frames` when `expe._write` is set.
            Returns None immediately when `self.model` is None.

            Parameters
            ==========
            _type: similarity type in (contingency, pearson)
            _sim: similarity metric in (natural, latent); only read when
                  `_type` is 'pearson'

            Raises
            ======
            ValueError: when the tables must be created and `_type` is not
                        one of the supported types
        """
        if self.model is None:
            return
        expe = self.expe
        Y = self._Y
        model = self.model

        self.log.info('using `%s\' type' % _type)

        if not hasattr(self.gramexp, 'tables'):
            # First call of the run: allocate one table per model and share
            # them (and the column names) through gramexp.
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            models = self.gramexp.get_set('model')
            if _type == 'pearson':
                Meas = ['pearson coeff', '2-tailed pvalue']
                n_rows = len(corpuses)
            elif _type == 'contingency':
                # Columns 0-1 receive means, columns 2-3 variances; the row
                # count is doubled (entries == 1 first, then entries == 0).
                Meas = ['natural', 'latent', 'natural', 'latent']
                n_rows = 2 * len(corpuses)
            else:
                raise ValueError('unknown homophily type: %r' % (_type,))

            # NOTE(review): np.empty leaves cells uninitialised; any cell not
            # written below still enters the final mean/std -- confirm that
            # every cell is filled over the whole run.
            tables = {
                m: np.empty((n_rows, len(Meas), len(Y)))
                for m in models
            }

            self.gramexp.Meas = Meas
            self.gramexp.tables = tables
            table = tables[expe.model]
        else:
            table = self.gramexp.tables[expe.model]
            Meas = self.gramexp.Meas

        if _type == 'pearson':
            self.log.info('using `%s\' similarity' % _sim)
            # No variance for link expectation !!!
            Y = [Y[0]]

            ### Global degree
            # NOTE(review): result unused here; call kept in case it has
            # side effects -- TODO confirm and drop.
            d, dc, yerr = random_degree(Y)
            sim = model.similarity_matrix(sim=_sim)
            for it_dat, data in enumerate(Y):
                # Correlate the model likelihood (not the raw data) with
                # the similarity matrix.
                homo_object = model.likelihood()
                table[self.corpus_pos, :,
                      it_dat] = sp.stats.pearsonr(homo_object.flatten(),
                                                  sim.flatten())

        elif _type == 'contingency':

            ### Global degree
            # NOTE(review): result unused here; call kept in case it has
            # side effects -- TODO confirm and drop.
            d, dc, yerr = random_degree(Y)
            sim_nat = model.similarity_matrix(sim='natural')
            sim_lat = model.similarity_matrix(sim='latent')
            step_tab = len(self.specname(self.gramexp.get_set('corpus')))
            row_ones = self.corpus_pos
            row_zeros = self.corpus_pos + step_tab
            for it_dat, data in enumerate(Y):
                # NOTE(review): result unused in this branch; call kept in
                # case it has side effects -- TODO confirm and drop.
                homo_object = model.likelihood()

                # Build each mask once instead of once per statistic.
                ones = data == 1
                zeros = data == 0

                table[row_ones, 0, it_dat] = sim_nat[ones].mean()
                table[row_ones, 1, it_dat] = sim_lat[ones].mean()
                table[row_ones, 2, it_dat] = sim_nat[ones].var()
                table[row_ones, 3, it_dat] = sim_lat[ones].var()
                table[row_zeros, 0, it_dat] = sim_nat[zeros].mean()
                table[row_zeros, 1, it_dat] = sim_lat[zeros].mean()
                table[row_zeros, 2, it_dat] = sim_nat[zeros].var()
                table[row_zeros, 3, it_dat] = sim_lat[zeros].var()

        if self._it == self.expe_size - 1:
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            for _model, raw in self.gramexp.tables.items():
                # Function in (utils. ?)
                # Mean and standard deviation over the last axis
                table_mean = np.char.array(np.around(
                    raw.mean(2), decimals=3)).astype("|S20")
                table_std = np.char.array(np.around(
                    raw.std(2), decimals=3)).astype("|S20")
                cells = table_mean + b' $\\pm$ ' + table_std

                # Table formatting
                try:
                    cells = np.column_stack((corpuses, cells))
                except ValueError:
                    # Row count mismatch: contingency tables hold two rows
                    # per corpus, so the labels are repeated.
                    cells = np.column_stack((corpuses * 2, cells))
                tablefmt = 'simple'  # 'latex'
                rendered = tabulate(
                    cells,
                    headers=['__' + _model.upper() + '__'] + Meas,
                    tablefmt=tablefmt,
                    floatfmt='.3f')
                print()
                print(rendered)
                if expe._write:
                    base = '%s_homo_%s' % (self.specname(_model), _type)
                    self.write_frames(rendered, base=base, ext='md')
Example #2
0
    def roc_evolution(self,
                      _type='testset',
                      _type2='max',
                      _ratio=20,
                      _type3='errorbar'):
        ''' AUC difference between two models against testset_ratio

            Stores the AUC of the current experiment in the table returned
            by `self.init_roc_tables()`. On the last experiment
            (`self._it == self.expe_size - 1`) the repetitions are reduced,
            the difference between the two models is plotted and printed,
            and written with `self.write_frames` when `expe._write` is set.

            * _type : learnset/testset
            * _type2 : max/min/mean -- name of the numpy function used to
                       reduce the repetitions
            * _ratio : ratio (in percent) of the training set to predict.
                       If >= 100 (or negative) _predictall will be true and
                       every masked entry is used.
            * _type3 : errorbar/boxplot -- kind of plot drawn per corpus

            Raises ValueError if `_type` is neither 'testset' nor 'learnset'.
        '''
        expe = self.expe
        model = self.model
        data = self.frontend.data
        _ratio = int(_ratio)
        _predictall = (_ratio >= 100) or (_ratio < 0)
        if not hasattr(expe, 'testset_ratio'):
            expe.testset_ratio = 20
            self.testset_ratio_pos = 0
        else:
            self.testset_ratio_pos = self.pt['testset_ratio']

        table, Meas = self.init_roc_tables()

        #mask = model.get_mask()
        if _type == 'testset':
            y_true, probas = model.mask_probas(data)
            if not _predictall:
                # take `_ratio`% of the size of the training set
                n_d = int(_ratio / 100 * data.size *
                          (1 - expe.testset_ratio / 100) / (1 - _ratio / 100))
                y_true = y_true[:n_d]
                probas = probas[:n_d]

        elif _type == 'learnset':
            # NOTE(review): `_ratio` is treated as a percentage everywhere
            # else but is used as a plain multiplier here, so with the
            # default value the slice below keeps every entry -- confirm
            # whether `_ratio / 100` was intended.
            n = int(data.size * _ratio)
            mask_index = np.unravel_index(
                np.random.permutation(data.size)[:n], data.shape)
            y_true = data[mask_index]
            probas = model.likelihood()[mask_index]

        else:
            # Fail early with a clear message instead of an unbound-name
            # error on `y_true` below.
            raise ValueError('unknown _type: %r' % (_type,))

        # Just the ONE:1
        #idx_1 = (y_true == 1)
        #idx_0 = (y_true == 0)
        #size_1 = idx_1.sum()
        #y_true = np.hstack((y_true[idx_1], y_true[idx_0][:size_1]))
        #probas = np.hstack((probas[idx_1], probas[idx_0][:size_1]))

        fpr, tpr, thresholds = roc_curve(y_true, probas)
        roc_auc = auc(fpr, tpr)

        table[self.corpus_pos, self.testset_ratio_pos, self.pt['_repeat'],
              self.model_pos] = roc_auc

        #precision, recall, thresholds = precision_recall_curve( y_true, probas)
        #plt.plot(precision, recall, label='PR curve; %s' % (expe.model ))

        if self._it == self.expe_size - 1:

            # Reduce each repetitions (third axis of `table`)
            take_type = getattr(np, _type2)
            t = ma.array(np.empty(table[:, :, 0, :].shape), mask=True)
            t[:, :, 0] = take_type(table[:, :, :, 0], -1)
            t[:, :, 1] = take_type(table[:, :, :, 1], -1)
            table_mean = t.copy()
            t[:, :, 0] = table[:, :, :, 0].std(-1)
            t[:, :, 1] = table[:, :, :, 1].std(-1)
            table_std = t

            # Measure is comparaison of two AUC.
            id_mmsb = [
                i for i, s in enumerate(self.gramexp.exp_tensor['model'])
                if s.endswith('mmsb_cgs')
            ][0]
            id_ibp = 1 if id_mmsb == 0 else 0
            table_mean = table_mean[:, :, id_mmsb] - table_mean[:, :, id_ibp]
            table_std = table_std[:, :, id_mmsb] + table_std[:, :, id_ibp]

            if _type2 != 'mean':
                # No error bars unless the reduction is the mean.
                table_std = [None] * len(table_std)

            fig = plt.figure()
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            for i in range(len(corpuses)):
                if _type3 == 'errorbar':
                    plt.errorbar(list(map(int, Meas)),
                                 table_mean[i],
                                 yerr=table_std[i],
                                 # builtin next(): iterators have no
                                 # .next() method on Python 3
                                 fmt=next(_markers),
                                 label=corpuses[i])
                elif _type3 == 'boxplot':
                    bplot = table[i, :, :, 0] - table[i, :, :, 1]
                    plt.boxplot(bplot.T, labels=corpuses[i])
                    fig.gca().set_xticklabels(Meas)

            # Dashed zero line as visual reference for the difference
            plt.errorbar(Meas, [0] * len(Meas), linestyle='--', color='k')
            plt.legend(loc='lower left', prop={'size': 7})

            # Table formatting
            #table = table_mean + b' $\pm$ ' + table_std
            table = table_mean

            corpuses = self.specname(self.gramexp.get_set('corpus'))
            table = np.column_stack((self.specname(corpuses), table))
            tablefmt = 'simple'
            headers = [''] + Meas
            table = tabulate(table,
                             headers=headers,
                             tablefmt=tablefmt,
                             floatfmt='.3f')
            print()
            print(table)
            if expe._write:
                base = '%s_%s_%s' % (_type, _type2, _ratio)
                figs = {
                    'roc_evolution':
                    ExpSpace({
                        'fig': fig,
                        'table': table,
                        'base': base
                    })
                }
                self.write_frames(figs)
Example #3
0
    def pvalue(self, _type='global'):
        """ similar to zipf but compute pvalue and print table

            Stores, for the current experiment, the values returned by
            `gofit` for each measure of `Meas` in the table returned by
            `self.init_fit_tables`. On the last experiment
            (`self._it == self.expe_size - 1`) every table of
            `self.gramexp.tables` is reduced to "mean +/- std" over its
            last axis, printed, and written with `self.write_frames` when
            `expe._write` is set.
            Returns None immediately when `self.model` is None.

            Parameters
            ==========
            _type: str in [global, local, feature]

            Raises
            ======
            NotImplementedError: when `_type` is 'feature'
        """
        if self.model is None:
            return
        expe = self.expe

        Y = self._Y
        model = self.model

        Table, Meas = self.init_fit_tables(_type, Y)

        self.log.info('using `%s\' burstiness' % _type)

        if _type == 'global':
            ### Global degree
            for it_dat, data in enumerate(Y):
                d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
                gof = gofit(d, dc)
                if not gof:
                    continue

                for i, v in enumerate(Meas):
                    Table[self.corpus_pos, i, it_dat] = gof[v]

        elif _type == 'local':
            ### Z assignement method
            a, _ = model.get_params()
            N, K = a.shape
            print('theta shape: %s' % (str((N, K))))
            now = Now()
            if 'mmsb' in expe.model:
                ZZ = []
                for _i, _ in enumerate(Y):
                    #for _ in Y: # Do not reflect real local degree !
                    theta = self._Theta[_i]
                    Z = np.empty((2, N, N))
                    order = np.arange(N**2).reshape((N, N))
                    if expe.symmetric:
                        # Only draw the upper triangle; it is mirrored below.
                        order = order[np.triu_indices(N)]
                    else:
                        order = order.flatten()
                    order = zip(*np.unravel_index(order, (N, N)))

                    # Kept as an explicit loop: the draws are sequential.
                    for i, j in order:
                        Z[0, i, j] = categorical(theta[i])
                        Z[1, i, j] = categorical(theta[j])
                    Z[0] = np.triu(Z[0]) + np.triu(Z[0], 1).T
                    Z[1] = np.triu(Z[1]) + np.triu(Z[1], 1).T
                    ZZ.append(Z)
                self.log.info('Z formation %s second', nowDiff(now))

            clustering = 'modularity'
            comm = model.communities_analysis(data=Y[0], clustering=clustering)
            # Count the non-empty blocks; len() of the boolean comparison
            # would give the total number of blocks instead.
            n_active = np.count_nonzero(np.asarray(comm['block_hist']) > 0)
            print('clustering method: %s, active clusters ratio: %f' %
                  (clustering, n_active / K))

            ### Iterate over all classes couple
            if expe.symmetric:
                #k_perm = np.unique( map(list, map(set, itertools.product(np.unique(clusters) , repeat=2))))
                k_perm = np.unique([
                    list(set(pair))
                    for pair in itertools.product(range(K), repeat=2)
                ])
            else:
                #k_perm = itertools.product(np.unique(clusters) , repeat=2)
                k_perm = itertools.product(range(K), repeat=2)

            for it_k, c in enumerate(k_perm):
                if isinstance(c, (np.int64, np.float64)):
                    k = l = c
                elif len(c) == 2:
                    # Stochastic Equivalence (extra class bind
                    k, l = c
                    #continue
                else:
                    # Comunnities (intra class bind)
                    k = l = c.pop()
                #if i > expe.limit_class:
                #   break
                if k != l:
                    # Only intra-class couples are evaluated.
                    continue

                YY = []
                if 'mmsb' in expe.model:
                    for y, z in zip(Y, ZZ):  # take the len of ZZ if < Y
                        y_c = y.copy()
                        # UNDIRECTED !
                        in_block = (z[0] == k) & (z[1] == l)
                        # Zero every entry outside the (k, l) block.
                        y_c[~in_block] = 0
                        YY.append(y_c)
                elif 'ilfm' in expe.model:
                    for _i, y in enumerate(Y):
                        theta = self._Theta[_i]
                        YY.append(
                            (y *
                             np.outer(theta[:, k], theta[:, l])).astype(int))

                d, dc, _ = random_degree(YY)
                if len(d) == 0:
                    continue
                gof = gofit(d, dc)
                if not gof:
                    continue

                for i, v in enumerate(Meas):
                    Table[self.corpus_pos, i, it_k] = gof[v]

        elif _type == 'feature':
            raise NotImplementedError

        if self._it == self.expe_size - 1:
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            row_labels = self.specname(corpuses)
            for _model, raw in self.gramexp.tables.items():

                # Mean and standard deviation over the last axis
                table_mean = np.char.array(np.around(
                    raw.mean(2), decimals=3)).astype("|S20")
                table_std = np.char.array(np.around(
                    raw.std(2), decimals=3)).astype("|S20")
                cells = table_mean + b' $\\pm$ ' + table_std

                # Table formatting
                cells = np.column_stack((row_labels, cells))
                tablefmt = 'simple'
                rendered = tabulate(
                    cells,
                    headers=['__' + _model.upper() + '__'] + Meas,
                    tablefmt=tablefmt,
                    floatfmt='.3f')
                print()
                print(rendered)
                if expe._write:
                    if expe._mode == 'predictive':
                        base = '%s_%s_%s' % (self.specname(
                            expe.corpus), self.specname(_model), _type)
                    else:
                        base = '%s_%s_%s' % ('MG', self.specname(_model),
                                             _type)
                    self.write_frames(rendered, base=base, ext='md')