Ejemplo n.º 1
0
    def pvalue(self):
        """Compute goodness-of-fit statistics of the global degree distribution.

        The degree histogram of ``self.frontend.data`` is fitted with
        ``gofit`` and the measures listed in ``Meas`` are written into the
        row ``self.corpus_pos`` of a table shared through ``self.gramexp``
        (created on first use).  Once the last experiment is reached
        (``self._it == self.expe_size - 1``) the table is printed in LaTeX
        format.

        When the fit fails (``gofit`` returns a falsy value, as the other
        analysis methods of this file also expect), the row is filled with
        NaN instead of raising, so that the final table is still printed.
        """
        data = self.frontend.data

        d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
        gof = gofit(d, dc)

        if not hasattr(self.gramexp, 'Table'):
            # First call: build the shared table, one row per corpus, with
            # the corpus names in the first column.
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            Meas = ['pvalue', 'alpha', 'x_min', 'n_tail']
            Table = np.empty((len(corpuses), len(Meas)))
            Table = np.column_stack((corpuses, Table))
            self.gramexp.Table = Table
            self.gramexp.Meas = Meas
        else:
            Table = self.gramexp.Table
            Meas = self.gramexp.Meas

        # Column 0 holds the corpus name, hence the `i + 1` offset.
        for i, v in enumerate(Meas):
            Table[self.corpus_pos, i + 1] = gof[v] if gof else np.nan

        if self._it == self.expe_size - 1:
            tablefmt = 'latex'
            print(colored('\nPvalue Table:', 'green'))
            print(
                self.tabulate(Table,
                              headers=Meas,
                              tablefmt=tablefmt,
                              floatfmt='.3f'))
Ejemplo n.º 2
0
    def burstiness(self, clusters_org='source', _type='local'):
        """Zipf analysis: global, per-cluster and cluster-size burstiness.

        Three figures are produced: the global degree distribution with the
        line of the fitted power law, the inner degree distribution of every
        cluster, and a bar chart of the cluster sizes.  The per-cluster fit
        measures are stored in the table returned by
        ``self.init_fit_tables``.  On the last experiment
        (``self._it == self.expe_size - 1``) a "mean +/- std" table is
        printed for every entry of ``self.gramexp.tables``.

        Parameters
        ----------
        clusters_org : str
            ``'source'`` reads the clusters from ``frontend.get_clusters()``;
            ``'model'`` computes them (the ``Annealing`` search result is
            the one that is kept).  With any other value no clusters are
            available: an error is logged and the method returns.
        _type : str
            Forwarded to ``self.init_fit_tables``.

        Notes
        -----
        * The method returns right after drawing the global degree plot when
          the global fit fails (``gofit`` returns a falsy value).
        * ``np.fill_diagonal(data, 0)`` modifies ``frontend.data`` in place.
        * ``expe.K`` and ``expe.model`` are overwritten.
        """
        expe = self.expe
        frontend = self.frontend
        data = frontend.data
        figs = []

        # --- Global burstiness -------------------------------------------
        d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
        fig = plt.figure()
        plot_degree(data, spec=True, title=self.specname(expe.corpus))

        gof = gofit(d, dc)
        if not gof:
            return

        alpha = gof['alpha']
        x_min = gof['x_min']
        y_max = gof['y_max']

        # Line of the estimated power law.  It starts 10% of len(d)
        # positions before x_min when that index is still valid.
        idx = d.searchsorted(x_min)
        shifted = int(idx - 0.1 * len(d))
        if shifted >= 0:
            idx = shifted
        x = d[idx:]
        ylin = np.exp(-alpha * np.log(x / float(x_min)) + np.log(y_max))

        # Hack xticks: insert an extra 'x_min' tick while keeping the
        # current x limits.  The canvas is drawn first so that the ticks
        # read back below are the rendered ones.
        fig.canvas.draw()
        lim = plt.gca().get_xlim()
        locs, labels = plt.xticks()

        idx_xmin = locs.searchsorted(x_min)
        locs = np.insert(locs, idx_xmin, x_min)
        labels.insert(idx_xmin, plt.Text(text='x_min'))
        plt.xticks(locs, labels)
        plt.gca().set_xlim(lim)

        # Shift the power-law line (in log space) by 75% of the gap between
        # the last point of a degree-1 least-squares fit and its own last
        # point.
        fit = np.polyfit(np.log(d), np.log(dc), deg=1)
        poly_fit = fit[0] * np.log(d) + fit[1]
        diff = np.abs(poly_fit[-1] - np.log(ylin[-1]))
        ylin = np.exp(np.log(ylin) + diff * 0.75)

        plt.plot(x, ylin, 'g--', label='power %.2f' % alpha)
        figs.append(plt.gcf())

        # --- Local burstiness --------------------------------------------
        # Get the cluster assignment of the nodes.
        clusters = None
        if clusters_org == 'source':
            clusters = frontend.get_clusters()
        elif clusters_org == 'model':
            # NOTE(review): the loaded model is never used below; the call
            # is kept in case loading has side effects -- confirm.
            ModelManager.from_expe(expe, load=True)
            clusters = Louvain.get_clusters(frontend.to_directed(),
                                            resolution=10)
            # NOTE(review): `or True` makes this condition always true, so
            # the Louvain result above is always replaced -- confirm that
            # this is intended.
            if len(np.unique(clusters)) > 20 or True:
                clusters = Annealing(frontend.data,
                                     iterations=200,
                                     C_init=5,
                                     grow_rate=0).search()

        if clusters is None:
            lgg.error('No clusters here...passing')
            return

        block_hist = np.bincount(clusters)
        K = (block_hist != 0).sum()  # number of non-empty clusters
        lgg.info('%d Clusters from `%s\':' % (K, clusters_org))

        expe.K = K
        assert 'model' not in expe
        expe.model = 'no_model'
        Table, Meas = self.init_fit_tables(_type=_type)

        # Just inner degree: only the diagonal (k, k) blocks are analysed.
        f = plt.figure()
        ax = f.gca()

        it_k = 0
        np.fill_diagonal(data, 0)
        for k in np.arange(K):
            members = clusters == k
            ixgrid = np.ix_(members, members)

            # Adjacency restricted to the links inside cluster k.
            y = np.zeros(data.shape)
            y[ixgrid] = data[ixgrid]

            d, dc = degree_hist(adj_to_degree(y))
            if len(d) == 0:
                continue
            plot_degree_2((d, dc, None),
                          logscale=True,
                          colors=True,
                          line=True,
                          ax=ax,
                          title='')

            gof = gofit(d, dc)
            if not gof:
                continue

            # `it_k` only advances for clusters whose fit succeeded.
            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_k] = gof[v]
            it_k += 1

        plt.suptitle(self.specname(expe.corpus))
        figs.append(plt.gcf())

        # --- Features burstiness: cluster sizes ---------------------------
        plt.figure()
        hist, label = sorted_perm(block_hist, reverse=True)
        bins = len(hist)
        plt.bar(range(bins), hist)
        plt.xticks(np.arange(bins) + 0.5, label)
        plt.xlabel('Class labels')
        plt.title('Blocks Size (max assignement)')
        figs.append(plt.gcf())

        if expe._write:
            self.write_frames(figs)

        if self._it == self.expe_size - 1:
            for _model, table in self.gramexp.tables.items():

                # Mean and standard deviation over the last axis, rendered
                # as byte strings "mean $\pm$ std".
                table_mean = np.char.array(np.around(
                    table.mean(2), decimals=3)).astype("|S20")
                table_std = np.char.array(np.around(table.std(2),
                                                    decimals=3)).astype("|S20")
                table = table_mean + b' $\\pm$ ' + table_std

                # Table formatting
                corpuses = self.specname(self.gramexp.get_set('corpus'))
                table = np.column_stack((self.specname(corpuses), table))
                tablefmt = 'simple'
                table = self.tabulate(table,
                                      headers=['__' + _model.upper() + '__'] +
                                      Meas,
                                      tablefmt=tablefmt,
                                      floatfmt='.3f')
                print()
                print(table)
Ejemplo n.º 3
0
    def pvalue(self, _type='global'):
        """Similar to zipf, but compute the fit measures and print tables.

        The goodness-of-fit measures returned by ``gofit`` are stored in the
        table given by ``self.init_fit_tables``.  On the last experiment
        (``self._it == self.expe_size - 1``) a "mean +/- std" table is
        printed for every entry of ``self.gramexp.tables`` and, when
        ``expe._write`` is set, written through ``self.write_frames``.

        Nothing is done when ``self.model`` is None.

        Parameters
        ==========
        _type: str in [global, local, feature]
            ``global`` fits the degree distribution of every network in
            ``self._Y``; ``local`` fits the degree distribution restricted
            to each class; ``feature`` is not implemented.

        Raises
        ======
        NotImplementedError
            When ``_type`` is ``'feature'``.
        """
        if self.model is None:
            return
        expe = self.expe

        Y = self._Y
        model = self.model

        Table, Meas = self.init_fit_tables(_type, Y)

        self.log.info('using `%s\' burstiness' % _type)

        if _type == 'global':
            ### Global degree
            for it_dat, data in enumerate(Y):
                d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
                gof = gofit(d, dc)
                if not gof:
                    continue

                for i, v in enumerate(Meas):
                    Table[self.corpus_pos, i, it_dat] = gof[v]

        elif _type == 'local':
            ### Z assignement method
            a, b = model.get_params()
            N, K = a.shape
            print('theta shape: %s' % (str((N, K))))
            now = Now()
            if 'mmsb' in expe.model:
                # One pair of class-assignment matrices per network:
                # Z[0, i, j] is drawn from theta[i], Z[1, i, j] from theta[j].
                ZZ = []
                for _i, _ in enumerate(Y):
                    theta = self._Theta[_i]
                    Z = np.empty((2, N, N))
                    order = np.arange(N**2).reshape((N, N))
                    if expe.symmetric:
                        order = order[np.triu_indices(N)]
                    else:
                        order = order.flatten()
                    order = zip(*np.unravel_index(order, (N, N)))

                    for i, j in order:
                        Z[0, i, j] = categorical(theta[i])
                        Z[1, i, j] = categorical(theta[j])
                    # Mirror the upper triangle onto the lower one.
                    # NOTE(review): this is done even when expe.symmetric
                    # is false -- confirm that this is intended.
                    Z[0] = np.triu(Z[0]) + np.triu(Z[0], 1).T
                    Z[1] = np.triu(Z[1]) + np.triu(Z[1], 1).T
                    ZZ.append(Z)
                self.log.info('Z formation %s second', nowDiff(now))

            clustering = 'modularity'
            comm = model.communities_analysis(data=Y[0], clustering=clustering)
            # Count the non-empty clusters: the length of the boolean mask
            # would always be the total number of bins.
            n_active = (np.asarray(comm['block_hist']) > 0).sum()
            print('clustering method: %s, active clusters ratio: %f' %
                  (clustering, n_active / K))

            ### Iterate over all classes couple
            if expe.symmetric:
                # NOTE(review): the sets have one or two elements, so the
                # list handed to np.unique is ragged -- verify the result
                # with the numpy version in use.
                k_perm = np.unique(
                    list(
                        map(
                            list,
                            map(
                                list,
                                map(set, itertools.product(range(K),
                                                           repeat=2))))))
            else:
                k_perm = itertools.product(range(K), repeat=2)

            for it_k, c in enumerate(k_perm):
                if isinstance(c, (np.int64, np.float64)):
                    k = l = c
                elif len(c) == 2:
                    # Stochastic Equivalence (extra class bind)
                    k, l = c
                else:
                    # Communities (intra class bind)
                    k = l = c.pop()

                # Only the intra-class couples are fitted.
                if k != l:
                    continue

                YY = []
                if 'mmsb' in expe.model:
                    for y, z in zip(Y, ZZ):  # take the len of ZZ if < Y
                        # Keep only the links whose assignments are (k, l).
                        y_c = y.copy()
                        phi_c = np.zeros(y.shape)
                        phi_c[(z[0] == k) & (z[1] == l)] = 1
                        y_c[phi_c != 1] = 0
                        YY.append(y_c)
                elif 'ilfm' in expe.model:
                    for _i, y in enumerate(Y):
                        theta = self._Theta[_i]
                        YY.append(
                            (y *
                             np.outer(theta[:, k], theta[:, l])).astype(int))

                d, dc, yerr = random_degree(YY)
                if len(d) == 0:
                    continue
                gof = gofit(d, dc)
                if not gof:
                    continue

                for i, v in enumerate(Meas):
                    Table[self.corpus_pos, i, it_k] = gof[v]

        elif _type == 'feature':
            raise NotImplementedError

        if self._it == self.expe_size - 1:
            for _model, table in self.gramexp.tables.items():

                # Mean and standard deviation over the last axis, rendered
                # as byte strings "mean $\pm$ std".
                table_mean = np.char.array(np.around(
                    table.mean(2), decimals=3)).astype("|S20")
                table_std = np.char.array(np.around(table.std(2),
                                                    decimals=3)).astype("|S20")
                table = table_mean + b' $\\pm$ ' + table_std

                # Table formatting
                corpuses = self.specname(self.gramexp.get_set('corpus'))
                table = np.column_stack((self.specname(corpuses), table))
                tablefmt = 'simple'
                table = tabulate(table,
                                 headers=['__' + _model.upper() + '__'] + Meas,
                                 tablefmt=tablefmt,
                                 floatfmt='.3f')
                print()
                print(table)
                if expe._write:
                    if expe._mode == 'predictive':
                        base = '%s_%s_%s' % (self.specname(
                            expe.corpus), self.specname(_model), _type)
                    else:
                        base = '%s_%s_%s' % ('MG', self.specname(_model),
                                             _type)
                    self.write_frames(table, base=base, ext='md')
Ejemplo n.º 4
0
    def prop_process_local_me(self, frame, p=250):
        """Plot, for every class, how often a node's degree grows.

        For each class index ``k`` the local adjacency returned by
        ``self._local_likelihood(theta, phi, k)`` is copied and ``N - p`` of
        its ``N**2`` cells, drawn uniformly without replacement, are set to
        zero.  For every non-zero degree ``n`` observed in the reduced
        matrix, the plotted value is the fraction of nodes of degree ``n``
        whose degree in the full matrix is strictly larger.

        Parameters
        ----------
        frame : object
            Provides the axes through ``frame.ax()``; its ``title``
            attribute is set.
        p : int or value convertible to int
            ``N - p`` cells are zeroed.
        """
        p = int(p)
        expe = self.expe

        # A single epoch is forced before generating (bernoulli variance).
        expe.epoch = 1
        self._generate()

        theta = self._Theta[0]
        phi = self._Phi[0]
        N, K = theta.shape[0], theta.shape[1]

        def node_degrees(matrix):
            # Degree of every node: graph degree for 'ilfm' models, rounded
            # row sums otherwise.
            if 'ilfm' in expe.model:
                return dict(nx.from_numpy_array(matrix).degree())
            return {node: int(round(total))
                    for node, total in enumerate(matrix.sum(1))}

        n_to_zeros = N - p

        for _k1 in range(K):
            adj = self._local_likelihood(theta, phi, _k1)

            # Reduced copy of the adjacency with random cells zeroed.
            flat_cells = np.random.choice(np.arange(N**2),
                                          n_to_zeros,
                                          replace=False)
            adj_n = adj.copy()
            adj_n[np.unravel_index(flat_cells, (N, N))] = 0

            degree_n = node_degrees(adj_n)
            d_n, _ = degree_hist(degree_n, filter_zeros=False)

            degree = node_degrees(adj)
            degree_hist(degree, filter_zeros=False)  # result is not used

            # Per reduced degree: number of nodes, and number of those whose
            # full degree is larger.
            seen = defaultdict(int)
            grew = defaultdict(int)
            for node, partial in degree_n.items():
                if partial == 0:
                    continue
                if degree[node] > partial:
                    grew[partial] += 1
                seen[partial] += 1

            ratio = {deg: grew[deg] / total for deg, total in seen.items()}
            y = [ratio.get(deg, 0) for deg in d_n]

            ax = frame.ax()
            ax.plot(d_n, y, label='%s K=%d' % (expe.model, _k1))

        ax.legend(loc=1, prop={'size': 8})
        ax.set_xlabel('n')
        ax.set_ylabel('Cumulative Sum')
        frame.title = expe.corpus + ' p=%s' % p
Ejemplo n.º 5
0
    def prop_process_me(self, frame, p=250):
        """Plot how often a node's degree grows between two networks.

        An adjacency matrix is sampled (bernoulli) from the model
        likelihood, then copied with ``N - p`` of its ``N**2`` cells, drawn
        uniformly without replacement, set to zero.  For every non-zero
        degree ``n`` observed in the reduced network, the plotted value is
        the fraction of nodes of degree ``n`` whose degree in the full
        network is strictly larger.

        Parameters
        ----------
        frame : object
            Provides the axes through ``frame.ax()``; its ``title``
            attribute is set.
        p : int or value convertible to int
            ``N - p`` cells are zeroed.
        """
        p = int(p)
        expe = self.expe

        # A single epoch is forced before generating (bernoulli variance).
        expe.epoch = 1
        self._generate()

        theta = self._Theta[0]
        phi = self._Phi[0]
        N = theta.shape[0]

        adj = sp.stats.bernoulli.rvs(self.model.likelihood(theta, phi))

        # Reduced copy of the adjacency with random cells zeroed.
        flat_cells = np.random.choice(np.arange(N**2), N - p, replace=False)
        adj_n = adj.copy()
        adj_n[np.unravel_index(flat_cells, (N, N))] = 0

        degree_n = dict(nx.from_numpy_array(adj_n).degree())
        d_n, _ = degree_hist(degree_n, filter_zeros=False)

        degree = dict(nx.from_numpy_array(adj).degree())
        degree_hist(degree, filter_zeros=False)  # result is not used

        # Per reduced degree: number of nodes, and number of those whose
        # full degree is larger.
        seen = defaultdict(int)
        grew = defaultdict(int)
        for node, partial in degree_n.items():
            if partial == 0:
                continue
            if degree[node] > partial:
                grew[partial] += 1
            seen[partial] += 1

        ratio = {deg: grew[deg] / total for deg, total in seen.items()}
        y = [ratio.get(deg, 0) for deg in d_n]

        ax = frame.ax()
        ax.plot(d_n, y, label=expe.model)

        ax.legend(loc=1, prop={'size': 10})
        ax.set_xlabel('n')
        ax.set_ylabel('Cumulative Sum')
        frame.title = expe.corpus + ' p=%s' % p
Ejemplo n.º 6
0
    def prop2_process_local_me(self, frame, p=90):
        """Bar chart, per class, of the probability that a node gains links.

        For a class pair the local adjacency returned by
        ``self._local_likelihood`` is copied and ``int(N**2 * (1 - p/100))``
        of its cells, drawn uniformly without replacement, are set to zero.
        For every non-zero degree ``n`` observed in the reduced matrix, the
        bar height is the fraction of nodes of degree ``n`` whose degree in
        the full matrix is strictly larger.

        'ilfm' models only visit the pairs ``(k, k)``; other models visit
        every ``(k1, k2)``.  A class is skipped when it has at most three
        non-zero bars, and at most four classes are drawn.  Within a class,
        drawing stops at the fourth consecutive bar equal to 1.

        Parameters
        ----------
        frame : object
            Provides the axes through ``frame.ax()``; its ``title``
            attribute is set.
        p : int or value convertible to int
            Percentage used to compute the number of zeroed cells.
        """
        p = int(p)
        expe = self.expe

        # Force ONE epoch # bernoulli variance...
        expe.epoch = 1
        self._generate()

        theta = self._Theta[0]
        phi = self._Phi[0]

        N = theta.shape[0]
        K = theta.shape[1]
        is_ilfm = 'ilfm' in expe.model

        def node_degrees(matrix):
            # Degree of every node: graph degree for 'ilfm' models, rounded
            # row sums otherwise.
            if is_ilfm:
                return dict(nx.from_numpy_array(matrix).degree())
            return {node: int(round(total))
                    for node, total in enumerate(matrix.sum(1))}

        if is_ilfm:
            class_pairs = [(k, k) for k in range(K)]
        else:
            class_pairs = [(k1, k2) for k1 in range(K) for k2 in range(K)]

        n_to_zeros = int(N**2 * (1 - p / 100))

        w = 0.4  # bar width
        opacity = 0.8
        ax = None  # stays None when no class is drawn
        ticks = []
        ticks_label = []
        nb_class = 0

        for _k1, _k2 in class_pairs:
            if nb_class >= 4:
                break

            adj = self._local_likelihood(theta, phi, _k1, _k2)

            # Reduced copy of the adjacency with random cells zeroed.
            _idx = np.random.choice(np.arange(N**2),
                                    n_to_zeros,
                                    replace=False)
            adj_n = adj.copy()
            adj_n[np.unravel_index(_idx, (N, N))] = 0

            degree_n = node_degrees(adj_n)
            d_n, _ = degree_hist(degree_n, filter_zeros=False)
            degree = node_degrees(adj)

            # Per reduced degree: number of nodes, and number of those whose
            # full degree is larger.
            seen = defaultdict(int)
            grew = defaultdict(int)
            for node, deg_n in degree_n.items():
                if deg_n == 0:
                    continue
                if degree[node] > deg_n:
                    grew[deg_n] += 1
                seen[deg_n] += 1

            ratio = {deg: grew[deg] / total for deg, total in seen.items()}
            y = np.array([ratio.get(deg, 0) for deg in d_n])
            if len(y[y > 0]) <= 3:
                continue

            ax = frame.ax()

            # Each class starts after the bars already drawn, with a gap.
            index = len(ticks) * 1.25
            # Python 3 iterators have no .next() method; keep supporting
            # objects that do provide one.
            colors = self.colors
            c = colors.next() if hasattr(colors, 'next') else next(colors)

            one_count = 0
            first = True
            for n, v in enumerate(y):
                if v == 0:
                    continue
                one_count = one_count + 1 if v == 1 else 0
                if one_count >= 4:
                    break

                # Only the first bar of a class carries the legend label.
                ax.bar(index + w / 2,
                       v,
                       w,
                       alpha=opacity,
                       label='classe %s' % nb_class if first else None,
                       color=c)
                ticks.append(index)
                ticks_label.append(d_n[n])
                index += 1
                first = False

            nb_class += 1

        if ax is None:
            # No class was drawn: still return a labelled (empty) plot
            # instead of failing on an unbound name.
            ax = frame.ax()

        # Tick positions are fixed first, then their labels.
        ax.set_xticks(np.array(ticks) + w)
        ax.set_xticklabels(ticks_label)
        ax.xaxis.set_tick_params(labelsize=8)

        ax.legend(loc=4, prop={'size': 10})
        ax.set_xlabel('n')
        ax.set_ylabel('Probability of new links')
        frame.title = '%s, %s,  p=%s' % (self.specname(
            expe.corpus), self.specname(expe.model), p)
Ejemplo n.º 7
0
    def prop2_process_me(self, frame, p=90):
        """Bar chart of the probability that a node gains links.

        An adjacency matrix is sampled (bernoulli) from the model
        likelihood, then copied with ``int(N**2 * (1 - p/100))`` of its
        cells, drawn uniformly without replacement, set to zero.  For every
        non-zero degree ``n`` observed in the reduced network, the bar
        height is the fraction of nodes of degree ``n`` whose degree in the
        full network is strictly larger.  Degrees with a zero fraction are
        not drawn.

        Parameters
        ----------
        frame : object
            Provides the axes through ``frame.ax()``; its ``title``
            attribute is set.
        p : int or value convertible to int
            Percentage used to compute the number of zeroed cells.
        """
        p = int(p)
        expe = self.expe

        # Force ONE epoch # bernoulli variance...
        expe.epoch = 1
        self._generate()

        theta = self._Theta[0]
        phi = self._Phi[0]

        N = theta.shape[0]

        likelihood = self.model.likelihood(theta, phi)
        adj = sp.stats.bernoulli.rvs(likelihood)

        # Reduced copy of the adjacency with random cells zeroed.
        n_to_zeros = int(N**2 * (1 - p / 100))
        _idx = np.random.choice(np.arange(N**2), n_to_zeros, replace=False)
        adj_n = adj.copy()
        adj_n[np.unravel_index(_idx, (N, N))] = 0

        degree_n = dict(nx.from_numpy_array(adj_n).degree())
        d_n, _ = degree_hist(degree_n, filter_zeros=False)
        degree = dict(nx.from_numpy_array(adj).degree())

        # Per reduced degree: number of nodes, and number of those whose
        # full degree is larger.
        seen = defaultdict(int)
        grew = defaultdict(int)
        for node, deg_n in degree_n.items():
            if deg_n == 0:
                continue
            if degree[node] > deg_n:
                grew[deg_n] += 1
            seen[deg_n] += 1

        ratio = {deg: grew[deg] / total for deg, total in seen.items()}
        y = np.array([ratio.get(deg, 0) for deg in d_n])

        ax = frame.ax()

        w = 0.2  # bar width
        opacity = 0.8
        index = 0
        ticks = []
        ticks_label = []
        # Python 3 iterators have no .next() method; keep supporting
        # objects that do provide one.
        colors = self.colors
        c = colors.next() if hasattr(colors, 'next') else next(colors)
        for n, v in enumerate(y):
            if v == 0:
                continue
            ax.bar(index + w, v, w, alpha=opacity, label=None, color=c)
            ticks.append(index)
            ticks_label.append(d_n[n])
            index += 1

        # Tick positions are fixed first, then their labels.
        ax.set_xticks(np.array(ticks) + w)
        ax.set_xticklabels(ticks_label)
        ax.xaxis.set_tick_params(labelsize=8)

        ax.legend(loc=4, prop={'size': 10})
        ax.set_xlabel('n')
        ax.set_ylabel('Probability of new links')
        frame.title = '%s, %s,  p=%s' % (self.specname(
            expe.corpus), self.specname(expe.model), p)