Esempio n. 1
0
    def _preprocess(self):
        """Prepare the frontend, the model and the hyper-parameters.

        The frontend is loaded through ``FrontendManager.load``. When it is
        truthy, ``self._N`` and ``expe.symmetric`` come from it; otherwise
        ``expe.N`` is used and the data is flagged as symmetric.

        The model then depends on ``expe._mode``:

        * ``'predictive'``: the model is requested with ``load=True`` and
          ``expe.hyperparams`` is taken from ``model.get_hyper()``.
        * ``'generative'``: fixed hyper-parameters are written on ``expe``
          and the model is requested with ``load=False``.

        Sets ``self.frontend``, ``self.model`` and ``self._N``, and mutates
        ``self.expe`` in place.

        Raises
        ------
        NotImplementedError
            If ``expe._mode`` is neither 'predictive' nor 'generative'.
        FileNotFoundError
            If ``ModelManager.from_expe`` returned ``None``.
        """
        expe = self.expe

        frontend = FrontendManager.load(expe)
        if frontend:
            self._N = frontend.getN()
            expe.symmetric = frontend.is_symmetric()
        else:
            # No frontend: fall back on the size declared in the expe.
            self._N = expe.N
            expe.symmetric = True

        if expe._mode == 'predictive':
            ### Generate data from a fitted model
            model = ModelManager.from_expe(expe, load=True)

            try:
                # Kept broad on purpose: per the original note, older pickled
                # models may lack what get_hyper() relies on.
                expe.hyperparams = model.get_hyper()
            except Exception as e:
                self.log.warning('loading hyperparam error: %s' % e)
                if model is not None:
                    model._mean_w = 0
                    expe.hyperparams = 0

        elif expe._mode == 'generative':
            ### Generate data from a un-fitted model

            expe.alpha = 1
            expe.gmma = 1 / 2
            expe.delta = [0.5, 0.5]

            # Models whose name contains 'ilfm' get no `gmma` entry.
            if 'ilfm' in expe.model:
                keys_hyper = ('alpha', 'delta')
                hyper = (expe.alpha, expe.delta)
            else:
                keys_hyper = ('alpha', 'gmma', 'delta')
                hyper = (expe.alpha, expe.gmma, expe.delta)
            expe.hyperparams = dict(zip(keys_hyper, hyper))
            expe.hyper = 'fix'  # dummy
            model = ModelManager.from_expe(expe, load=False)

        else:
            raise NotImplementedError(
                'What generation context ? predictive/generative..')

        self.frontend = frontend
        self.model = model

        # Check for a missing model *before* the summary logs: in predictive
        # mode a missing model can leave `expe.hyperparams` unset, and reading
        # it below could then hide this error behind an unrelated one.
        if model is None:
            raise FileNotFoundError('No model for Expe at :  %s' %
                                    self.output_path)

        self.log.info('=== GenNetworks === ')
        self.log.info('Mode: %s' % expe._mode)
        self.log.info('===')
        self.log.info('hyper: %s' % (str(expe.hyperparams)))
Esempio n. 2
0
    def zipf(self, clusters_org='source'):
        ''' Zipf analysis.

            Draws, side by side, the adjacency matrix (reordered by cluster
            when clusters are available) and its degree plot.

            Parameters
            ----------
            clusters_org: str
                Where the clusters come from, either 'source' (asked to the
                frontend) or 'model' (computed from the data by Louvain,
                replaced by Annealing when Louvain yields more than 20
                distinct labels). Any other value means no clustering.
        '''
        expe = self.expe
        frontend = FrontendManager.load(expe)

        # --- Cluster labels -------------------------------------------------
        labels = None
        if clusters_org == 'source':
            labels = frontend.get_clusters()
        elif clusters_org == 'model':
            # The loaded model is not used below; the call is kept as is.
            fitted = ModelManager.from_expe(expe, load=True)
            labels = Louvain.get_clusters(frontend.to_directed(),
                                          resolution=10)
            n_labels = len(np.unique(labels))
            if n_labels <= 20:
                self.log.info('Using Louvain clustering')
            else:
                self.log.info('Using Annealing clustering')
                annealer = Annealing(frontend.data,
                                     iterations=200,
                                     C_init=5,
                                     grow_rate=0)
                labels = annealer.search()

        # --- Adjacency matrix, reordered when possible ----------------------
        if labels is not None:
            sizes = np.bincount(labels)
            n_blocks = (sizes != 0).sum()
            lgg.info('%d Clusters from `%s\':' % (n_blocks, clusters_org))
            adjacency = reorder_mat(frontend.data, labels)
        else:
            lgg.error('No clusters here...passing')
            adjacency = frontend.data

        np.fill_diagonal(adjacency, 0)

        from pymake.util.math import dilate

        def _maybe_dilate(mat):
            # Dilate only when the density (sum / n**2) is below 10%.
            density = mat.sum() / mat.shape[0]**2
            if density < 0.1:
                return dilate(mat)
            return mat

        # --- Figure: adjacency on the left, degree on the right -------------
        fig, axes = plt.subplots(1, 2)
        left_ax, right_ax = axes
        fig.tight_layout(pad=1.6)
        adjshow(_maybe_dilate(adjacency),
                title=self.specname(expe.corpus),
                ax=left_ax)
        plot_degree_poly(adjacency, ax=right_ax)

        if expe._write:
            self.write_frames([fig], suffix='dd')
Esempio n. 3
0
File: plot.py Progetto: dtrckd/ml
    def _extract_data(self, z, data, *args):
        """Return the value of measure `z`.

        Lookup order:

        1. `z` is a key of `data`: min, max (when 'min' / 'max' is in
           `args`) or last element of ``data[z]``, passed through
           ``self._to_masked``.
        2. `z` has the form ``'a@b'``: the element of ``data['a']`` at the
           argmax (argmin when 'min' is in `args`) of ``data['b']``.
        3. ``self.get_<z>()`` exists: its result, or its last element when it
           is a list or a numpy array.
        4. Otherwise the model is loaded and ``model.compute_<z>(**expe)`` is
           returned; ``None`` when the model is falsy or has no such method.
        """
        # Hook: for the 'ai19_1' reference dir, models other than
        # mmsb/wmmsb/epm read the `roc2` / `wsim2` measures instead.
        if self.expe.get('_refdir') == 'ai19_1':
            known = ('mmsb', 'wmmsb', 'epm')
            if not any(tag in self.s.model for tag in known):
                rewrites = (
                    (r'roc(?![0-9])', 'roc', 'roc2'),
                    (r'wsim(?![0-9])', 'wsim', 'wsim2'),
                )
                for pattern, old, new in rewrites:
                    if re.search(pattern, z):
                        z = z.replace(old, new)

        # 1. Saved measure (.inf file).
        if z in data:
            series = data[z]
            if 'min' in args:
                return self._to_masked(series).min()
            if 'max' in args:
                return self._to_masked(series).max()
            return self._to_masked(series[-1])

        # 2. Value of one measure at the extremum of another one.
        if '@' in z:
            target, reference = z.split('@')
            masked_ref = self._to_masked(data[reference])
            if 'min' in args:
                position = masked_ref.argmin()
            else:
                position = masked_ref.argmax()
            return data[target][position]

        # 3. Dedicated getter on this object.
        getter_name = 'get_' + z
        if hasattr(self, getter_name):
            result = getattr(self, getter_name)()
            if isinstance(result, (list, np.ndarray)):
                return result[-1]
            return result

        # 4. Compute it directly from the model.
        self.model = ModelManager.from_expe(self.expe, load=True)
        model = self.model
        if not model:
            return None

        method_name = 'compute_' + z
        if not hasattr(model, method_name):
            self.log.error('attribute unknown: %s' % z)
            return None

        return getattr(model, method_name)(**self.expe)
Esempio n. 4
0
    def _set_measures(self):
        """Store the measures of the current expe in ``self._measures``.

        The ``'_measures'`` entry of the expe is used when it is not None.
        Otherwise, if the expe has a ``'model'`` key, the ``_measures``
        attribute of the model built from the expe is used (None if the
        model lacks it).
        """
        found = self.expe.get('_measures')
        if found is None and 'model' in self.expe:
            found = getattr(ModelManager.from_expe(self.expe),
                            '_measures', None)

        self._measures = found
Esempio n. 5
0
    def load_model(self, frontend=None, load=False):
        ''' Build the model of the current expe and keep it in `self.model`.

            :frontend: forwarded as is to `ModelManager.from_expe`.
            :load: boolean forwarded to `ModelManager.from_expe`. When it is
                   exactly False, the new model is also passed to
                   `self.configure_model`.

            Returns the model.
        '''
        from pymake.frontend.manager import ModelManager

        options = dict(frontend=frontend, load=load)
        self.model = ModelManager.from_expe(self.expe, **options)

        if load is not False:
            return self.model

        self.configure_model(self.model)
        return self.model
Esempio n. 6
0
    def burstiness(self, clusters_org='source', _type='local'):
        '''Zipf analysis: global, local and feature burstiness.

        Builds up to three figures for the current expe:

        1. the degree plot of the whole network, with a dashed line of slope
           ``-alpha`` taken from ``gofit``;
        2. the degree distributions restricted to each cluster (edges whose
           two endpoints carry the same label);
        3. a bar chart of the cluster sizes.

        The ``gofit`` results of each cluster are stored in the table
        returned by ``self.init_fit_tables``. On the last expe of the run
        (``self._it == self.expe_size - 1``), every table of
        ``self.gramexp.tables`` is printed as "mean ± std".

        Parameters
        ----------
        clusters_org: str
            'source' asks the frontend for the clusters; 'model' computes
            them from the data. Any other value leaves no clusters.
        _type: str
            Forwarded to ``self.init_fit_tables``.

        Returns
        -------
        None. Returns early, before any figure is written, when the global
        ``gofit`` is falsy or when no clusters are available.

        Side effects: zeroes the diagonal of ``frontend.data`` in place, sets
        ``expe.K`` and ``expe.model``, and asserts that 'model' was not
        already in ``expe``.
        '''
        expe = self.expe
        frontend = FrontendManager.load(expe)
        data = frontend.data
        figs = []

        #
        # Global burstiness
        #
        d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
        fig = plt.figure()
        plot_degree(data, spec=True, title=self.specname(expe.corpus))

        gof = gofit(d, dc)
        if not gof:
            return

        alpha = gof['alpha']
        x_min = gof['x_min']
        y_max = gof['y_max']

        # Plot a linear law from the power law estimation. The line starts
        # 10% of len(d) before x_min when that index is non-negative.
        idx = d.searchsorted(x_min)
        shifted = int(idx - 0.1 * len(d))
        if shifted >= 0:
            idx = shifted
        x = d[idx:]
        ylin = np.exp(-alpha * np.log(x / float(x_min)) + np.log(y_max))

        # Hack xticks: add an 'x_min' tick while keeping the current x-limits.
        # The canvas is drawn first, before ticks and limits are read.
        fig.canvas.draw()
        lim = plt.gca().get_xlim()
        locs, labels = plt.xticks()

        idx_xmin = locs.searchsorted(x_min)
        locs = np.insert(locs, idx_xmin, x_min)
        labels.insert(idx_xmin, plt.Text(text='x_min'))
        plt.xticks(locs, labels)
        plt.gca().set_xlim(lim)

        # Shift the line by 75% of the log-gap between its last point and
        # the last point of a degree-1 polynomial fit in log-log space.
        fit = np.polyfit(np.log(d), np.log(dc), deg=1)
        poly_fit = fit[0] * np.log(d) + fit[1]
        diff = np.abs(poly_fit[-1] - np.log(ylin[-1]))
        ylin = np.exp(np.log(ylin) + diff * 0.75)

        plt.plot(x, ylin, 'g--', label='power %.2f' % alpha)
        figs.append(plt.gcf())

        #
        # Local burstiness
        #
        # Get the Class/Cluster and local degree information.
        #
        clusters = None
        K = None
        if clusters_org == 'source':
            clusters = frontend.get_clusters()
        elif clusters_org == 'model':
            # The loaded model is not used below; the call is kept as is.
            model = ModelManager.from_expe(expe, load=True)
            clusters = Louvain.get_clusters(frontend.to_directed(), resolution=10)
            # NOTE(review): the original condition was `... > 20 or True`,
            # i.e. always true, so the Louvain result is always replaced by
            # the Annealing one. Written explicitly here; confirm intent.
            clusters = Annealing(frontend.data, iterations=200, C_init=5,
                                 grow_rate=0).search()

        if clusters is None:
            lgg.error('No clusters here...passing')
            return

        block_hist = np.bincount(clusters)
        K = (block_hist != 0).sum()
        lgg.info('%d Clusters from `%s\':' % (K, clusters_org))

        expe.K = K
        assert 'model' not in expe
        expe.model = 'no_model'
        Table, Meas = self.init_fit_tables(_type=_type)

        # Just inner degree
        f = plt.figure()
        ax = f.gca()

        # Only the diagonal blocks (k == l) were ever processed by the
        # original K x K double loop, so a single loop over K is equivalent.
        it_k = 0
        np.fill_diagonal(data, 0)
        for k in np.arange(K):
            in_cluster = clusters == k
            ixgrid = np.ix_(in_cluster, in_cluster)

            # Same shape as `data`, zero everywhere but inside the block.
            y = np.zeros(data.shape)
            y[ixgrid] = data[ixgrid]

            d, dc = degree_hist(adj_to_degree(y))
            if len(d) == 0:
                continue
            plot_degree_2((d, dc, None), logscale=True, colors=True,
                          line=True, ax=ax, title='')

            gof = gofit(d, dc)
            if not gof:
                continue

            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_k] = gof[v]
            it_k += 1

        plt.suptitle(self.specname(expe.corpus))
        figs.append(plt.gcf())

        #
        # Features burstiness
        #
        plt.figure()
        hist, label = sorted_perm(block_hist, reverse=True)
        bins = len(hist)
        plt.bar(range(bins), hist)
        plt.xticks(np.arange(bins) + 0.5, label)
        plt.xlabel('Class labels')
        plt.title('Blocks Size (max assignement)')
        figs.append(plt.gcf())

        if expe._write:
            self.write_frames(figs)

        if self._it == self.expe_size - 1:
            for _model, table in self.gramexp.tables.items():

                # Mean and standard deviation along the last axis.
                table_mean = np.char.array(np.around(table.mean(2), decimals=3)).astype("|S20")
                table_std = np.char.array(np.around(table.std(2), decimals=3)).astype("|S20")
                # Raw bytes literal: `\p` is not a valid escape sequence.
                table = table_mean + br' $\pm$ ' + table_std

                # Table formatting
                # NOTE(review): specname is applied twice (here and in the
                # column_stack call), as in the original -- confirm intent.
                corpuses = self.specname(self.gramexp.get_set('corpus'))
                table = np.column_stack((self.specname(corpuses), table))
                tablefmt = 'simple'
                table = self.tabulate(table, headers=['__' + _model.upper() + '__'] + Meas,
                                      tablefmt=tablefmt, floatfmt='.3f')
                print()
                print(table)
Esempio n. 7
0
 def _preprocess(self):
     """Request the model of the current expe with ``load=True`` and keep it in ``self.model``."""
     loaded = ModelManager.from_expe(self.expe, load=True)
     self.model = loaded