def _preprocess(self):
    ''' Load the frontend and obtain the model used for network generation.

        Sets ``self._N`` and ``expe.symmetric`` from the frontend when one is
        loaded, otherwise from ``expe.N`` with ``symmetric = True``.  Then,
        depending on ``expe._mode``:

        * ``'predictive'``: load a model (``load=True``) and read its
          hyperparameters into ``expe.hyperparams``.
        * ``'generative'``: set fixed hyperparameters on ``expe`` and build a
          model without loading (``load=False``).

        Finally stores ``self.frontend`` and ``self.model``.

        Raises
        ------
        NotImplementedError
            If ``expe._mode`` is neither 'predictive' nor 'generative'.
        FileNotFoundError
            If no model could be obtained.  In predictive mode this is now
            raised right after loading, instead of after a spurious
            "hyperparam" warning caused by calling a method on ``None``.
    '''
    expe = self.expe

    frontend = FrontendManager.load(expe)
    if frontend:
        self._N = frontend.getN()
        expe.symmetric = frontend.is_symmetric()
    else:
        # No usable frontend: rely on the size declared in the expe.
        self._N = expe.N
        expe.symmetric = True

    if expe._mode == 'predictive':
        ### Generate data from a fitted model
        model = ModelManager.from_expe(expe, load=True)
        if model is None:
            # Fail fast: nothing below can work without a model.
            raise FileNotFoundError('No model for Expe at : %s' % self.output_path)

        try:
            expe.hyperparams = model.get_hyper()
        except Exception as e:
            # Deliberate best-effort: per the original note, some pickled
            # models lack entries added later in __init__, so get_hyper()
            # may fail on them.  Fall back on neutral values.
            self.log.warning('loading hyperparam error: %s' % e)
            model._mean_w = 0
            expe.hyperparams = 0
    elif expe._mode == 'generative':
        ### Generate data from a un-fitted model
        expe.alpha = 1
        expe.gmma = 1 / 2
        expe.delta = [0.5, 0.5]

        # Models whose name contains 'ilfm' do not take `gmma`.
        if 'ilfm' in expe.model:
            keys_hyper = ('alpha', 'delta')
        else:
            keys_hyper = ('alpha', 'gmma', 'delta')
        expe.hyperparams = {key: getattr(expe, key) for key in keys_hyper}
        expe.hyper = 'fix'  # dummy

        model = ModelManager.from_expe(expe, load=False)
    else:
        raise NotImplementedError(
            'What generation context ? predictive/generative..')

    self.log.info('=== GenNetworks === ')
    self.log.info('Mode: %s' % expe._mode)
    self.log.info('===')
    self.log.info('hyper: %s' % (str(expe.hyperparams)))

    self.frontend = frontend
    self.model = model

    # Still needed for the generative branch.
    if model is None:
        raise FileNotFoundError('No model for Expe at : %s' % self.output_path)
def zipf(self, clusters_org='source'):
    ''' Zipf analysis: local/global preferential attachment effect.

        Plots the (cluster-reordered) adjacency matrix next to the degree
        plot, and writes the figure when ``expe._write`` is set.

        Parameters
        ----------
        clusters_org: str
            Origin of the clusters, either 'source' or 'model'.  Any other
            value leaves the adjacency matrix un-reordered.
    '''
    expe = self.expe
    frontend = FrontendManager.load(expe)

    #
    # Cluster assignment used to reorder the adjacency matrix.
    #
    clusters = None
    if clusters_org == 'source':
        clusters = frontend.get_clusters()
    elif clusters_org == 'model':
        # The model is loaded but the clusters come from graph clustering.
        model = ModelManager.from_expe(expe, load=True)
        clusters = Louvain.get_clusters(frontend.to_directed(), resolution=10)
        n_labels = len(np.unique(clusters))
        if n_labels > 20:
            self.log.info('Using Annealing clustering')
            annealer = Annealing(frontend.data, iterations=200, C_init=5, grow_rate=0)
            clusters = annealer.search()
        else:
            self.log.info('Using Louvain clustering')

    if clusters is not None:
        block_hist = np.bincount(clusters)
        K = (block_hist != 0).sum()
        lgg.info('%d Clusters from `%s\':' % (K, clusters_org))
        data_r = reorder_mat(frontend.data, clusters)
    else:
        lgg.error('No clusters here...passing')
        data_r = frontend.data

    np.fill_diagonal(data_r, 0)

    from pymake.util.math import dilate

    def _dilate_if_sparse(mat):
        # Only dilate matrices whose density is below 10%.
        density = mat.sum() / mat.shape[0]**2
        if density < 0.1:
            return dilate(mat)
        return mat

    ### Plot Adjacency matrix
    fig, (ax_adj, ax_deg) = plt.subplots(1, 2)
    fig.tight_layout(pad=1.6)
    adjshow(_dilate_if_sparse(data_r), title=self.specname(expe.corpus), ax=ax_adj)

    ### Plot Degree
    plot_degree_poly(data_r, ax=ax_deg)

    if expe._write:
        self.write_frames([fig], suffix='dd')
def _extract_data(self, z, data, *args):
    ''' Return the value of measure `z`, looked up in this order:

        1. ``z in data``: taken from the saved measures.  With 'min' or
           'max' in `args` the min/max of the whole series is returned,
           otherwise its last element.
        2. ``'ag@vl'`` form: the value of ``data[ag]`` at the index where
           ``data[vl]`` is minimal ('min' in `args`) or maximal (default).
        3. A ``get_<z>()`` method on ``self``; the last element is returned
           when it yields a list or ndarray.
        4. A ``compute_<z>(**self.expe)`` method on the loaded model.

        Parameters
        ----------
        z: str
            Measure name.
        data: mapping
            Saved measures; must support ``in`` and item access.
        *args:
            Optional 'min' / 'max' selectors.

        Returns
        -------
        The extracted value, or None when the model cannot be loaded or
        the measure is unknown.
    '''
    # Hook: for the 'ai19_1' refdir, models other than mmsb/wmmsb/epm
    # store these measures under a '2'-suffixed name.
    if self.expe.get('_refdir') == 'ai19_1':
        model_name = self.s.model
        if not any(tag in model_name for tag in ('mmsb', 'wmmsb', 'epm')):
            # Rewrite only the occurrences not already followed by a digit.
            # A plain str.replace would also turn an existing 'roc2' into
            # 'roc22' when both forms appear in `z`.
            z = re.sub(r'roc(?![0-9])', 'roc2', z)
            z = re.sub(r'wsim(?![0-9])', 'wsim2', z)

    if z in data:
        # Extract from saved measure (.inf file).
        if 'min' in args:
            return self._to_masked(data[z]).min()
        if 'max' in args:
            return self._to_masked(data[z]).max()
        return self._to_masked(data[z][-1])

    if '@' in z:
        # Extract a value at the max/min of the second (@) measure.
        ag, vl = z.split('@')
        series = self._to_masked(data[vl])
        pos = series.argmin() if 'min' in args else series.argmax()
        return data[ag][pos]

    if hasattr(self, 'get_' + z):
        _val = getattr(self, 'get_' + z)()
        if isinstance(_val, (list, np.ndarray)):
            return _val[-1]
        return _val

    # Compute it directly from the model.
    self.model = ModelManager.from_expe(self.expe, load=True)
    model = self.model
    if not model:
        return None

    if not hasattr(model, 'compute_' + z):
        self.log.error('attribute unknown: %s' % z)
        return None

    return getattr(model, 'compute_' + z)(**self.expe)
def _set_measures(self):
    ''' Set ``self._measures`` from the expe, falling back on the model.

        Uses ``expe['_measures']`` when present and not None.  Otherwise,
        if the expe has a 'model' entry, uses the ``_measures`` attribute
        of the model built from the expe (None when it has none).
    '''
    expe = self.expe
    resolved = expe.get('_measures')
    if resolved is None and 'model' in expe:
        built = ModelManager.from_expe(expe)
        resolved = getattr(built, '_measures', None)
    self._measures = resolved
def load_model(self, frontend=None, load=False):
    ''' Build or load the model for the current expe and return it.

        :frontend: passed through to ``ModelManager.from_expe``.
        :load: boolean. Load from a **preprocess** file if true, else do a
            raw loading. Only when `load` is exactly ``False`` is the
            model passed to ``self.configure_model``.
    '''
    from pymake.frontend.manager import ModelManager

    built = ModelManager.from_expe(self.expe, frontend=frontend, load=load)
    self.model = built

    # Identity check on purpose: only the literal False triggers configuration.
    raw_loading = load is False
    if raw_loading:
        self.configure_model(self.model)

    return self.model
def burstiness(self, clusters_org='source', _type='local'):
    '''Zipf Analysis
       (global burstiness) + local burstiness + feature burstiness

       Produces three figures (global degree distribution with a power-law
       line, per-cluster inner-degree distributions, and cluster-size bars),
       fills the fit table for the current corpus, and prints the
       mean/std tables on the last expe of the run.

       Parameters
       ----------
       clusters_org: str
           Origin of the clusters, either 'source' or 'model'. With any
           other value no clusters are found and the method returns early.
       _type: str
           Forwarded to ``self.init_fit_tables``.

       Returns
       -------
       None. Returns early (before anything is written) when the global
       fit fails or when no clusters are available.
    '''
    expe = self.expe
    frontend = FrontendManager.load(expe)
    data = frontend.data
    figs = []

    # Global burstiness
    d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
    fig = plt.figure()
    plot_degree(data, spec=True, title=self.specname(expe.corpus))
    #plot_degree_poly(data, spec=True, title=expe.corpus)

    gof = gofit(d, dc)
    if not gof:
        # NOTE(review): the figure created above is neither saved nor closed here.
        return

    alpha = gof['alpha']
    x_min = gof['x_min']
    y_max = gof['y_max']
    # plot linear law from power law estimation
    #plt.figure()
    # Start the line a bit before x_min: shift left by 10% of len(d),
    # unless that would give a negative index.
    idx = d.searchsorted(x_min)
    i = int(idx - 0.1 * len(d))
    idx = i if i >= 0 else idx
    x = d[idx:]
    ylin = np.exp(-alpha * np.log(x/float(x_min)) + np.log(y_max))
    #ylin = np.exp(-alpha * np.log(x/float(x_min)) + np.log((alpha-1)/x_min))

    # Hack xticks: insert an extra tick labelled 'x_min' at x_min while
    # keeping the current x limits.
    fig.canvas.draw() # !
    lim = plt.gca().get_xlim() # !
    locs, labels = plt.xticks()

    idx_xmin = locs.searchsorted(x_min)
    locs = np.insert(locs, idx_xmin, x_min)
    labels.insert(idx_xmin, plt.Text(text='x_min'))
    plt.xticks(locs, labels)
    plt.gca().set_xlim(lim)

    # Shift the power-law line (in log space) by 75% of its gap to a
    # degree-1 polynomial fit of the log-log histogram, measured at the last point.
    fit = np.polyfit(np.log(d), np.log(dc), deg=1)
    poly_fit = fit[0] *np.log(d) + fit[1]
    diff = np.abs(poly_fit[-1] - np.log(ylin[-1]))
    ylin = np.exp( np.log(ylin) + diff*0.75)
    #\#

    plt.plot(x, ylin , 'g--', label='power %.2f' % alpha)
    figs.append(plt.gcf())

    # Local burstiness

    #
    # Get the Class/Cluster and local degree information
    # Reordering Adjacency Matrix based on Clusters/Class/Communities
    #
    clusters = None
    K = None
    if clusters_org == 'source':
        clusters = frontend.get_clusters()
    elif clusters_org == 'model':
        # NOTE(review): `model` is loaded but not used below in this method.
        model = ModelManager.from_expe(expe, load=True)
        #clusters = model.get_clusters(K, skip=1)
        #clusters = model.get_communities(K)
        clusters = Louvain.get_clusters(frontend.to_directed(), resolution=10)
        # NOTE(review): `or True` makes this condition always true, so the
        # Louvain result is always replaced by the Annealing one.
        if len(np.unique(clusters)) > 20 or True:
            clusters = Annealing(frontend.data, iterations=200, C_init=5, grow_rate=0).search()

    if clusters is None:
        lgg.error('No clusters here...passing')
        return
    else:
        block_hist = np.bincount(clusters)
        # Number of labels that actually occur.
        K = (block_hist != 0).sum()
        lgg.info('%d Clusters from `%s\':' % (K, clusters_org))

    expe.K = K
    # NOTE(review): an assert is stripped under `python -O`; the expe is
    # expected to carry no 'model' entry before it is tagged below.
    assert(not 'model' in expe)
    expe.model = 'no_model'

    #data_r, labels= reorder_mat(data, clusters, labels=True)

    Table,Meas = self.init_fit_tables(_type=_type)

    # Just inner degree
    f = plt.figure()
    ax = f.gca()
    #f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True)

    # assume symmetric
    it_k = 0
    # NOTE(review): `data` is `frontend.data` itself, so this presumably
    # zeroes the frontend's diagonal in place -- confirm that is intended.
    np.fill_diagonal(data, 0)
    for l in np.arange(K):
        for k in np.arange(K):
            # Only diagonal blocks (k == l) are processed, which makes the
            # 'Outer degree' branch below unreachable.
            if k != l:
                continue

            ixgrid = np.ix_(clusters == k, clusters == l)

            if k == l:
                title = 'Inner degree'
                y = np.zeros(data.shape) # some zeros...
                y[ixgrid] = data[ixgrid]
                #ax = ax1
            else:
                title = 'Outer degree'
                y = np.zeros(data.shape) # some zeros...
                y[ixgrid] = data[ixgrid]
                #ax = ax2

            # title = ''

            #/#
            d, dc = degree_hist(adj_to_degree(y))
            if len(d) == 0: continue
            plot_degree_2((d,dc,None), logscale=True, colors=True, line=True, ax=ax, title=title)

            gof = gofit(d, dc)
            if not gof:
                continue

            # One table column per successfully fitted block; `it_k` only
            # advances when the fit succeeded.
            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_k] = gof[v] #* y.sum() / TOT
            it_k += 1

    plt.suptitle(self.specname(expe.corpus))
    figs.append(plt.gcf())

    # Features burstiness
    plt.figure()
    hist, label = sorted_perm(block_hist, reverse=True)
    bins = len(hist)
    plt.bar(range(bins), hist)
    plt.xticks(np.arange(bins)+0.5, label)
    plt.xlabel('Class labels')
    plt.title('Blocks Size (max assignement)')
    figs.append(plt.gcf())

    if expe._write:
        self.write_frames(figs)

    # On the last expe of the run, print one summary table per entry of
    # `self.gramexp.tables`.
    if self._it == self.expe_size -1:
        for _model, table in self.gramexp.tables.items():
            # Mean and standard deviation (over axis 2), as byte strings
            table_mean = np.char.array(np.around(table.mean(2), decimals=3)).astype("|S20")
            table_std = np.char.array(np.around(table.std(2), decimals=3)).astype("|S20")
            # NOTE(review): `\p` is not a recognized escape, so the backslash
            # is kept literally; newer Python versions warn about it.
            table = table_mean + b' $\pm$ ' + table_std

            # Table formatting
            # NOTE(review): `specname` is applied here and again on the
            # next line -- confirm the double call is intended.
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            table = np.column_stack((self.specname(corpuses), table))
            tablefmt = 'simple'
            table = self.tabulate(table, headers=['__'+_model.upper()+'__']+Meas, tablefmt=tablefmt, floatfmt='.3f')
            print()
            print(table)
def _preprocess(self):
    ''' Store in ``self.model`` the model obtained from the expe with ``load=True``. '''
    loaded = ModelManager.from_expe(self.expe, load=True)
    self.model = loaded