def __init__(self, dmax=None, nmax=None, emax=None, cmax=None, vmax=None,
             comp_dmaxs=None, filename=None, gen_efms=True,
             np_optimize='greedy', verbose=False):
    r"""Doing a fresh generation of connected multigraphs (`dmax` given and
    `filename=None`) requires that `igraph` be installed.

    **Arguments**

    - **dmax** : _int_
        - The maximum number of edges of the generated connected graphs.
    - **nmax** : _int_
        - The maximum number of vertices of the generated connected graphs.
    - **emax** : _int_
        - The maximum number of edges of the generated connected simple
        graphs.
    - **cmax** : _int_
        - The maximum VE complexity $\chi$ of the generated connected graphs.
    - **vmax** : _int_
        - The maximum valency of the generated connected graphs.
    - **comp_dmaxs** : {_dict_, _int_}
        - If an integer, the maximum number of edges of the generated
        disconnected graphs. If a dictionary, the keys are numbers of vertices
        and the values are the maximum number of edges of the generated
        disconnected graphs with that number of vertices.
    - **filename** : _str_
        - If `None` and `dmax` is given, do a complete generation from
        scratch. Otherwise, read in connected graphs from the file given,
        restrict them according to the various 'max' parameters, and do a
        fresh disconnected generation. The special value `filename='default'`
        means to read in graphs from the default file. This is useful when
        various disconnected graph parameters are to be varied since the
        generation of large simple graphs is the most computationally
        intensive part.
    - **gen_efms** : _bool_
        - Controls whether EFM information is generated. When reading from a
        file, EFM information is only kept if the file also has it.
    - **np_optimize** : {`True`, `False`, `'greedy'`, `'optimal'`}
        - The `optimize` keyword of `numpy.einsum_path`. When reading from a
        file, the value stored in the file is used instead.
    - **verbose** : _bool_
        - A flag to control printing.
    """

    start = time.time()

    # a fresh generation is only done when dmax is given and no file is named
    if dmax is not None and filename is None:

        # set maxs
        self._set_maxs(dmax, nmax, emax, cmax, vmax)

        # set options
        self.np_optimize = np_optimize
        self.gen_efms = gen_efms

        # get prime generator instance
        self.pr_gen = PrimeGenerator(self.dmax, self.nmax, self.emax,
                                     self.cmax, self.vmax, self.gen_efms,
                                     self.np_optimize, verbose, start)
        self.cols = self.pr_gen.cols
        self._set_col_inds()

        if verbose:
            print('Finished generating prime graphs in {:.3f}.'.format(time.time() - start))

        # store lists of important quantities
        transfer(self, self.pr_gen, self._prime_attrs())

    # otherwise read the connected graphs in from a file
    else:

        efp_file = load_efp_file(filename)

        # setup cols and col inds
        self.cols = efp_file['cols']
        self._set_col_inds()

        # each max is the tighter of the file's value and the requested one;
        # the arguments are listed explicitly rather than looked up by name
        # through locals()
        c_specs = np.asarray(efp_file['c_specs'])
        requested_maxs = (('dmax', dmax), ('nmax', nmax), ('emax', emax),
                          ('cmax', cmax), ('vmax', vmax))
        for name, requested in requested_maxs:
            setattr(self, name, min(efp_file[name], none2inf(requested)))

        # select connected specs based on maxs
        mask = ((c_specs[:, self.d_ind] <= self.dmax) &
                (c_specs[:, self.n_ind] <= self.nmax) &
                (c_specs[:, self.e_ind] <= self.emax) &
                (c_specs[:, self.c_ind] <= self.cmax) &
                (c_specs[:, self.v_ind] <= self.vmax))

        # the ve options come from the file, not from the np_optimize argument
        self.np_optimize = efp_file['np_optimize']

        # get lists of important quantities, keeping only the selected graphs
        self.gen_efms = efp_file['gen_efms'] and gen_efms
        for attr in self._prime_attrs():
            setattr(self, attr,
                    [x for x, keep in zip(efp_file[attr], mask) if keep])
        self.c_specs = c_specs[mask]

    # setup generator of disconnected graphs
    self._set_comp_dmaxs(comp_dmaxs)
    self.comp_gen = CompositeGenerator(self.c_specs, self.cols, self.comp_dmaxs)

    if verbose:
        # the reported time is measured from the start of __init__
        print('Finished generating composite graphs in {:.3f}.'.format(time.time() - start))

    # get results and store
    transfer(self, self.comp_gen, self._comp_attrs())
def __init__(self, *args, **kwargs):
    r"""`EFPSet` can be initialized in one of four ways (in order of
    precedence):

    1. **Graphs** - Pass in graphs as lists of edges, just as for
    individual EFPs.
    2. **Generator** - Pass in a custom `Generator` object as the first
    positional argument.
    3. **Custom File** - Pass in the name of a `.npz` file saved with a
    custom `Generator`.
    4. **Default** - Use the $d\le10$ EFPs that come installed with the
    `EnergyFlow` package.

    To control which EFPs are included, `EFPSet` accepts an arbitrary
    number of specifications (see [`sel`](#sel)) and only EFPs meeting each
    specification are included in the set. Note that no specifications
    should be passed in when initializing from explicit graphs.

    Since an EFP defines and holds a `Measure` instance, all `Measure`
    keywords are accepted.

    A `ValueError` is raised when initializing from a generator or file
    that was produced without EFM information while EFMs are in use.

    **Arguments**

    - ***args** : _arbitrary positional arguments_
        - Depending on the method of initialization, these can be either
        1) graphs to store, as lists of edges 2) a Generator instance
        followed by some number of valid arguments to `sel` or 3,4) valid
        arguments to `sel`. When passing in specific graphs, no arguments
        to `sel` should be given.
    - **filename** : _string_
        - Path to a `.npz` file which has been saved by a valid
        `energyflow.Generator`. A value of `None` will use the provided
        graphs, if a file is needed at all.
    - **measure** : {`'hadr'`, `'hadr-dot'`, `'ee'`}
        - See [Measures](../measures) for additional info.
    - **beta** : _float_
        - The parameter $\beta$ appearing in the measure. Must be greater
        than zero.
    - **kappa** : {_float_, `'pf'`}
        - If a number, the energy weighting parameter $\kappa$. If `'pf'`,
        use $\kappa=v-1$ where $v$ is the valency of the vertex.
    - **normed** : _bool_
        - Controls normalization of the energies in the measure.
    - **coords** : {`'ptyphim'`, `'epxpypz'`, `None`}
        - Controls which coordinates are assumed for the input. See
        [Measures](../measures) for additional info.
    - **check_input** : _bool_
        - Whether to check the type of the input each time or assume the
        first input type.
    - **verbose** : _int_
        - Controls printed output when initializing `EFPSet` from a file or
        `Generator`.
    """

    # pull our own options out of kwargs; what is left goes to the base class
    for k, default in {'filename': None, 'verbose': 0}.items():
        setattr(self, k, kwargs.pop(k, default))

    # initialize EFPBase (the remaining kwargs are passed as a single dict)
    super(EFPSet, self).__init__(kwargs)

    # handle different methods of initialization
    maxs = ['nmax', 'emax', 'dmax', 'cmax', 'vmax', 'comp_dmaxs']
    elemvs = ['edges', 'weights', 'einstrs', 'einpaths']
    efmvs = ['efm_einstrs', 'efm_einpaths', 'efm_specs']
    miscattrs = ['cols', 'gen_efms', 'c_specs', 'disc_specs', 'disc_formulae']

    if (len(args) >= 1 and not sel_arg_check(args[0])
            and not isinstance(args[0], Generator)):
        # first argument is neither a selection nor a Generator: explicit graphs
        gen = False
    elif len(args) >= 1 and isinstance(args[0], Generator):
        constructor_attrs = maxs + elemvs + efmvs + miscattrs
        gen = {attr: getattr(args[0], attr) for attr in constructor_attrs}

        # everything after the Generator is a selection argument
        args = args[1:]
    else:
        gen = load_efp_file(self.filename)

    # compiled regular expression for use in sel()
    self._sel_re = re.compile(r'(\w+)(<|>|==|!=|<=|>=)(\d+)$')
    self._cols = np.array(['n', 'e', 'd', 'v', 'k', 'c', 'p', 'h'])
    self.__dict__.update({col + '_ind': i for i, col in enumerate(self._cols)})

    # initialize from given graphs
    if not gen:
        self._disc_col_inds = None
        self._efps = [EFP(graph, no_measure=True) for graph in args]
        self._cspecs = self._specs = np.asarray([efp.spec for efp in self.efps])

    # initialize from a generator
    else:

        # handle not having efm generation
        if not gen['gen_efms'] and self.use_efms:
            raise ValueError('Cannot use efm measure without providing efm generation.')

        # verify columns with generator
        assert np.all(self._cols == gen['cols'])

        # get disc formulae and disc mask
        orig_disc_specs = np.asarray(gen['disc_specs'])
        disc_mask = self.sel(*args, specs=orig_disc_specs)
        disc_formulae = np.asarray(gen['disc_formulae'])[disc_mask]

        # get connected specs and full specs
        orig_c_specs = np.asarray(gen['c_specs'])
        c_mask = self.sel(*args, specs=orig_c_specs)
        self._cspecs = orig_c_specs[c_mask]
        self._specs = concat_specs(self._cspecs, orig_disc_specs[disc_mask])

        # make EFP list; the EFM columns are replaced by None when unused
        per_graph = [gen[v] for v in elemvs] + [orig_c_specs]
        per_graph += [gen[v] if self.use_efms else itertools.repeat(None)
                      for v in efmvs]

        # the loop variable must not be called `args`: under Python 2 a list
        # comprehension variable leaks into the enclosing scope and would
        # clobber the selection arguments checked in the printing below
        self._efps = [EFP(elems[0], weights=elems[1], no_measure=True,
                          efpset_args=elems[2:])
                      for m, elems in enumerate(zip(*per_graph)) if c_mask[m]]

        # get col indices for disconnected formulae
        connected_ndk = {efp.ndk: i for i, efp in enumerate(self.efps)}
        self._disc_col_inds = []
        for formula in disc_formulae:
            try:
                self._disc_col_inds.append(
                    [connected_ndk[tuple(factor)] for factor in formula])
            except KeyError:
                # a factor of this formula was not among the selected
                # connected EFPs; warn and leave the formula out
                warnings.warn('connected efp needed for {} not found'.format(formula))

        # handle printing
        if self.verbose > 0:
            print('Originally Available EFPs:')
            self.print_stats(specs=concat_specs(orig_c_specs, orig_disc_specs), lws=2)

            # only report the stored subset if a selection was actually made
            if len(args) > 0:
                print('Current Stored EFPs:')
                self.print_stats(lws=2)

    # setup EFMs
    if self.use_efms:
        efm_specs = set(itertools.chain.from_iterable(
            efp.efm_spec for efp in self.efps))
        self._efmset = EFMSet(efm_specs, subslicing=self.subslicing)

    # union over all weights needed
    self._weight_set = frozenset(w for efp in self.efps for w in efp.weight_set)