def specs(self):
    """The array of EFP specifications held by this object.

    Each row corresponds to one EFP and the columns hold the quantities
    indicated by `cols`. The array is built from `c_specs` and `disc_specs`
    the first time it is requested and stored on `_specs` for later calls.
    """
    try:
        cached = self._specs
    except AttributeError:
        # first access: assemble the full array and remember it
        cached = self._specs = concat_specs(self.c_specs, self.disc_specs)
    return cached
def __init__(self, *args, **kwargs):
    r"""EFPSet can be initialized in one of three ways (in order of
    precedence):

    1. **Generator** - Pass in a custom `Generator` object as the first
    positional argument.
    2. **Custom File** - Pass in the name of a `.npz` file saved with a
    custom `Generator`.
    3. **Default** - Use the ($d\le10$) EFPs that come installed with the
    `EnergyFlow` package.

    To control which EFPs are included, `EFPSet` accepts an arbitrary
    number of specifications (see [`sel`](#sel)) and only EFPs meeting each
    specification are included in the set.

    **Arguments**

    - ***args** : _arbitrary positional arguments_
        - If the first positional argument is a `Generator` instance, it is
        used for initialization. The remaining positional arguments must be
        valid arguments to `sel`.
    - **filename** : _string_
        - Path to a `.npz` file which has been saved by a valid
        `energyflow.Generator`. The `.npz` extension is appended if it is
        missing.
    - **measure** : {`'hadr'`, `'hadr-dot'`, `'ee'`}
        - See [Measures](../measures) for additional info.
    - **beta** : _float_
        - The parameter $\beta$ appearing in the measure. Must be greater
        than zero.
    - **kappa** : {_float_, `'pf'`}
        - If a number, the energy weighting parameter $\kappa$. If `'pf'`,
        use $\kappa=v-1$ where $v$ is the valency of the vertex.
    - **normed** : _bool_
        - Controls normalization of the energies in the measure.
    - **coords** : {`'ptyphim'`, `'epxpypz'`, `None`}
        - Controls which coordinates are assumed for the input. See
        [Measures](../measures) for additional info.
    - **check_input** : _bool_
        - Whether to check the type of the input each time or assume the
        first input type.
    - **verbose** : _bool_
        - Controls printed output when initializing EFPSet.
    """

    default_kwargs = {
        'filename': None,
        'measure': 'hadr',
        'beta': 1,
        'kappa': 1,
        'normed': True,
        'coords': None,
        'check_input': True,
        'verbose': False,
    }
    measure_kwargs = [
        'measure', 'beta', 'kappa', 'normed', 'coords', 'check_input'
    ]

    # process arguments: fill in defaults, and turn every option that is
    # not a measure keyword into an attribute of this object
    for k, v in default_kwargs.items():
        if k not in kwargs:
            kwargs[k] = v
        if k not in measure_kwargs:
            setattr(self, k, kwargs.pop(k))
    kwargs_check('__init__', kwargs, allowed=measure_kwargs)

    # initialize EFPBase with the measure options, in measure_kwargs order
    super(EFPSet, self).__init__(*[kwargs[k] for k in measure_kwargs])

    # handle different methods of initialization
    maxs = ['nmax', 'emax', 'dmax', 'cmax', 'vmax', 'comp_dmaxs']
    elemvs = ['edges', 'weights', 'einstrs', 'einpaths']
    if len(args) >= 1 and isinstance(args[0], Generator):
        constructor_attrs = maxs + elemvs + [
            'cols', 'c_specs', 'disc_specs', 'disc_formulae'
        ]
        gen = {attr: getattr(args[0], attr) for attr in constructor_attrs}
        # everything after the Generator is a specification for sel()
        args = args[1:]
    elif self.filename is not None:
        if not self.filename.endswith('.npz'):
            self.filename += '.npz'
        # NOTE(security): allow_pickle=True unpickles object arrays, which
        # can execute arbitrary code; only load files from trusted sources.
        gen = np.load(self.filename, allow_pickle=True)
    else:
        gen = np.load(DEFAULT_EFP_FILE, allow_pickle=True)

    # compiled regular expression for use in sel()
    self.SEL_RE = SEL_RE

    # put column headers and indices into namespace
    self._cols = gen['cols']
    self._set_col_inds()

    # put gen maxs into dict
    self.gen_maxs = {m: gen[m] for m in maxs}

    # get disc formulae and disc mask
    orig_disc_specs = gen['disc_specs']
    disc_mask = self.sel(*args, specs=orig_disc_specs)
    self.disc_formulae = gen['disc_formulae'][disc_mask]

    # get connected specs and full specs
    orig_c_specs = gen['c_specs']
    c_mask = self.sel(*args, specs=orig_c_specs)
    self._cspecs = orig_c_specs[c_mask]
    self._specs = concat_specs(self._cspecs, orig_disc_specs[disc_mask])

    # make EFPElem list; the loop variable is deliberately NOT named `args`
    # so that it cannot clobber the positional specifications (list
    # comprehension variables leak into the enclosing scope on Python 2)
    z = zip(*([gen[v] for v in elemvs] + [orig_c_specs[:, self.k_ind]]))
    self.efpelems = [
        EFPElem(*elem_args) for m, elem_args in enumerate(z) if c_mask[m]
    ]

    # union over all weights needed
    self.__weight_set = frozenset(w for efpelem in self.efpelems
                                  for w in efpelem.weight_set)

    # get col indices for disconnected formulae
    connected_ndk = {
        efpelem.ndk: i for i, efpelem in enumerate(self.efpelems)
    }
    self.disc_col_inds = []
    for formula in self.disc_formulae:
        # a formula with a missing connected factor is skipped with a
        # warning, so disc_col_inds may be shorter than disc_formulae
        try:
            self.disc_col_inds.append(
                [connected_ndk[factor] for factor in formula])
        except KeyError:
            warnings.warn(
                'connected efp needed for {} not found'.format(formula))

    # handle printing
    if self.verbose:
        print('Originally Available EFPs:')
        self.print_stats(specs=concat_specs(orig_c_specs, orig_disc_specs),
                         lws=2)
        # only report the stored subset when specifications were given
        if len(args) > 0:
            print('Currently Stored EFPs:')
            self.print_stats(lws=2)
def __init__(self, *args, **kwargs):
    r"""`EFPSet` can be initialized in one of four ways (in order of
    precedence):

    1. **Graphs** - Pass in graphs as lists of edges, just as for
    individual EFPs.
    2. **Generator** - Pass in a custom `Generator` object as the first
    positional argument.
    3. **Custom File** - Pass in the name of a `.npz` file saved with a
    custom `Generator`.
    4. **Default** - Use the $d\le10$ EFPs that come installed with the
    `EnergyFlow` package.

    To control which EFPs are included, `EFPSet` accepts an arbitrary
    number of specifications (see [`sel`](#sel)) and only EFPs meeting each
    specification are included in the set. Note that no specifications
    should be passed in when initializing from explicit graphs.

    Since an EFP defines and holds a `Measure` instance, all `Measure`
    keywords are accepted.

    **Arguments**

    - ***args** : _arbitrary positional arguments_
        - Depending on the method of initialization, these can be either
        1) graphs to store, as lists of edges 2) a Generator instance
        followed by some number of valid arguments to `sel` or 3,4) valid
        arguments to `sel`. When passing in specific graphs, no arguments
        to `sel` should be given.
    - **filename** : _string_
        - Path to a `.npz` file which has been saved by a valid
        `energyflow.Generator`. A value of `None` will use the provided
        graphs, if a file is needed at all.
    - **measure** : {`'hadr'`, `'hadr-dot'`, `'ee'`}
        - See [Measures](../measures) for additional info.
    - **beta** : _float_
        - The parameter $\beta$ appearing in the measure. Must be greater
        than zero.
    - **kappa** : {_float_, `'pf'`}
        - If a number, the energy weighting parameter $\kappa$. If `'pf'`,
        use $\kappa=v-1$ where $v$ is the valency of the vertex.
    - **normed** : _bool_
        - Controls normalization of the energies in the measure.
    - **coords** : {`'ptyphim'`, `'epxpypz'`, `None`}
        - Controls which coordinates are assumed for the input. See
        [Measures](../measures) for additional info.
    - **check_input** : _bool_
        - Whether to check the type of the input each time or assume the
        first input type.
    - **verbose** : _int_
        - Controls printed output when initializing `EFPSet` from a file or
        `Generator`.
    """

    # process arguments: filename and verbose are consumed here and stored
    # as attributes; everything left in kwargs goes to the base class
    for k, v in {'filename': None, 'verbose': 0}.items():
        setattr(self, k, kwargs.pop(k, v))

    # initialize EFPBase (it receives the remaining kwargs as one dict)
    super(EFPSet, self).__init__(kwargs)

    # handle different methods of initialization
    maxs = ['nmax', 'emax', 'dmax', 'cmax', 'vmax', 'comp_dmaxs']
    elemvs = ['edges', 'weights', 'einstrs', 'einpaths']
    efmvs = ['efm_einstrs', 'efm_einpaths', 'efm_specs']
    miscattrs = [
        'cols', 'gen_efms', 'c_specs', 'disc_specs', 'disc_formulae'
    ]
    if (len(args) >= 1 and not sel_arg_check(args[0])
            and not isinstance(args[0], Generator)):
        # first argument is neither a sel() specification nor a Generator,
        # so the positional arguments are treated as explicit graphs
        gen = False
    elif len(args) >= 1 and isinstance(args[0], Generator):
        constructor_attrs = maxs + elemvs + efmvs + miscattrs
        gen = {attr: getattr(args[0], attr) for attr in constructor_attrs}
        # everything after the Generator is a specification for sel()
        args = args[1:]
    else:
        gen = load_efp_file(self.filename)

    # compiled regular expression for use in sel()
    self._sel_re = re.compile(r'(\w+)(<|>|==|!=|<=|>=)(\d+)$')

    # column headers, plus one `<col>_ind` attribute per column
    self._cols = np.array(['n', 'e', 'd', 'v', 'k', 'c', 'p', 'h'])
    self.__dict__.update(
        {col + '_ind': i for i, col in enumerate(self._cols)})

    # initialize from given graphs
    if not gen:
        self._disc_col_inds = None
        self._efps = [EFP(graph, no_measure=True) for graph in args]
        self._cspecs = self._specs = np.asarray(
            [efp.spec for efp in self.efps])

    # initialize from a generator
    else:

        # handle not having efm generation
        if not gen['gen_efms'] and self.use_efms:
            raise ValueError(
                'Cannot use efm measure without providing efm generation.')

        # verify columns with generator
        assert np.all(self._cols == gen['cols']), (
            'generator columns do not match the expected column layout')

        # get disc formulae and disc mask
        orig_disc_specs = np.asarray(gen['disc_specs'])
        disc_mask = self.sel(*args, specs=orig_disc_specs)
        disc_formulae = np.asarray(gen['disc_formulae'])[disc_mask]

        # get connected specs and full specs
        orig_c_specs = np.asarray(gen['c_specs'])
        c_mask = self.sel(*args, specs=orig_c_specs)
        self._cspecs = orig_c_specs[c_mask]
        self._specs = concat_specs(self._cspecs,
                                   orig_disc_specs[disc_mask])

        # make EFP list; each zipped record is (edges, weights, einstr,
        # einpath, spec, efm_einstr, efm_einpath, efm_spec), with the efm
        # entries replaced by None when efms are not in use
        z = zip(*([gen[v] for v in elemvs] + [orig_c_specs] + [
            gen[v] if self.use_efms else itertools.repeat(None)
            for v in efmvs
        ]))
        # the loop variable is deliberately NOT named `args` so that it
        # cannot clobber the positional specifications (list comprehension
        # variables leak into the enclosing scope on Python 2)
        self._efps = [
            EFP(elem[0], weights=elem[1], no_measure=True,
                efpset_args=elem[2:])
            for m, elem in enumerate(z) if c_mask[m]
        ]

        # get col indices for disconnected formulae
        connected_ndk = {efp.ndk: i for i, efp in enumerate(self.efps)}
        self._disc_col_inds = []
        for formula in disc_formulae:
            # a formula with a missing connected factor is skipped with a
            # warning, so _disc_col_inds may be shorter than disc_formulae
            try:
                self._disc_col_inds.append(
                    [connected_ndk[tuple(factor)] for factor in formula])
            except KeyError:
                warnings.warn(
                    'connected efp needed for {} not found'.format(
                        formula))

        # handle printing
        if self.verbose > 0:
            print('Originally Available EFPs:')
            self.print_stats(specs=concat_specs(orig_c_specs,
                                                orig_disc_specs),
                             lws=2)
            # only report the stored subset when specifications were given
            if len(args) > 0:
                print('Current Stored EFPs:')
                self.print_stats(lws=2)

    # setup EFMs
    if self.use_efms:
        efm_specs = set(
            itertools.chain(*[efp.efm_spec for efp in self.efps]))
        self._efmset = EFMSet(efm_specs, subslicing=self.subslicing)

    # union over all weights needed
    self._weight_set = frozenset(w for efp in self.efps
                                 for w in efp.weight_set)