Exemple #1
0
 def specs(self):
     """Return the array of EFP specifications, building it on first access.

     Each row represents an EFP and the columns represent the quantities
     indicated by `cols`. The array is assembled from `c_specs` and
     `disc_specs` with `concat_specs` and then stored on the instance as
     `_specs`, so later calls return the stored object.
     """

     # Guard clause: hand back the stored array if one already exists.
     if hasattr(self, '_specs'):
         return self._specs

     self._specs = concat_specs(self.c_specs, self.disc_specs)
     return self._specs
Exemple #2
0
    def __init__(self, *args, **kwargs):
        r"""EFPSet can be initialized in one of three ways (in order of precedence):

        1. **Generator** - Pass in a custom `Generator` object as the
        first positional argument.
        2. **Custom File** - Pass in the name of a `.npz` file saved
        with a custom `Generator`.
        3. **Default** - Use the ($d\le10$) EFPs that come installed with the
        `EnergyFlow` package.

        To control which EFPs are included, `EFPSet` accepts an arbitrary
        number of specifications (see [`sel`](#sel)) and only EFPs meeting each
        specification are included in the set.

        **Arguments**

        - ***args** : _arbitrary positional arguments_
            - If the first positional argument is a `Generator` instance,
            it is used for initialization. The remaining positional
            arguments must be valid arguments to `sel`.
        - **filename** : _string_
            - Path to a `.npz` file which has been saved by a valid
            `energyflow.Generator`. The `.npz` extension is appended if it
            is missing.
        - **measure** : {`'hadr'`, `'hadr-dot'`, `'ee'`}
            - See [Measures](../measures) for additional info.
        - **beta** : _float_
            - The parameter $\beta$ appearing in the measure.
            Must be greater than zero.
        - **kappa** : {_float_, `'pf'`}
            - If a number, the energy weighting parameter $\kappa$.
            If `'pf'`, use $\kappa=v-1$ where $v$ is the valency of the vertex.
        - **normed** : _bool_
            - Controls normalization of the energies in the measure.
        - **coords** : {`'ptyphim'`, `'epxpypz'`, `None`}
            - Controls which coordinates are assumed for the input. See
            [Measures](../measures) for additional info.
        - **check_input** : _bool_
            - Whether to check the type of the input each time or assume
            the first input type.
        - **verbose** : _bool_
            - Controls printed output when initializing EFPSet.
        """

        default_kwargs = {
            'filename': None,
            'measure': 'hadr',
            'beta': 1,
            'kappa': 1,
            'normed': True,
            'coords': None,
            'check_input': True,
            'verbose': False
        }
        # order matters: these are forwarded positionally to the base class
        measure_kwargs = [
            'measure', 'beta', 'kappa', 'normed', 'coords', 'check_input'
        ]

        # process arguments: fill in defaults, then move the non-measure
        # options (filename, verbose) out of kwargs and onto the instance
        for k, v in default_kwargs.items():
            kwargs.setdefault(k, v)
            if k not in measure_kwargs:
                setattr(self, k, kwargs.pop(k))

        # only measure keywords remain; presumably kwargs_check rejects
        # anything else (helper is defined outside this block)
        kwargs_check('__init__', kwargs, allowed=measure_kwargs)

        # initialize EFPBase
        super(EFPSet, self).__init__(*[kwargs[k] for k in measure_kwargs])

        # handle different methods of initialization
        maxs = ['nmax', 'emax', 'dmax', 'cmax', 'vmax', 'comp_dmaxs']
        elemvs = ['edges', 'weights', 'einstrs', 'einpaths']
        if len(args) >= 1 and isinstance(args[0], Generator):
            # copy the needed attributes off the Generator into a dict so that
            # all three initialization paths can be indexed the same way
            constructor_attrs = maxs + elemvs + [
                'cols', 'c_specs', 'disc_specs', 'disc_formulae'
            ]
            gen = {attr: getattr(args[0], attr) for attr in constructor_attrs}
            # the remaining positional arguments are passed on to sel()
            args = args[1:]
        elif self.filename is not None:
            if not self.filename.endswith('.npz'):
                self.filename += '.npz'
            # NOTE(review): allow_pickle=True lets numpy unpickle arbitrary
            # objects stored in the file; only load files from trusted sources
            gen = np.load(self.filename, allow_pickle=True)
        else:
            gen = np.load(DEFAULT_EFP_FILE, allow_pickle=True)

        # compile regular expression for use in sel()
        self.SEL_RE = SEL_RE

        # put column headers and indices into namespace
        self._cols = gen['cols']
        self._set_col_inds()

        # put gen maxs into dict
        self.gen_maxs = {m: gen[m] for m in maxs}

        # get disc formulae and disc mask
        orig_disc_specs = gen['disc_specs']
        disc_mask = self.sel(*args, specs=orig_disc_specs)
        self.disc_formulae = gen['disc_formulae'][disc_mask]

        # get connected specs and full specs
        orig_c_specs = gen['c_specs']
        c_mask = self.sel(*args, specs=orig_c_specs)
        self._cspecs = orig_c_specs[c_mask]
        self._specs = concat_specs(self._cspecs, orig_disc_specs[disc_mask])

        # make EFPElem list, keeping only the connected graphs selected by
        # c_mask. The loop variable is deliberately NOT called `args`: under
        # Python 2 a list-comprehension variable leaks into the enclosing
        # scope and would clobber the positional arguments used below.
        z = zip(*([gen[v] for v in elemvs] + [orig_c_specs[:, self.k_ind]]))
        self.efpelems = [
            EFPElem(*elem_args) for m, elem_args in enumerate(z) if c_mask[m]
        ]

        # union over all weights needed
        self.__weight_set = frozenset(w for efpelem in self.efpelems
                                      for w in efpelem.weight_set)

        # get col indices for disconnected formulae: map each factor of a
        # formula to the position of the matching connected EFP in efpelems
        connected_ndk = {
            efpelem.ndk: i
            for i, efpelem in enumerate(self.efpelems)
        }
        self.disc_col_inds = []
        for formula in self.disc_formulae:
            try:
                self.disc_col_inds.append(
                    [connected_ndk[factor] for factor in formula])
            except KeyError:
                # best effort: a factor was filtered out by the selection, so
                # this formula is skipped with a warning rather than failing
                warnings.warn(
                    'connected efp needed for {} not found'.format(formula))

        # handle printing
        if self.verbose:
            print('Originally Available EFPs:')
            self.print_stats(specs=concat_specs(orig_c_specs, orig_disc_specs),
                             lws=2)
            # only report the stored subset when a selection was requested
            if len(args) > 0:
                print('Currently Stored EFPs:')
                self.print_stats(lws=2)
Exemple #3
0
    def __init__(self, *args, **kwargs):
        r"""`EFPSet` can be initialized in one of four ways (in order of
        precedence):

        1. **Graphs** - Pass in graphs as lists of edges, just as for
        individual EFPs.
        2. **Generator** - Pass in a custom `Generator` object as the first
        positional argument.
        3. **Custom File** - Pass in the name of a `.npz` file saved with a
        custom `Generator`.
        4. **Default** - Use the $d\le10$ EFPs that come installed with the
        `EnergyFlow` package.

        To control which EFPs are included, `EFPSet` accepts an arbitrary
        number of specifications (see [`sel`](#sel)) and only EFPs meeting each
        specification are included in the set. Note that no specifications
        should be passed in when initializing from explicit graphs.

        Since an EFP defines and holds a `Measure` instance, all `Measure`
        keywords are accepted.

        **Arguments**

        - ***args** : _arbitrary positional arguments_
            - Depending on the method of initialization, these can be either
            1) graphs to store, as lists of edges 2) a Generator instance
            followed by some number of valid arguments to `sel` or 3,4) valid
            arguments to `sel`. When passing in specific graphs, no arguments
            to `sel` should be given.
        - **filename** : _string_
            - Path to a `.npz` file which has been saved by a valid
            `energyflow.Generator`. A value of `None` will use the provided
            graphs, if a file is needed at all.
        - **measure** : {`'hadr'`, `'hadr-dot'`, `'ee'`}
            - See [Measures](../measures) for additional info.
        - **beta** : _float_
            - The parameter $\beta$ appearing in the measure. Must be greater
            than zero.
        - **kappa** : {_float_, `'pf'`}
            - If a number, the energy weighting parameter $\kappa$. If `'pf'`,
            use $\kappa=v-1$ where $v$ is the valency of the vertex.
        - **normed** : _bool_
            - Controls normalization of the energies in the measure.
        - **coords** : {`'ptyphim'`, `'epxpypz'`, `None`}
            - Controls which coordinates are assumed for the input. See
            [Measures](../measures) for additional info.
        - **check_input** : _bool_
            - Whether to check the type of the input each time or assume the
            first input type.
        - **verbose** : _int_
            - Controls printed output when initializing `EFPSet` from a file or
            `Generator`.
        """

        # process arguments: pull the EFPSet-only options out of kwargs and
        # store them on the instance, falling back to their defaults
        for k, v in {'filename': None, 'verbose': 0}.items():
            kwargs.setdefault(k, v)
            setattr(self, k, kwargs.pop(k))

        # initialize EFPBase; the remaining kwargs are handed over as a single
        # dict (presumably the measure keywords -- base class not shown here)
        super(EFPSet, self).__init__(kwargs)

        # handle different methods of initialization
        maxs = ['nmax', 'emax', 'dmax', 'cmax', 'vmax', 'comp_dmaxs']
        elemvs = ['edges', 'weights', 'einstrs', 'einpaths']
        efmvs = ['efm_einstrs', 'efm_einpaths', 'efm_specs']
        miscattrs = [
            'cols', 'gen_efms', 'c_specs', 'disc_specs', 'disc_formulae'
        ]
        if len(args) >= 1 and not sel_arg_check(args[0]) and not isinstance(
                args[0], Generator):
            # first positional is neither a sel() argument nor a Generator:
            # every positional argument is treated as an explicit graph
            gen = False
        elif len(args) >= 1 and isinstance(args[0], Generator):
            # copy the needed attributes off the Generator into a dict so it
            # can be indexed like the data returned by load_efp_file
            constructor_attrs = maxs + elemvs + efmvs + miscattrs
            gen = {attr: getattr(args[0], attr) for attr in constructor_attrs}
            # the remaining positional arguments are passed on to sel()
            args = args[1:]
        else:
            gen = load_efp_file(self.filename)

        # compiled regular expression for use in sel()
        self._sel_re = re.compile(r'(\w+)(<|>|==|!=|<=|>=)(\d+)$')
        self._cols = np.array(['n', 'e', 'd', 'v', 'k', 'c', 'p', 'h'])
        # expose each column position as an attribute: n_ind, e_ind, ...
        self.__dict__.update(
            {col + '_ind': i
             for i, col in enumerate(self._cols)})

        # initialize from given graphs
        if not gen:
            self._disc_col_inds = None
            self._efps = [EFP(graph, no_measure=True) for graph in args]
            self._cspecs = self._specs = np.asarray(
                [efp.spec for efp in self.efps])

        # initialize from a generator
        else:

            # handle not having efm generation
            if not gen['gen_efms'] and self.use_efms:
                raise ValueError(
                    'Cannot use efm measure without providing efm generation.')

            # verify columns with generator
            # NOTE(review): this check is an assert, so it is skipped when
            # Python runs with -O
            assert np.all(self._cols == gen['cols'])

            # get disc formulae and disc mask
            orig_disc_specs = np.asarray(gen['disc_specs'])
            disc_mask = self.sel(*args, specs=orig_disc_specs)
            disc_formulae = np.asarray(gen['disc_formulae'])[disc_mask]

            # get connected specs and full specs
            orig_c_specs = np.asarray(gen['c_specs'])
            c_mask = self.sel(*args, specs=orig_c_specs)
            self._cspecs = orig_c_specs[c_mask]
            self._specs = concat_specs(self._cspecs,
                                       orig_disc_specs[disc_mask])

            # make EFP list, keeping only the connected graphs selected by
            # c_mask. When EFMs are not used, the EFM columns are padded with
            # None. The loop variable is deliberately NOT called `args`: under
            # Python 2 a list-comprehension variable leaks into the enclosing
            # scope and would clobber the positional arguments used below.
            z = zip(*([gen[v] for v in elemvs] + [orig_c_specs] + [
                gen[v] if self.use_efms else itertools.repeat(None)
                for v in efmvs
            ]))
            self._efps = [
                EFP(efp_args[0],
                    weights=efp_args[1],
                    no_measure=True,
                    efpset_args=efp_args[2:])
                for m, efp_args in enumerate(z)
                if c_mask[m]
            ]

            # get col indices for disconnected formulae: map each factor of a
            # formula to the position of the matching connected EFP
            connected_ndk = {efp.ndk: i for i, efp in enumerate(self.efps)}
            self._disc_col_inds = []
            for formula in disc_formulae:
                try:
                    self._disc_col_inds.append(
                        [connected_ndk[tuple(factor)] for factor in formula])
                except KeyError:
                    # best effort: a factor was filtered out by the selection,
                    # so this formula is skipped with a warning
                    warnings.warn(
                        'connected efp needed for {} not found'.format(
                            formula))

            # handle printing
            if self.verbose > 0:
                print('Originally Available EFPs:')
                self.print_stats(specs=concat_specs(orig_c_specs,
                                                    orig_disc_specs),
                                 lws=2)
                # only report the stored subset when a selection was requested
                if len(args) > 0:
                    print('Current Stored EFPs:')
                    self.print_stats(lws=2)

        # setup EFMs: collect the distinct EFM specs required by all EFPs
        if self.use_efms:
            efm_specs = set(
                itertools.chain.from_iterable(
                    efp.efm_spec for efp in self.efps))
            self._efmset = EFMSet(efm_specs, subslicing=self.subslicing)

        # union over all weights needed
        self._weight_set = frozenset(w for efp in self.efps
                                     for w in efp.weight_set)