Exemple #1
0
    def __init__(self, dmax=None, nmax=None, emax=None, cmax=None, vmax=None, comp_dmaxs=None,
                      filename=None, gen_efms=True, np_optimize='greedy', verbose=False):
        r"""Build the connected (prime) graph data, either by a fresh
        generation or by reading a saved file, and then generate the
        disconnected (composite) graphs from it. A fresh generation of
        connected multigraphs requires that `igraph` be installed.

        A fresh generation is done only when `dmax` is given and `filename` is
        `None`; in every other case the connected graphs are obtained from
        `load_efp_file(filename)` and restricted by the 'max' parameters.

        **Arguments**

        - **dmax** : _int_
            - The maximum number of edges of the generated connected graphs.
        - **nmax** : _int_
            - The maximum number of vertices of the generated connected graphs.
        - **emax** : _int_
            - The maximum number of edges of the generated connected simple
            graphs.
        - **cmax** : _int_
            - The maximum VE complexity $\chi$ of the generated connected
            graphs.
        - **vmax** : _int_
            - The maximum valency of the generated connected graphs.
        - **comp_dmaxs** : {_dict_, _int_}
            - If an integer, the maximum number of edges of the generated
            disconnected graphs. If a dictionary, the keys are numbers of
            vertices and the values are the maximum number of edges of the
            generated disconnected graphs with that number of vertices.
        - **filename** : _str_
            - If `None` (and `dmax` is given), do a complete generation from
            scratch. If set to a string, read in connected graphs from the
            file given, restrict them according to the various 'max'
            parameters, and do a fresh disconnected generation. The special
            value `filename='default'` means to read in graphs from the
            default file. This is useful when various disconnected graph
            parameters are to be varied since the generation of large simple
            graphs is the most computationally intensive part.
        - **gen_efms** : _bool_
            - Controls whether EFM information is generated. When reading
            from a file, EFM information is only kept if the file also has it.
        - **np_optimize** : {`True`, `False`, `'greedy'`, `'optimal'`}
            - The `optimize` keyword of `numpy.einsum_path`. Only used for a
            fresh generation; when reading a file the stored value is used.
        - **verbose** : _bool_
            - A flag to control printing.
        """

        # reference time for the elapsed-time messages printed below
        t0 = time.time()

        if dmax is None or filename is not None:

            # connected graphs come from a previously saved file
            stored = load_efp_file(filename)

            # column names and their indices are dictated by the file
            self.cols = stored['cols']
            self._set_col_inds()

            c_specs = np.asarray(stored['c_specs'])

            # each effective max is the tighter of the file's value and the
            # requested one (an unspecified request imposes no restriction)
            requested = (('dmax', dmax), ('nmax', nmax), ('emax', emax),
                         ('cmax', cmax), ('vmax', vmax))
            for name, value in requested:
                setattr(self, name, min(stored[name], none2inf(value)))

            # keep only the connected specs that respect every max
            bounds = ((self.d_ind, self.dmax),
                      (self.n_ind, self.nmax),
                      (self.e_ind, self.emax),
                      (self.c_ind, self.cmax),
                      (self.v_ind, self.vmax))
            first_col, first_bound = bounds[0]
            keep = c_specs[:, first_col] <= first_bound
            for col, bound in bounds[1:]:
                keep = keep & (c_specs[:, col] <= bound)

            # the einsum optimization setting is inherited from the file
            self.np_optimize = stored['np_optimize']

            # gen_efms must be set before _prime_attrs() is consulted
            self.gen_efms = stored['gen_efms'] and gen_efms
            for attr in self._prime_attrs():
                kept = [item for item, flag in zip(stored[attr], keep) if flag]
                setattr(self, attr, kept)
            self.c_specs = c_specs[keep]

        else:

            # fresh generation: record the maxs and the options
            self._set_maxs(dmax, nmax, emax, cmax, vmax)
            self.np_optimize = np_optimize
            self.gen_efms = gen_efms

            # run the prime (connected) graph generator
            self.pr_gen = PrimeGenerator(self.dmax, self.nmax, self.emax,
                                         self.cmax, self.vmax, self.gen_efms,
                                         self.np_optimize, verbose, t0)
            self.cols = self.pr_gen.cols
            self._set_col_inds()

            if verbose:
                print('Finished generating prime graphs in {:.3f}.'.format(time.time() - t0))

            # copy the prime attributes from the generator onto this object
            transfer(self, self.pr_gen, self._prime_attrs())

        # disconnected graphs are always generated fresh from the connected specs
        self._set_comp_dmaxs(comp_dmaxs)
        self.comp_gen = CompositeGenerator(self.c_specs, self.cols, self.comp_dmaxs)

        if verbose:
            print('Finished generating composite graphs in {:.3f}.'.format(time.time() - t0))

        # copy the composite attributes from the generator onto this object
        transfer(self, self.comp_gen, self._comp_attrs())
Exemple #2
0
    def __init__(self, *args, **kwargs):
        r"""`EFPSet` can be initialized in one of four ways (in order of
        precedence):

        1. **Graphs** - Pass in graphs as lists of edges, just as for
        individual EFPs.
        2. **Generator** - Pass in a custom `Generator` object as the first
        positional argument.
        3. **Custom File** - Pass in the name of a `.npz` file saved with a
        custom `Generator`.
        4. **Default** - Use the $d\le10$ EFPs that come installed with the
        `EnergyFlow` package.

        To control which EFPs are included, `EFPSet` accepts an arbitrary
        number of specifications (see [`sel`](#sel)) and only EFPs meeting each
        specification are included in the set. Note that no specifications
        should be passed in when initializing from explicit graphs.

        Since an EFP defines and holds a `Measure` instance, all `Measure`
        keywords are accepted.

        **Arguments**

        - ***args** : _arbitrary positional arguments_
            - Depending on the method of initialization, these can be either
            1) graphs to store, as lists of edges 2) a Generator instance
            followed by some number of valid arguments to `sel` or 3,4) valid
            arguments to `sel`. When passing in specific graphs, no arguments
            to `sel` should be given.
        - **filename** : _string_
            - Path to a `.npz` file which has been saved by a valid
            `energyflow.Generator`. A value of `None` will use the provided
            graphs, if a file is needed at all.
        - **measure** : {`'hadr'`, `'hadr-dot'`, `'ee'`}
            - See [Measures](../measures) for additional info.
        - **beta** : _float_
            - The parameter $\beta$ appearing in the measure. Must be greater
            than zero.
        - **kappa** : {_float_, `'pf'`}
            - If a number, the energy weighting parameter $\kappa$. If `'pf'`,
            use $\kappa=v-1$ where $v$ is the valency of the vertex.
        - **normed** : _bool_
            - Controls normalization of the energies in the measure.
        - **coords** : {`'ptyphim'`, `'epxpypz'`, `None`}
            - Controls which coordinates are assumed for the input. See
            [Measures](../measures) for additional info.
        - **check_input** : _bool_
            - Whether to check the type of the input each time or assume the
            first input type.
        - **verbose** : _int_
            - Controls printed output when initializing `EFPSet` from a file or
            `Generator`.

        **Raises**

        - `ValueError` if EFMs are requested (`self.use_efms`) but the
        generator/file data was produced without EFM generation.
        - `AssertionError` if the columns of the generator/file data do not
        match the columns used by `EFPSet`.
        """

        # process arguments: strip the EFPSet-specific keywords (using their
        # defaults when absent) so the remaining ones go to the base class
        for k, default in {'filename': None, 'verbose': 0}.items():
            setattr(self, k, kwargs.pop(k, default))

        # initialize EFPBase with the remaining keyword dictionary
        super(EFPSet, self).__init__(kwargs)

        # attribute names that are read off of a Generator instance
        maxs = ['nmax', 'emax', 'dmax', 'cmax', 'vmax', 'comp_dmaxs']
        elemvs = ['edges', 'weights', 'einstrs', 'einpaths']
        efmvs = ['efm_einstrs', 'efm_einpaths', 'efm_specs']
        miscattrs = [
            'cols', 'gen_efms', 'c_specs', 'disc_specs', 'disc_formulae'
        ]

        # handle different methods of initialization; `gen` is False when
        # explicit graphs were given and a mapping of generator data otherwise
        if len(args) >= 1 and not sel_arg_check(args[0]) and not isinstance(
                args[0], Generator):
            gen = False
        elif len(args) >= 1 and isinstance(args[0], Generator):
            constructor_attrs = maxs + elemvs + efmvs + miscattrs
            gen = {attr: getattr(args[0], attr) for attr in constructor_attrs}

            # everything after the Generator is treated as arguments to sel()
            args = args[1:]
        else:
            gen = load_efp_file(self.filename)

        # compiled regular expression for use in sel()
        self._sel_re = re.compile(r'(\w+)(<|>|==|!=|<=|>=)(\d+)$')
        self._cols = np.array(['n', 'e', 'd', 'v', 'k', 'c', 'p', 'h'])

        # expose the position of each column as an `<col>_ind` attribute
        self.__dict__.update(
            {col + '_ind': i
             for i, col in enumerate(self._cols)})

        # initialize from given graphs
        if not gen:
            self._disc_col_inds = None
            self._efps = [EFP(graph, no_measure=True) for graph in args]
            self._cspecs = self._specs = np.asarray(
                [efp.spec for efp in self.efps])

        # initialize from a generator
        else:

            # handle not having efm generation
            if not gen['gen_efms'] and self.use_efms:
                raise ValueError(
                    'Cannot use efm measure without providing efm generation.')

            # verify columns with generator
            assert np.all(self._cols == gen['cols'])

            # get disc formulae and disc mask
            orig_disc_specs = np.asarray(gen['disc_specs'])
            disc_mask = self.sel(*args, specs=orig_disc_specs)
            disc_formulae = np.asarray(gen['disc_formulae'])[disc_mask]

            # get connected specs and full specs
            orig_c_specs = np.asarray(gen['c_specs'])
            c_mask = self.sel(*args, specs=orig_c_specs)
            self._cspecs = orig_c_specs[c_mask]
            self._specs = concat_specs(self._cspecs,
                                       orig_disc_specs[disc_mask])

            # per-graph tuples of (edges, weights, einstr, einpath, spec,
            # efm_einstr, efm_einpath, efm_spec); the EFM entries are None
            # placeholders when EFMs are not in use
            efm_columns = [
                gen[v] if self.use_efms else itertools.repeat(None)
                for v in efmvs
            ]
            per_graph = zip(*([gen[v] for v in elemvs] + [orig_c_specs] +
                              efm_columns))

            # make EFP list, keeping only the selected connected graphs; the
            # loop variable is deliberately NOT called `args` so that it can
            # never clobber the positional arguments used further below
            self._efps = [
                EFP(efp_args[0],
                    weights=efp_args[1],
                    no_measure=True,
                    efpset_args=efp_args[2:])
                for m, efp_args in enumerate(per_graph)
                if c_mask[m]
            ]

            # get col indices for disconnected formulae
            connected_ndk = {efp.ndk: i for i, efp in enumerate(self.efps)}
            self._disc_col_inds = []
            for formula in disc_formulae:
                try:
                    self._disc_col_inds.append(
                        [connected_ndk[tuple(factor)] for factor in formula])
                except KeyError:
                    # a factor of this formula is not among the stored
                    # connected EFPs: warn and skip the formula
                    warnings.warn(
                        'connected efp needed for {} not found'.format(
                            formula))

            # handle printing
            if self.verbose > 0:
                print('Originally Available EFPs:')
                self.print_stats(specs=concat_specs(orig_c_specs,
                                                    orig_disc_specs),
                                 lws=2)

                # only report the stored subset if a selection was requested
                if len(args) > 0:
                    print('Current Stored EFPs:')
                    self.print_stats(lws=2)

        # setup EFMs
        if self.use_efms:
            efm_specs = set(
                itertools.chain.from_iterable(
                    efp.efm_spec for efp in self.efps))
            self._efmset = EFMSet(efm_specs, subslicing=self.subslicing)

        # union over all weights needed
        self._weight_set = frozenset(w for efp in self.efps
                                     for w in efp.weight_set)