Example #1
def test_concatenate():
    xx = sparse.random((2, 3, 4), density=0.5, format="gcxs")
    x = xx.todense()
    yy = sparse.random((5, 3, 4), density=0.5, format="gcxs")
    y = yy.todense()
    zz = sparse.random((4, 3, 4), density=0.5, format="gcxs")
    z = zz.todense()

    assert_eq(
        np.concatenate([x, y, z], axis=0), sparse.concatenate([xx, yy, zz], axis=0)
    )

    xx = sparse.random((5, 3, 1), density=0.5, format="gcxs")
    x = xx.todense()
    yy = sparse.random((5, 3, 3), density=0.5, format="gcxs")
    y = yy.todense()
    zz = sparse.random((5, 3, 2), density=0.5, format="gcxs")
    z = zz.todense()

    assert_eq(
        np.concatenate([x, y, z], axis=2), sparse.concatenate([xx, yy, zz], axis=2)
    )

    assert_eq(
        np.concatenate([x, y, z], axis=-1), sparse.concatenate([xx, yy, zz], axis=-1)
    )
Example #2
    def initialize(self, rate_ion, rate_cx, t_ion, t_edge, n_edge):
        for v in [rate_ion, rate_cx, t_ion]:
            if v.shape != self.r.shape:
                raise ValueError('Shape mismatch, {} and {}'.format(
                    v.shape, self.r.shape))
        self.rate.initialize(rate_ion + rate_cx, rate_cx, t_ion)
        self.t_ion = t_ion
        self.n_edge = n_edge
        self.nt_edge = n_edge * t_edge
        self.ntt_edge = n_edge * t_edge * t_edge

        n = self.size - 1
        # prepare matrices
        # matrix for the particle balance
        A_part = sparse.concatenate([
            -sparse.tensordot(
                self.rate.Rij + self.rate.Sij, self.rate.slice_l, axes=(0, 0)),
            sparse.tensordot(self.rate.Dij, self.rate.slice_l, axes=(0, 0)),
            sparse.COO([], [], shape=(n, n))
        ],
                                    axis=0).T
        # matrix for the energy balance
        A_engy = sparse.concatenate([
            -1.5 *
            sparse.tensordot(self.rate.Eij, self.rate.slice_l, axes=(0, 0)),
            -1.5 *
            sparse.tensordot(self.rate.Rij, self.rate.slice_l, axes=(0, 0)),
            2.5 *
            sparse.tensordot(self.rate.Dij, self.rate.slice_l, axes=(0, 0))
        ],
                                    axis=0).T
        # balance matrix.
        self.A = sparse.concatenate([A_part, A_engy], axis=0)

        # boundary conditions
        b_part = (-self.n_edge * sparse.tensordot(
            self.rate.Rij + self.rate.Sij, self.rate.slice_last, axes=(0, 0)) +
                  self.nt_edge * sparse.tensordot(
                      self.rate.Dij, self.rate.slice_last, axes=(0, 0))
                  ).todense()

        b_engy = (
            -1.5 * self.n_edge *
            sparse.tensordot(self.rate.Eij, self.rate.slice_last, axes=(0, 0))
            - 1.5 * self.nt_edge * sparse.tensordot(
                self.rate.Rij, self.rate.slice_last, axes=(0, 0)) +
            2.5 * self.ntt_edge * sparse.tensordot(
                self.rate.Dij, self.rate.slice_last, axes=(0, 0))).todense()
        self.b = -np.concatenate([b_part, b_engy])

        # matrix for the constraint
        self.L = scipy.sparse.hstack([
            scipy.sparse.identity(n),
            scipy.sparse.identity(n) * (-2.0),
            scipy.sparse.identity(n)
        ])
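A quick, self-contained check (not part of the original source) of what the [I, -2I, I] block built with scipy.sparse.hstack above represents: applied to three stacked vectors (u, v, w) it returns u - 2v + w, a second difference across the blocks, so a linearly increasing stack lies in its null space.
import numpy as np
import scipy.sparse

n = 3
L = scipy.sparse.hstack([
    scipy.sparse.identity(n),
    scipy.sparse.identity(n) * (-2.0),
    scipy.sparse.identity(n),
])
# (u, v, w) = (1, 2, 3) per component: 1 - 2*2 + 3 = 0 for every row.
x = np.concatenate([np.ones(n), 2 * np.ones(n), 3 * np.ones(n)])
assert np.allclose(L @ x, 0.0)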
Example #3
def test_concatenate_2(xx, yy, zz):
    x = xx.todense()
    y = yy.todense()
    z = zz.todense()

    assert_eq(
        np.concatenate([x, y, z], axis=2), sparse.concatenate([xx, yy, zz], axis=2)
    )

    assert_eq(
        np.concatenate([x, y, z], axis=-1), sparse.concatenate([xx, yy, zz], axis=-1)
    )
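test_concatenate_2 receives its arrays through pytest fixtures that are not shown in this excerpt; a minimal sketch of compatible fixtures, assuming GCXS inputs that differ only along the last axis as in Example #1:
import pytest
import sparse

# Hypothetical fixtures; the real ones live elsewhere in the test suite.
@pytest.fixture
def xx():
    return sparse.random((5, 3, 1), density=0.5, format="gcxs")

@pytest.fixture
def yy():
    return sparse.random((5, 3, 3), density=0.5, format="gcxs")

@pytest.fixture
def zz():
    return sparse.random((5, 3, 2), density=0.5, format="gcxs")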
Example #4
        def stack():
            rings = masks.radial_bins(centerX=cx,
                                      centerY=cy,
                                      imageSizeX=detector_x,
                                      imageSizeY=detector_y,
                                      radius=ro,
                                      radius_inner=ri,
                                      n_bins=n_bins,
                                      use_sparse=use_sparse,
                                      dtype=np.complex64)

            orders = np.arange(max_order + 1)

            r, phi = masks.polar_map(centerX=cx,
                                     centerY=cy,
                                     imageSizeX=detector_x,
                                     imageSizeY=detector_y)
            modulator = np.exp(phi * orders[:, np.newaxis, np.newaxis] * 1j)

            if use_sparse:
                rings = rings.reshape((rings.shape[0], 1, *rings.shape[1:]))
                ring_stack = [rings] * len(orders)
                ring_stack = sparse.concatenate(ring_stack, axis=1)
                ring_stack *= modulator
            else:
                ring_stack = rings[:, np.newaxis, ...] * modulator
            return ring_stack.reshape((-1, detector_y, detector_x))
Example #5
def concat(L):
    if isinstance(L[0], np.ndarray):
        return np.concatenate(L, axis=0)
    elif isinstance(L[0], (pd.DataFrame, pd.Series)):
        return pd.concat(L, axis=0)
    elif ss and isinstance(L[0], ss.spmatrix):
        return ss.vstack(L, format='csr')
    elif sparse and isinstance(L[0], sparse.SparseArray):
        return sparse.concatenate(L, axis=0)
    else:
        raise TypeError("Data must be either numpy arrays or pandas dataframes. Got %s" % type(L[0]))
Example #6
def concat(seq):
    if isinstance(seq[0], np.ndarray):
        return np.concatenate(seq, axis=0)
    elif isinstance(seq[0], (pd.DataFrame, pd.Series)):
        return pd.concat(seq, axis=0)
    elif ss and isinstance(seq[0], ss.spmatrix):
        return ss.vstack(seq, format='csr')
    elif sparse and isinstance(seq[0], sparse.SparseArray):
        return sparse.concatenate(seq, axis=0)
    else:
        raise TypeError(
            'Data must be one of: numpy arrays, pandas dataframes, sparse matrices '
            f'(from scipy or from sparse). Got {type(seq[0])}.')
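A hypothetical illustration (not from the original project) of how the concat helper above dispatches on the type of the first element; it assumes concat and the guarded module names from the example are in scope:
import numpy as np
import pandas as pd
import scipy.sparse as ss
import sparse

dense = concat([np.ones((2, 3)), np.zeros((1, 3))])  # np.ndarray, shape (3, 3)
frame = concat([pd.Series([1, 2]), pd.Series([3])])  # pd.Series via pd.concat
csr = concat([ss.eye(3, format="csr")] * 2)          # scipy CSR matrix, shape (6, 3)
coo = concat([sparse.eye(3)] * 2)                    # pydata/sparse array, shape (6, 3)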
Example #7
    def _compute_masks(self):
        """
        Call mask factories and combine them into a mask stack

        Returns
        -------
        a list of masks with contents as they were created by the factories
        and converted uniformly to dense or sparse matrices depending on
        ``self.use_sparse``.
        """
        # Make sure all the masks are either sparse or dense
        # If the use_sparse property is set to True or False,
        # it takes precedence.
        # If it is None, use sparse only if all masks are sparse
        # and set the use_sparse property accordingly

        default_sparse = 'scipy.sparse'

        if callable(self.mask_factories):
            raw_masks = self.mask_factories()
            if not is_sparse(raw_masks):
                default_sparse = False
            mask_slices = [raw_masks]
        else:
            mask_slices = []
            for f in self.mask_factories:
                m = f()
                # Scipy.sparse is always 2D, so we have to convert here
                # before reshaping
                if scipy.sparse.issparse(m):
                    m = sparse.COO.from_scipy_sparse(m)
                # We reshape to be a stack of 1 so that we can unify code below
                m = m.reshape((1, ) + m.shape)
                if not is_sparse(m):
                    default_sparse = False
                mask_slices.append(m)

        if self._use_sparse is None:
            self._use_sparse = default_sparse

        if self.use_sparse:
            # Conversion to correct back-end will happen later
            # Use sparse.pydata because it implements the array interface
            # which makes mask handling easier
            masks = sparse.concatenate([to_sparse(m) for m in mask_slices])
        else:
            masks = np.concatenate([to_dense(m) for m in mask_slices])
        return masks
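A minimal sketch (independent of the class above) of the same combination pattern: each factory result is converted from scipy.sparse to COO if needed, reshaped to a stack of one, and the slices are concatenated along the first axis. The dense path is shown; the sparse path would call sparse.concatenate on the COO slices instead.
import numpy as np
import scipy.sparse
import sparse

factories = [
    lambda: scipy.sparse.identity(4, format="csr"),  # sparse mask
    lambda: np.ones((4, 4)),                         # dense mask
]
slices = []
for f in factories:
    m = f()
    if scipy.sparse.issparse(m):
        m = sparse.COO.from_scipy_sparse(m)
    slices.append(m.reshape((1,) + m.shape))
stack = np.concatenate([m.todense() if isinstance(m, sparse.COO) else m for m in slices])
assert stack.shape == (2, 4, 4)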
Example #8
def test_upcast():
    a = sparse.random((50, 50, 50),
                      density=0.1,
                      format="coo",
                      idx_dtype=np.uint8)
    b = a.asformat("gcxs")
    assert b.indices.dtype == np.uint16

    a = sparse.random((8, 7, 6),
                      density=0.5,
                      format="gcxs",
                      idx_dtype=np.uint8)
    b = sparse.random((6, 6, 6),
                      density=0.8,
                      format="gcxs",
                      idx_dtype=np.uint8)
    assert sparse.concatenate((a, a)).indptr.dtype == np.uint16
    assert sparse.stack((b, b)).indptr.dtype == np.uint16
Example #9
    def _compute_masks(self):
        """
        Call mask factories and convert to the dataset dtype

        Returns
        -------
        a list of masks with contents as they were created by the factories
        and converted uniformly to dense or sparse matrices depending on
        ``self.use_sparse``.
        """
        # Make sure all the masks are either sparse or dense
        # If the use_sparse property is set to True or False,
        # it takes precedence.
        # If it is None, use sparse only if all masks are sparse
        # and set the use_sparse property accordingly

        if callable(self.mask_factories):
            raw_masks = self.mask_factories().astype(self.dtype)
            default_sparse = is_sparse(raw_masks)
            mask_slices = [raw_masks]
        else:
            mask_slices = []
            default_sparse = True
            for f in self.mask_factories:
                m = f().astype(self.dtype)
                # Scipy.sparse is always 2D, so we have to convert here
                # before reshaping
                if scipy.sparse.issparse(m):
                    m = sparse.COO.from_scipy_sparse(m)
                # We reshape to be a stack of 1 so that we can unify code below
                m = m.reshape((1, ) + m.shape)
                default_sparse = default_sparse and is_sparse(m)
                mask_slices.append(m)

        if self.use_sparse is None:
            self.use_sparse = default_sparse

        if self.use_sparse:
            masks = sparse.concatenate([to_sparse(m) for m in mask_slices])
        else:
            masks = np.concatenate([to_dense(m) for m in mask_slices])
        return masks
Example #10
 def _concat_same_type(cls, to_concat):
     return SparseExtensionArray(
         coords=sparse.concatenate([array.data for array in to_concat]))
Example #11
def test_concatenate_noarrays():
    with pytest.raises(ValueError):
        sparse.concatenate([])
Example #12
    def create_single(self, system):
        """Return the many-body tensor representation for the given system.

        Args:
            system (:class:`ase.Atoms` | :class:`.System`): Input system.

        Returns:
            dict | np.ndarray | sparse.COO: The return type is
            specified by the 'flatten' and 'sparse'-parameters. If the output
            is not flattened, a dictionary containing the MBTR outputs as numpy
            arrays is created. Each output is under a "kX" key. If the output
            is flattened, a single concatenated output vector is returned,
            either as a sparse or a dense vector.
        """
        # Transform the input system into the internal System-object
        system = self.get_system(system)

        # Ensuring variables are re-initialized when a new system is introduced
        self.system = system
        self._interaction_limit = len(system)

        # Check that the system does not have elements that are not in the list
        # of atomic numbers
        self.check_atomic_numbers(system.get_atomic_numbers())

        mbtr = {}
        if self.k1 is not None:
            mbtr["k1"] = self._get_k1(system)
        if self.k2 is not None:
            mbtr["k2"] = self._get_k2(system)
        if self.k3 is not None:
            mbtr["k3"] = self._get_k3(system)

        # Handle normalization
        if self.normalization == "l2_each":
            if self.flatten is True:
                for key, value in mbtr.items():
                    i_data = np.array(value.data)
                    i_norm = np.linalg.norm(i_data)
                    mbtr[key] = value / i_norm
            else:
                for key, value in mbtr.items():
                    i_data = value.ravel()
                    i_norm = np.linalg.norm(i_data)
                    mbtr[key] = value / i_norm
        elif self.normalization == "n_atoms":
            n_atoms = len(self.system)
            if self.flatten is True:
                for key, value in mbtr.items():
                    mbtr[key] = value / n_atoms
            else:
                for key, value in mbtr.items():
                    mbtr[key] = value / n_atoms

        # Flatten output if requested
        if self.flatten:
            keys = sorted(mbtr.keys())
            if len(keys) > 1:
                mbtr = sparse.concatenate([mbtr[key] for key in keys], axis=0)
            else:
                mbtr = mbtr[keys[0]]

            # Make into a dense array if requested
            if not self.sparse:
                mbtr = mbtr.todense()

        return mbtr
Example #13
 def conc(*XS):
     return sp.concatenate(XS, axis=axis) if iscoo(
         XS[0]) else np.concatenate(XS, axis=axis)
Example #14
def add_intercept(X):
    return sparse.concatenate([X, sparse.COO(np.ones((X.shape[0], 1)))],
                              axis=1)
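A small, hypothetical usage of add_intercept as defined above: a column of ones is wrapped in COO and appended as the last column of the sparse input.
import numpy as np
import sparse

X = sparse.random((4, 2), density=0.5)
Xi = add_intercept(X)
assert Xi.shape == (4, 3)
assert np.allclose(Xi[:, -1].todense(), 1.0)  # the appended intercept column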
Example #15
    def initialize(self, rate_ion, rate1_cx, rate2_cx, t_ion, t1_edge, t2_edge,
                   n1_edge, n2_edge):
        for v in [rate_ion, rate1_cx, rate2_cx, t_ion]:
            if v.shape != self.r.shape:
                raise ValueError('Shape mismatch, {} and {}'.format(
                    v.shape, self.r.shape))
        self.rate1.initialize(rate_ion + rate1_cx + rate2_cx, rate1_cx, t_ion)
        self.rate2.initialize(rate_ion + rate1_cx + rate2_cx, rate2_cx, t_ion)
        self.t_ion = t_ion
        self.n1_edge = n1_edge
        self.n2_edge = n2_edge
        self.nt1_edge = n1_edge * t1_edge
        self.nt2_edge = n2_edge * t2_edge
        self.ntt1_edge = n1_edge * t1_edge * t1_edge
        self.ntt2_edge = n2_edge * t2_edge * t2_edge

        n = self.size - 1
        # prepare matrices
        # particle balance
        # atom1
        A_part1 = sparse.concatenate([
            -sparse.tensordot(self.rate1.Rij + self.rate1.Sij,
                              self.rate1.slice_l,
                              axes=(0, 0)),
            sparse.tensordot(self.rate1.Dij, self.rate1.slice_l, axes=(0, 0)),
            sparse.COO([], [], shape=(n, n)),
            -sparse.tensordot(self.rate1.Sij, self.rate1.slice_l, axes=(0, 0)),
            sparse.COO([], [], shape=(n, n)),
            sparse.COO([], [], shape=(n, n)),
        ],
                                     axis=0).T
        # atom2
        A_part2 = sparse.concatenate([
            -sparse.tensordot(self.rate2.Sij, self.rate2.slice_l, axes=(0, 0)),
            sparse.COO([], [], shape=(n, n)),
            sparse.COO([], [], shape=(n, n)),
            -sparse.tensordot(self.rate2.Rij + self.rate2.Sij,
                              self.rate2.slice_l,
                              axes=(0, 0)),
            sparse.tensordot(self.rate2.Dij, self.rate2.slice_l, axes=(0, 0)),
            sparse.COO([], [], shape=(n, n)),
        ],
                                     axis=0).T

        # energy balance
        # atom1
        A_engy1 = sparse.concatenate([
            -1.5 *
            sparse.tensordot(self.rate1.Eij, self.rate1.slice_l, axes=(0, 0)),
            -1.5 *
            sparse.tensordot(self.rate1.Rij, self.rate1.slice_l, axes=(0, 0)),
            2.5 *
            sparse.tensordot(self.rate1.Dij, self.rate1.slice_l, axes=(0, 0)),
            -1.5 *
            sparse.tensordot(self.rate1.Eij, self.rate2.slice_l, axes=(0, 0)),
            sparse.COO([], [], shape=(n, n)),
            sparse.COO([], [], shape=(n, n)),
        ],
                                     axis=0).T
        # atom2
        A_engy2 = sparse.concatenate([
            -1.5 *
            sparse.tensordot(self.rate2.Eij, self.rate1.slice_l, axes=(0, 0)),
            sparse.COO([], [], shape=(n, n)),
            sparse.COO([], [], shape=(n, n)),
            -1.5 *
            sparse.tensordot(self.rate2.Eij, self.rate2.slice_l, axes=(0, 0)),
            -1.5 *
            sparse.tensordot(self.rate2.Rij, self.rate2.slice_l, axes=(0, 0)),
            2.5 *
            sparse.tensordot(self.rate2.Dij, self.rate2.slice_l, axes=(0, 0)),
        ],
                                     axis=0).T

        # balance matrix.
        self.A = sparse.concatenate([A_part1, A_part2, A_engy1, A_engy2],
                                    axis=0)
        # boundary conditions
        b_part1 = (
            -self.n1_edge * sparse.tensordot(self.rate1.Rij + self.rate1.Sij,
                                             self.rate1.slice_last,
                                             axes=(0, 0)) +
            self.nt1_edge * sparse.tensordot(
                self.rate1.Dij, self.rate1.slice_last, axes=(0, 0)) -
            self.n2_edge * sparse.tensordot(
                self.rate1.Sij, self.rate1.slice_last, axes=(0, 0))).todense()

        b_part2 = (
            -self.n2_edge * sparse.tensordot(self.rate2.Rij + self.rate2.Sij,
                                             self.rate2.slice_last,
                                             axes=(0, 0)) +
            self.nt2_edge * sparse.tensordot(
                self.rate2.Dij, self.rate2.slice_last, axes=(0, 0)) -
            self.n1_edge * sparse.tensordot(
                self.rate2.Sij, self.rate2.slice_last, axes=(0, 0))).todense()

        b_engy1 = (
            -1.5 * self.n1_edge * sparse.tensordot(
                self.rate1.Eij, self.rate1.slice_last, axes=(0, 0)) -
            1.5 * self.nt1_edge * sparse.tensordot(
                self.rate1.Rij, self.rate1.slice_last, axes=(0, 0)) +
            2.5 * self.ntt1_edge * sparse.tensordot(
                self.rate1.Dij, self.rate1.slice_last, axes=(0, 0)) -
            1.5 * self.n2_edge * sparse.tensordot(
                self.rate1.Eij, self.rate1.slice_last, axes=(0, 0))).todense()

        b_engy2 = (
            -1.5 * self.n2_edge * sparse.tensordot(
                self.rate2.Eij, self.rate2.slice_last, axes=(0, 0)) -
            1.5 * self.nt2_edge * sparse.tensordot(
                self.rate2.Rij, self.rate2.slice_last, axes=(0, 0)) +
            2.5 * self.ntt2_edge * sparse.tensordot(
                self.rate2.Dij, self.rate2.slice_last, axes=(0, 0)) -
            1.5 * self.n1_edge * sparse.tensordot(
                self.rate2.Eij, self.rate2.slice_last, axes=(0, 0))).todense()

        self.b = -np.concatenate([b_part1, b_part2, b_engy1, b_engy2])

        # matrix for the constraint
        L1 = scipy.sparse.hstack([
            scipy.sparse.identity(n),
            scipy.sparse.identity(n) * (-2.0),
            scipy.sparse.identity(n),
            scipy.sparse.coo_matrix((n, n)),
            scipy.sparse.coo_matrix((n, n)),
            scipy.sparse.coo_matrix((n, n))
        ])
        L2 = scipy.sparse.hstack([
            scipy.sparse.coo_matrix((n, n)),
            scipy.sparse.coo_matrix((n, n)),
            scipy.sparse.coo_matrix((n, n)),
            scipy.sparse.identity(n),
            scipy.sparse.identity(n) * (-2.0),
            scipy.sparse.identity(n)
        ])
        self.L = scipy.sparse.vstack([L1, L2])
Example #16
    def derivatives_parallel(
        self,
        inp,
        func,
        n_jobs,
        derivatives_shape,
        descriptor_shape,
        return_descriptor,
        only_physical_cores=False,
        verbose=False,
        prefer="processes",
    ):
        """Used to parallelize the descriptor creation across multiple systems.

        Args:
            inp(list): Contains a tuple of input arguments for each processed
                system. These arguments are fed to the function specified by
                "func".
            func(function): Function that outputs the descriptor when given
                input arguments from "inp".
            n_jobs (int): Number of parallel jobs to instantiate. Parallelizes
                the calculation across samples. Defaults to serial calculation
                with n_jobs=1. If a negative number is given, the number of jobs
                will be calculated as n_cpus + n_jobs, where n_cpus is the
                number of CPUs reported by the OS. With only_physical_cores
                you can control which types of CPUs are counted in n_cpus.
            derivatives_shape(list or None): If a fixed size output is produced from
                each job, this contains its shape. For variable size output
                this parameter is set to None
            descriptor_shape(list or None): If a fixed size output is produced from
                each job, this contains its shape. For variable size output
                this parameter is set to None
            only_physical_cores (bool): If a negative n_jobs is given,
                determines which types of CPUs are used in calculating the
                number of jobs. If set to False (default), also virtual CPUs
                are counted.  If set to True, only physical CPUs are counted.
            verbose(bool): Controls whether to print the progress of each job
                to the console.
            prefer(str): The parallelization method. Valid options are:

                - "processes": Parallelization based on processes. Uses the
                  "loky" backend in joblib to serialize the jobs and run them
                  in separate processes. Using separate processes has a bigger
                  memory and initialization overhead than threads, but may
                  provide better scalability if performance is limited by the
                  Global Interpreter Lock (GIL).

                - "threads": Parallelization based on threads. Has bery low
                  memory and initialization overhead. Performance is limited by
                  the amount of pure python code that needs to run. Ideal when
                  most of the calculation time is used by C/C++ extensions that
                  release the GIL.

        Returns:
            np.ndarray | sparse.COO | list: The descriptor output
            for each given input. The return type depends on the descriptor
            setup.
        """
        # Determine the number of jobs
        if n_jobs < 0:
            n_jobs = joblib.cpu_count(only_physical_cores) + n_jobs
        if n_jobs <= 0:
            raise ValueError("Invalid number of jobs specified.")

        # Split data into n_jobs (almost) equal jobs
        n_samples = len(inp)
        is_sparse = self._sparse
        k, m = divmod(n_samples, n_jobs)
        jobs = (inp[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
                for i in range(n_jobs))

        def create_multiple_with_descriptor(arguments, func, index, verbose):
            """This is the function that is called by each job but with
            different parts of the data.
            """
            # Initialize output
            n_samples = len(arguments)
            if derivatives_shape:
                shape_der = [n_samples]
                shape_der.extend(derivatives_shape)
                if is_sparse:
                    data_der = []
                    coords_der = []
                else:
                    derivatives = np.empty(shape_der, dtype=self.dtype)
            else:
                derivatives = []
            if descriptor_shape:
                shape_des = [n_samples]
                shape_des.extend(descriptor_shape)
                if is_sparse:
                    data_des = []
                    coords_des = []
                else:
                    descriptors = np.empty(shape_des, dtype=self.dtype)
            else:
                descriptors = []
            old_percent = 0

            # Loop through all samples assigned for this job
            for i_sample, i_arg in enumerate(arguments):
                i_der, i_des = func(*i_arg)
                if descriptor_shape:
                    if is_sparse:
                        sample_index = np.full((1, i_des.data.size), i_sample)
                        data_des.append(i_des.data)
                        coords_des.append(
                            np.vstack((sample_index, i_des.coords)))
                    else:
                        descriptors[i_sample] = i_des
                else:
                    descriptors.append(i_des)

                if derivatives_shape:
                    if is_sparse:
                        sample_index = np.full((1, i_der.data.size), i_sample)
                        data_der.append(i_der.data)
                        coords_der.append(
                            np.vstack((sample_index, i_der.coords)))
                    else:
                        derivatives[i_sample] = i_der
                else:
                    derivatives.append(i_der)

                if verbose:
                    current_percent = (i_sample + 1) / n_samples * 100
                    if current_percent >= old_percent + 1:
                        old_percent = current_percent
                        print("Process {0}: {1:.1f} %".format(
                            index, current_percent))

            if is_sparse:
                if descriptor_shape is not None:
                    data_des = np.concatenate(data_des)
                    coords_des = np.concatenate(coords_des, axis=1)
                    descriptors = sp.COO(coords_des, data_des, shape=shape_des)
                if derivatives_shape is not None:
                    data_der = np.concatenate(data_der)
                    coords_der = np.concatenate(coords_der, axis=1)
                    derivatives = sp.COO(coords_der, data_der, shape=shape_der)

            return ((derivatives, descriptors), index)

        def create_multiple_without_descriptor(arguments, func, index,
                                               verbose):
            """This is the function that is called by each job but with
            different parts of the data.
            """
            # Initialize output
            n_samples = len(arguments)
            if derivatives_shape:
                shape_der = [n_samples]
                shape_der.extend(derivatives_shape)
                if is_sparse:
                    data_der = []
                    coords_der = []
                else:
                    derivatives = np.empty(shape_der, dtype=self.dtype)
            else:
                derivatives = []

            old_percent = 0

            # Loop through all samples assigned for this job
            for i_sample, i_arg in enumerate(arguments):
                i_der = func(*i_arg)
                if derivatives_shape:
                    if is_sparse:
                        sample_index = np.full((1, i_der.data.size), i_sample)
                        data_der.append(i_der.data)
                        coords_der.append(
                            np.vstack((sample_index, i_der.coords)))
                    else:
                        derivatives[i_sample] = i_der
                else:
                    derivatives.append(i_der)

                if verbose:
                    current_percent = (i_sample + 1) / n_samples * 100
                    if current_percent >= old_percent + 1:
                        old_percent = current_percent
                        print("Process {0}: {1:.1f} %".format(
                            index, current_percent))

            if is_sparse and derivatives_shape is not None:
                data_der = np.concatenate(data_der)
                coords_der = np.concatenate(coords_der, axis=1)
                derivatives = sp.COO(coords_der, data_der, shape=shape_der)
            return ((derivatives, ), index)

        if return_descriptor:
            vec_lists = Parallel(n_jobs=n_jobs, prefer=prefer)(
                delayed(create_multiple_with_descriptor)(i_args, func, index,
                                                         verbose)
                for index, i_args in enumerate(jobs))
        else:
            vec_lists = Parallel(n_jobs=n_jobs, prefer=prefer)(
                delayed(create_multiple_without_descriptor)(i_args, func,
                                                            index, verbose)
                for index, i_args in enumerate(jobs))

        # Restore the calculation order. If using the threading backend, the
        # input order may have been lost.
        vec_lists.sort(key=lambda x: x[1])

        # If the results are of the same length, we can simply concatenate them
        # into one numpy array. Otherwise we will return a regular python list.
        der_lists = [x[0][0] for x in vec_lists]
        if derivatives_shape:
            if is_sparse:
                derivatives = sp.concatenate(der_lists, axis=0)
            else:
                derivatives = np.concatenate(der_lists, axis=0)
        else:
            derivatives = []
            for part in der_lists:
                derivatives.extend(part)
        if return_descriptor:
            des_lists = [x[0][1] for x in vec_lists]
            if descriptor_shape:
                if is_sparse:
                    descriptors = sp.concatenate(des_lists, axis=0)
                else:
                    descriptors = np.concatenate(des_lists, axis=0)
            else:
                descriptors = []
                for part in des_lists:
                    descriptors.extend(part)
            return (derivatives, descriptors)

        return derivatives
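A quick, standalone illustration (not part of the library code) of the job-splitting formula used above: 10 samples across 3 jobs yield chunks of size 4, 3 and 3, covering every sample exactly once and in order.
# Same slicing arithmetic as in derivatives_parallel.
n_samples, n_jobs = 10, 3
inp = list(range(n_samples))
k, m = divmod(n_samples, n_jobs)
jobs = [inp[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n_jobs)]
assert [len(j) for j in jobs] == [4, 3, 3]
assert sum(jobs, []) == inp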
Example #17
def main(param_file, runs):
    base_path = Path(__file__).parent  # absolute working directory path

    # Read parameters from file
    params = open(base_path / param_file, 'r')
    parameters = {}  # empty parameter dictionary
    for x in params:
        x = x.rstrip()
        x = x.split(': ')
        try:
            float(x[1])
            parameters[x[0]] = float(
                x[1])  # creates dictionary with parameters
        except ValueError:
            parameters[x[0]] = x[1]  # creates dictionary with parameters
    params.close()

    sparse_matrix = sparse.load_npz(base_path / parameters['Network'])
    #print(sparse_matrix)  # prints network info (for testing)

    temp = parameters['Output Directory']
    output = base_path / parameters['Output Directory']

    # remove directory if empty
    try:
        os.rmdir(output)
    except OSError as ex:
        pass

    os.mkdir(output)

    # set parameters
    parameters['Output Directory'] = str(output) + '/'
    parameters['Students'] = sparse_matrix.shape[
        1]  # only used with two networks
    parameters['Adjusted ICU'] = parameters['ICU Rate'] / parameters[
        'Breath Rate']  # adjusts ICU rate for simulation
    parameters['Adjusted ICU 2'] = parameters['ICU Rate 2'] / parameters[
        'Breath Rate 2']  # adjusts ICU rate for simulation

    # get course length and room sizes from csv
    if parameters['Class Mode'] == 1:
        class_file = str(base_path / parameters['Courses'])
        df = pd.read_csv(class_file, usecols=['start', 'end', 'capacity'])
        df['start'] = pd.to_datetime(df['start'])
        df['end'] = pd.to_datetime(df['end'])

        # calculate difference between start and end times in hours
        df['diff'] = df['end'] - df['start']
        df['diff'] = df['diff'] / np.timedelta64(1, 'h')

        # get capacity and class start and end times from course_info csv
        class_length = df['diff'].tolist()
        room_size = df['capacity'].tolist()

    # adds the two networks together, results ONLY report for second network
    if parameters['Other Network'] != 'None':
        temp_matrix = sparse.load_npz(base_path / parameters['Other Network'])
        sparse_matrix = sparse.concatenate((sparse_matrix, temp_matrix),
                                           axis=1)
        # print(sparse_matrix)  # prints network info (for testing)

    # In person and Online class distribution
    classes_type = [0] * (sparse_matrix.shape[2])
    classes_size = [0] * (sparse_matrix.shape[2])
    inPerson_classes = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: []}
    inPerson_people = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: []}
    # inPerson_infectability = {0:[], 1:[], 2:[], 3:[], 4:[], 5:[], 6:[]}
    for day in range(5):
        sizes = sparse_matrix[day].sum(axis=0)
        for class_id in range(sparse_matrix.shape[2]):
            classes_size[class_id] = max(classes_size[class_id],
                                         sizes[class_id])

    for class_id in range(sparse_matrix.shape[2]):
        if classes_size[class_id] <= parameters[
                'Upper Limit Offline Class Size'] and classes_size[
                    class_id] > 1:
            classes_type[class_id] = 1

    for day in range(5):
        sizes = sparse_matrix[day].sum(axis=0)
        for course in range(sparse_matrix.shape[2]):
            if (sizes[course] <= parameters['Upper Limit Offline Class Size']
                    and sizes[course] > 1):
                inPerson_classes[day].append(course)
                inPerson_people[day] = list(
                    set(inPerson_people[day])
                    | set(sparse_matrix[day, :, course].nonzero()[0]))

    # calculate infectability
    infectability = [0] * (sparse_matrix.shape[1])
    for person in range(sparse_matrix.shape[1]):
        enrolled_courses = [
            course_id for day in range(5)
            for course_id in sparse_matrix[day, person].nonzero()[0]
        ]
        infection = sum([
            classes_size[course_id] for course_id in enrolled_courses
            if classes_type[course_id] == 1
        ])
        if infection == 0:
            infectability[person] = 0
        else:
            infectability[person] = math.log(
                infection, 2)  # infectability = log(weekly interactions, 2)
        # print("person:", person)
        # print("courses:", enrolled_courses)
        # print ("Infectability:", infectability[person])
    # ds = [infection for infection in infectability if infection > 0]
    # num_bins = 100
    # a = plt.hist(ds, num_bins, facecolor='blue', alpha=0.5)

    # with open('person_infectability.csv', 'w+', newline = '') as f:
    #     writer = csv.writer(f)
    #     l = []
    #     writer.writerow(l)

    #     l.append('NumPerson')
    #     l.append('Infectability')
    #     writer.writerow(l)
    #     for i in range(len(a[0])):
    #         l = []
    #         l.append(a[0][i])
    #         l.append(a[1][i])
    #         writer.writerow(l)
    # f.close()
    #n, bins, patches = plt.hist(ds, num_bins, facecolor='blue', alpha=0.5)
    #plt.show()

    for day in range(5):
        print("Day:", day, "In person Classes", len(inPerson_classes[day]),
              "In person People", len(inPerson_people[day]))

    # for day in range(5):
    #     course_people_infectability = {person_:infectability[person_] for person_ in inPerson_people[day]}
    #     inPerson_infectability[day] = [(k,v) for k, v in sorted(course_people_infectability.items(), key=lambda item: item[1])][::-1]

    # set more parameters
    parameters['Threshold'] = 0.0034 * sparse_matrix.shape[
        1]  # used for calculating likelihood of catching disease, scaled with population size
    parameters['Patient Zero'] = sparse_matrix.shape[1] * parameters[
        'Initial Infected Population']
    # if parameters['Test Upon Entry']:
    #     Initially_infected = sparse_matrix.shape[1] * parameters['Initial Infected Population']
    #     False_positve_cases = (sparse_matrix.shape[1] - Initially_infected) * parameters['Asymptomatic False Positive Rate']
    #     False_negative_cases = Initially_infected * parameters['Asymptomatic False Negative Rate']
    #     parameters['Patient Zero'] =  Initially_infected - False_negative_cases + False_positve_cases # number of seeded infected, scaled with population size
    # else:
    #     parameters['Patient Zero'] = sparse_matrix.shape[1] * parameters['Initial Infected Population']
    print("Seeds:", parameters['Patient Zero'])

    p = Pool(processes=1)  # max 10 processes

    for i in range(runs):
        # iteration seeding start
        seed = scipy.sparse.lil_matrix(
            (sparse_matrix.shape[0],
             sparse_matrix.shape[1]))  # modifiable time person matrix
        test_state = scipy.sparse.lil_matrix(
            (sparse_matrix.shape[0], sparse_matrix.shape[1]))
        #test_state[0,:] = 0
        days_infected = [
            0
        ] * sparse_matrix.shape[1]  # days infected for each person

        uniqueDay0 = np.unique(sparse_matrix[
            0, :, :].nonzero())  # gets unique users from first day
        z = 0  # tracks number seeded (to prevent seeding same person twice)

        while z < parameters[
                'Patient Zero']:  # sets n number of check-ins on day 0 to infection
            pat_zero = random.choice(uniqueDay0)  # get random person
            if (z == 0):  # guarantees a spreader
                seed[0, pat_zero] = 2  # asymptomatic spreader
                days_infected[pat_zero] = int(parameters['Incubation Mean'])
                #days_infected[pat_zero] = int(parameters['Incubation Mean']) - 2
                z += 1

            elif (seed[0, pat_zero] == 0):
                seed[0, pat_zero] = 1  # start as incubating
                days_infected[pat_zero] = int(
                    (parameters['Incubation Mean'] - 1) -
                    (z % (parameters['Incubation Mean'] - 1)))
                #days_infected[pat_zero] = int((parameters['Incubation Mean'] - 3) - (z % (parameters['Incubation Mean'] - 3)))
                z += 1

        #extra_test_capacity = parameters['Test Capacity']
        #run_test(sparse_matrix, 0, seed, test_state, parameters, extra_test_capacity, infectability, inPerson_people, inPerson_classes, test_classes)

        #run_sim(seed, test_state, days_infected, sparse_matrix, classes_type, classes_size, parameters, i, class_length, room_size, infectability, inPerson_people, inPerson_classes)

        p.apply_async(run_sim,
                      args=(
                          seed,
                          test_state,
                          days_infected,
                          sparse_matrix,
                          classes_type,
                          classes_size,
                          parameters,
                          i,
                          class_length,
                          room_size,
                          infectability,
                          inPerson_people,
                          inPerson_classes,
                      ))
        # scipy.sparse.save_npz(first_save_loc + '/state_matrix.npz', seed.tocoo())
    p.close()
    p.join()
    # find mean, standard error and auto generates an SEIR graph
    getMean(sparse_matrix.shape[0], parameters['Output Directory'], temp)
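The parameter file read at the top of main() is plain text with one "Key: value" pair per line; numeric values are cast to float, everything else stays a string. A purely hypothetical example (key names taken from the lookups in the code above, values invented):
example_params = """\
Network: networks/campus.npz
Other Network: None
Output Directory: results_run1
Class Mode: 0
ICU Rate: 0.05
Breath Rate: 0.2
ICU Rate 2: 0.05
Breath Rate 2: 0.2
Upper Limit Offline Class Size: 50
Incubation Mean: 5
Initial Infected Population: 0.001
"""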
Example #18
    def to_sparse_array(self,
                        min_days=90,
                        max_days=None,
                        rebase=True,
                        **kwargs):
        if self.aggregated_data is not None:
            clients = []
            if max_days is None:
                max_days = int(self.aggregated_data['rel_day'].max()
                               ) + 1  # infer from data
                if max_days < 91:
                    max_days = 91
                self.log.info(
                    'max_days inferred from data: {}'.format(max_days))

            if rebase:
                assert min_days < max_days, 'It is not possible to rebase with min_days={}, max_days={}'.format(
                    min_days, max_days)
                self.log.info(
                    'Rebasing observations with min_days={}'.format(min_days))

            num_col = int(self.aggregated_data.shape[-1]) - 1
            client_shape = [1, max_days, num_col]

            agg_by_id = self.aggregated_data.groupby('cl_id')

            for cl_id, cl_group in agg_by_id:
                client_array = np.zeros(client_shape)
                client_values = cl_group.values
                client_index = client_values[:, 1].astype(
                    int)  # second column - rel_day values --> to 0_dim values

                if rebase:
                    # Rebase but allow no less than 90 days observation period:
                    client_max_day = client_index.max()
                    if client_max_day < min_days:
                        client_max_day = min_days
                    rebase_index = max_days - client_max_day - 1
                    client_index += rebase_index

                client_array[:,
                             client_index, :] = client_values[:,
                                                              1:]  # remove cl_id (gone to new dim) and rel_day

                # Fill all records for single client:
                client_array[..., 0] = int(cl_id)  # id

                if np.isnan(cl_group.target_sum).any():
                    client_array[..., -1] = float('NaN')
                    client_array[..., -2] = float('NaN')

                else:
                    client_array[..., -1] = cl_group.target_sum.mean()
                    client_array[..., -2] = cl_group.target_flag.all()

                # Save as sparse 3d array:
                clients.append(sparse.COO(client_array.astype('float32')))

            full_array_sparse = sparse.concatenate(clients, axis=0)

        else:
            self.log.warning(
                'No aggregated data found, call .aggregate_by_daily_sums() method first.'
            )
            full_array_sparse = None

        return full_array_sparse
Example #19
def test_upcast_2(a, b):
    assert sparse.concatenate((a, a)).indptr.dtype == np.uint16
    assert sparse.stack((b, b)).indptr.dtype == np.uint16
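Like Example #3, this test depends on fixtures defined elsewhere; judging from Example #8, a and b are plausibly GCXS arrays built with uint8 index dtypes so that concatenating or stacking them forces the indptr to upcast. A hypothetical sketch:
import numpy as np
import pytest
import sparse

@pytest.fixture
def a():
    return sparse.random((8, 7, 6), density=0.5, format="gcxs", idx_dtype=np.uint8)

@pytest.fixture
def b():
    return sparse.random((6, 6, 6), density=0.8, format="gcxs", idx_dtype=np.uint8)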
Example #20
    def create_single(
        self,
        system,
        positions=None,
    ):
        """Return the local many-body tensor representation for the given
        system and positions.

        Args:
            system (:class:`ase.Atoms` | :class:`.System`): Input system.
            positions (iterable): Positions or atom index of points, from
                which local_mbtr is created. Can be a list of integer numbers
                or a list of xyz-coordinates. If integers provided, the atoms
                at that index are used as centers. If positions provided, new
                atoms are added at that position. If no positions are provided,
                all atoms in the system will be used as centers.

        Returns:
            1D ndarray: The local many-body tensor representations of given
            positions, for k terms, as an array. These are ordered as given in
            positions.
        """
        # Transform the input system into the internal System-object
        system = self.get_system(system)

        # Check that the system does not have elements that are not in the list
        # of atomic numbers
        atomic_number_set = set(system.get_atomic_numbers())
        self.check_atomic_numbers(atomic_number_set)
        self._interaction_limit = len(system)
        system_positions = system.get_positions()
        system_atomic_numbers = system.get_atomic_numbers()

        # Ensure that the atomic number 0 is not present in the system
        if 0 in atomic_number_set:
            raise ValueError(
                "Please do not use the atomic number 0 in local MBTR as it "
                "is reserved to mark the atoms use as analysis centers.")

        # Form a list of indices, positions and atomic numbers for the local
        # centers. k=3 and k=2 use a slightly different approach, so two
        # versions are built
        i_new = len(system)
        indices_k2 = []
        new_pos_k2 = []
        new_atomic_numbers_k2 = []
        indices_k3 = []
        new_pos_k3 = []
        new_atomic_numbers_k3 = []
        n_atoms = len(system)
        if positions is not None:
            n_loc = len(positions)

            # Check validity of position definitions and create final cartesian
            # position list
            if len(positions) == 0:
                raise ValueError(
                    "The argument 'positions' should contain a non-empty set of"
                    " atomic indices or cartesian coordinates with x, y and z "
                    "components.")
            for i in positions:
                if np.issubdtype(type(i), np.integer):
                    i_len = len(system)
                    if i >= i_len or i < 0:
                        raise ValueError(
                            "The provided index {} is not valid for the system "
                            "with {} atoms.".format(i, i_len))
                    indices_k2.append(i)
                    indices_k3.append(i)
                    new_pos_k2.append(system_positions[i])
                    new_atomic_numbers_k2.append(system_atomic_numbers[i])
                elif isinstance(i, (list, tuple, np.ndarray)):
                    if len(i) != 3:
                        raise ValueError(
                            "The argument 'positions' should contain a "
                            "non-empty set of atomic indices or cartesian "
                            "coordinates with x, y and z components.")
                    new_pos_k2.append(np.array(i))
                    new_pos_k3.append(np.array(i))
                    new_atomic_numbers_k2.append(0)
                    new_atomic_numbers_k3.append(0)
                    i_new += 1
                else:
                    raise ValueError(
                        "Create method requires the argument 'positions', a "
                        "list of atom indices and/or positions.")
        # If positions are not supplied, it is assumed that each atom is used
        # as a center
        else:
            n_loc = n_atoms
            indices_k2 = np.arange(n_atoms)
            indices_k3 = np.arange(n_atoms)
            new_pos_k2 = system.get_positions()
            new_atomic_numbers_k2 = system.get_atomic_numbers()

        # Calculate the "raw" outputs for each term.
        mbtr = {}
        if self.k2 is not None:
            new_system_k2 = System(
                symbols=new_atomic_numbers_k2,
                positions=new_pos_k2,
            )
            mbtr["k2"] = self._get_k2(system, new_system_k2, indices_k2)

        if self.k3 is not None:
            new_system_k3 = System(
                symbols=new_atomic_numbers_k3,
                positions=new_pos_k3,
            )
            mbtr["k3"] = self._get_k3(system, new_system_k3, indices_k3)

        # Handle normalization
        if self.normalization == "l2_each":
            if self.flatten is True:
                for key, value in mbtr.items():
                    norm = np.linalg.norm(value.data)
                    value /= norm
            else:
                for key, value in mbtr.items():
                    for array in value:
                        i_data = array.ravel()
                        i_norm = np.linalg.norm(i_data)
                        array /= i_norm

        # Flatten output if requested
        if self.flatten:
            keys = sorted(mbtr.keys())
            if len(keys) > 1:
                result = sparse.concatenate([mbtr[key] for key in keys],
                                            axis=1)
            else:
                result = mbtr[keys[0]]

            # Make into a dense array if requested
            if not self.sparse:
                result = result.todense()

        # Otherwise return a list of dictionaries, each dictionary containing
        # the requested unflattened tensors
        else:
            result = np.empty((n_loc), dtype="object")
            for i_loc in range(n_loc):
                i_dict = {}
                for key in mbtr.keys():
                    tensor = mbtr[key]
                    i_dict[key] = tensor[i_loc]
                result[i_loc] = i_dict

        return result