def test_concatenate():
    xx = sparse.random((2, 3, 4), density=0.5, format="gcxs")
    x = xx.todense()
    yy = sparse.random((5, 3, 4), density=0.5, format="gcxs")
    y = yy.todense()
    zz = sparse.random((4, 3, 4), density=0.5, format="gcxs")
    z = zz.todense()

    assert_eq(
        np.concatenate([x, y, z], axis=0),
        sparse.concatenate([xx, yy, zz], axis=0),
    )

    xx = sparse.random((5, 3, 1), density=0.5, format="gcxs")
    x = xx.todense()
    yy = sparse.random((5, 3, 3), density=0.5, format="gcxs")
    y = yy.todense()
    zz = sparse.random((5, 3, 2), density=0.5, format="gcxs")
    z = zz.todense()

    assert_eq(
        np.concatenate([x, y, z], axis=2),
        sparse.concatenate([xx, yy, zz], axis=2),
    )
    assert_eq(
        np.concatenate([x, y, z], axis=-1),
        sparse.concatenate([xx, yy, zz], axis=-1),
    )
def initialize(self, rate_ion, rate_cx, t_ion, t_edge, n_edge):
    for v in [rate_ion, rate_cx, t_ion]:
        if v.shape != self.r.shape:
            raise ValueError('Shape mismatch, {} and {}'.format(
                v.shape, self.r.shape))

    self.rate.initialize(rate_ion + rate_cx, rate_cx, t_ion)
    self.t_ion = t_ion
    self.n_edge = n_edge
    self.nt_edge = n_edge * t_edge
    self.ntt_edge = n_edge * t_edge * t_edge

    n = self.size - 1
    # prepare matrices
    # matrix for the particle balance
    A_part = sparse.concatenate([
        -sparse.tensordot(self.rate.Rij + self.rate.Sij, self.rate.slice_l,
                          axes=(0, 0)),
        sparse.tensordot(self.rate.Dij, self.rate.slice_l, axes=(0, 0)),
        sparse.COO([], [], shape=(n, n))
    ], axis=0).T
    # matrix for the energy balance
    A_engy = sparse.concatenate([
        -1.5 * sparse.tensordot(self.rate.Eij, self.rate.slice_l, axes=(0, 0)),
        -1.5 * sparse.tensordot(self.rate.Rij, self.rate.slice_l, axes=(0, 0)),
        2.5 * sparse.tensordot(self.rate.Dij, self.rate.slice_l, axes=(0, 0))
    ], axis=0).T
    # balance matrix
    self.A = sparse.concatenate([A_part, A_engy], axis=0)

    # boundary conditions
    b_part = (-self.n_edge * sparse.tensordot(
        self.rate.Rij + self.rate.Sij, self.rate.slice_last, axes=(0, 0))
        + self.nt_edge * sparse.tensordot(
            self.rate.Dij, self.rate.slice_last, axes=(0, 0))).todense()
    b_engy = (
        -1.5 * self.n_edge * sparse.tensordot(
            self.rate.Eij, self.rate.slice_last, axes=(0, 0))
        - 1.5 * self.nt_edge * sparse.tensordot(
            self.rate.Rij, self.rate.slice_last, axes=(0, 0))
        + 2.5 * self.ntt_edge * sparse.tensordot(
            self.rate.Dij, self.rate.slice_last, axes=(0, 0))).todense()
    self.b = -np.concatenate([b_part, b_engy])

    # matrix for the constraint
    self.L = scipy.sparse.hstack([
        scipy.sparse.identity(n),
        scipy.sparse.identity(n) * (-2.0),
        scipy.sparse.identity(n)
    ])
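The assembly above builds each block row of the balance matrix by stacking (n, n) blocks along axis 0 with sparse.concatenate and then transposing. A minimal sketch of that pattern, with hypothetical blocks B1 and B2 standing in for the tensordot results:

import sparse

n = 4
B1 = sparse.random((n, n), density=0.3)   # stand-in for a tensordot result
B2 = sparse.random((n, n), density=0.3)
Z = sparse.COO([], [], shape=(n, n))      # all-zero placeholder block

# Stacking along axis 0 gives a (3n, n) column of blocks; the transpose
# turns it into an (n, 3n) block row, as used for A_part and A_engy above.
row = sparse.concatenate([B1, B2, Z], axis=0).T
assert row.shape == (n, 3 * n)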
def test_concatenate_2(xx, yy, zz):
    x = xx.todense()
    y = yy.todense()
    z = zz.todense()

    assert_eq(
        np.concatenate([x, y, z], axis=2),
        sparse.concatenate([xx, yy, zz], axis=2),
    )
    assert_eq(
        np.concatenate([x, y, z], axis=-1),
        sparse.concatenate([xx, yy, zz], axis=-1),
    )
def stack():
    rings = masks.radial_bins(centerX=cx, centerY=cy,
                              imageSizeX=detector_x, imageSizeY=detector_y,
                              radius=ro, radius_inner=ri, n_bins=n_bins,
                              use_sparse=use_sparse, dtype=np.complex64)

    orders = np.arange(max_order + 1)

    r, phi = masks.polar_map(centerX=cx, centerY=cy,
                             imageSizeX=detector_x, imageSizeY=detector_y)
    modulator = np.exp(phi * orders[:, np.newaxis, np.newaxis] * 1j)

    if use_sparse:
        rings = rings.reshape((rings.shape[0], 1, *rings.shape[1:]))
        ring_stack = [rings] * len(orders)
        ring_stack = sparse.concatenate(ring_stack, axis=1)
        ring_stack *= modulator
    else:
        ring_stack = rings[:, np.newaxis, ...] * modulator
    return ring_stack.reshape((-1, detector_y, detector_x))
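For reference, the sparse branch above repeats the ring stack along a new axis with sparse.concatenate and then multiplies by the dense modulator; the dense branch relies on plain broadcasting. A small self-contained check of that equivalence, where rings and phi are made-up stand-ins for the masks module outputs:

import numpy as np
import sparse

rings = sparse.random((4, 8, 8), density=0.3)            # stand-in ring masks
orders = np.arange(3)
phi = np.linspace(0, 2 * np.pi, 64).reshape(8, 8)        # stand-in polar map
modulator = np.exp(phi * orders[:, np.newaxis, np.newaxis] * 1j)

# sparse path: add a length-1 axis, repeat via concatenate, then modulate
stacked = sparse.concatenate(
    [rings.reshape((rings.shape[0], 1, *rings.shape[1:]))] * len(orders), axis=1)
sparse_result = stacked * modulator

# dense path: plain broadcasting against the new axis
dense_result = rings.todense()[:, np.newaxis, ...] * modulator

assert np.allclose(sparse_result.todense(), dense_result)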
def concat(L):
    if isinstance(L[0], np.ndarray):
        return np.concatenate(L, axis=0)
    elif isinstance(L[0], (pd.DataFrame, pd.Series)):
        return pd.concat(L, axis=0)
    elif ss and isinstance(L[0], ss.spmatrix):
        return ss.vstack(L, format='csr')
    elif sparse and isinstance(L[0], sparse.SparseArray):
        return sparse.concatenate(L, axis=0)
    else:
        raise TypeError("Data must be either numpy arrays, pandas dataframes "
                        "or sparse matrices. Got %s" % type(L[0]))
def concat(seq):
    if isinstance(seq[0], np.ndarray):
        return np.concatenate(seq, axis=0)
    elif isinstance(seq[0], (pd.DataFrame, pd.Series)):
        return pd.concat(seq, axis=0)
    elif ss and isinstance(seq[0], ss.spmatrix):
        return ss.vstack(seq, format='csr')
    elif sparse and isinstance(seq[0], sparse.SparseArray):
        return sparse.concatenate(seq, axis=0)
    else:
        raise TypeError(
            'Data must be one of: numpy arrays, pandas dataframes, sparse matrices '
            f'(from scipy or from sparse). Got {type(seq[0])}.')
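A short usage sketch of the dispatch above, assuming numpy, scipy.sparse (as ss) and pydata/sparse are all importable; the inputs are purely illustrative:

import numpy as np
import scipy.sparse as ss
import sparse

# dense ndarrays go through np.concatenate
assert concat([np.zeros((2, 3)), np.ones((2, 3))]).shape == (4, 3)

# pydata/sparse arrays (COO is a SparseArray) go through sparse.concatenate
assert concat([sparse.random((2, 3), density=0.5),
               sparse.random((2, 3), density=0.5)]).shape == (4, 3)

# scipy.sparse matrices are stacked vertically and returned in CSR format
assert concat([ss.identity(3), ss.identity(3)]).format == 'csr'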
def _compute_masks(self):
    """
    Call mask factories and combine into a mask stack

    Returns
    -------
    a list of masks with contents as they were created by the factories
    and converted uniformly to dense or sparse matrices depending on
    ``self.use_sparse``.
    """
    # Make sure all the masks are either sparse or dense
    # If the use_sparse property is set to True or False, it takes precedence.
    # If it is None, use sparse only if all masks are sparse
    # and set the use_sparse property accordingly
    default_sparse = 'scipy.sparse'

    if callable(self.mask_factories):
        raw_masks = self.mask_factories()
        if not is_sparse(raw_masks):
            default_sparse = False
        mask_slices = [raw_masks]
    else:
        mask_slices = []
        for f in self.mask_factories:
            m = f()
            # Scipy.sparse is always 2D, so we have to convert here
            # before reshaping
            if scipy.sparse.issparse(m):
                m = sparse.COO.from_scipy_sparse(m)
            # We reshape to be a stack of 1 so that we can unify code below
            m = m.reshape((1, ) + m.shape)
            if not is_sparse(m):
                default_sparse = False
            mask_slices.append(m)

    if self._use_sparse is None:
        self._use_sparse = default_sparse

    if self.use_sparse:
        # Conversion to the correct back-end will happen later
        # Use sparse.pydata because it implements the array interface,
        # which makes mask handling easier
        masks = sparse.concatenate([to_sparse(m) for m in mask_slices])
    else:
        masks = np.concatenate([to_dense(m) for m in mask_slices])
    return masks
def test_upcast():
    a = sparse.random((50, 50, 50), density=0.1, format="coo", idx_dtype=np.uint8)
    b = a.asformat("gcxs")
    assert b.indices.dtype == np.uint16

    a = sparse.random((8, 7, 6), density=0.5, format="gcxs", idx_dtype=np.uint8)
    b = sparse.random((6, 6, 6), density=0.8, format="gcxs", idx_dtype=np.uint8)
    assert sparse.concatenate((a, a)).indptr.dtype == np.uint16
    assert sparse.stack((b, b)).indptr.dtype == np.uint16
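The upcast in the concatenation case happens because joining the arrays roughly doubles the number of stored elements, so compressed pointers that fit into uint8 no longer do. A compact restatement of that reasoning, under the same assumptions as the test:

import numpy as np
import sparse

# Each input stores about 0.5 * 8 * 7 * 6 ≈ 168 elements, so uint8 pointers
# (max 255) suffice; the concatenated result stores ~336 elements, which
# forces the GCXS indptr up to uint16.
a = sparse.random((8, 7, 6), density=0.5, format="gcxs", idx_dtype=np.uint8)
c = sparse.concatenate((a, a))
assert c.indptr.dtype == np.uint16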
def _compute_masks(self):
    """
    Call mask factories and convert to the dataset dtype

    Returns
    -------
    a list of masks with contents as they were created by the factories
    and converted uniformly to dense or sparse matrices depending on
    ``self.use_sparse``.
    """
    # Make sure all the masks are either sparse or dense
    # If the use_sparse property is set to True or False, it takes precedence.
    # If it is None, use sparse only if all masks are sparse
    # and set the use_sparse property accordingly
    if callable(self.mask_factories):
        raw_masks = self.mask_factories().astype(self.dtype)
        default_sparse = is_sparse(raw_masks)
        mask_slices = [raw_masks]
    else:
        mask_slices = []
        default_sparse = True
        for f in self.mask_factories:
            m = f().astype(self.dtype)
            # Scipy.sparse is always 2D, so we have to convert here
            # before reshaping
            if scipy.sparse.issparse(m):
                m = sparse.COO.from_scipy_sparse(m)
            # We reshape to be a stack of 1 so that we can unify code below
            m = m.reshape((1, ) + m.shape)
            default_sparse = default_sparse and is_sparse(m)
            mask_slices.append(m)

    if self.use_sparse is None:
        self.use_sparse = default_sparse

    if self.use_sparse:
        masks = sparse.concatenate([to_sparse(m) for m in mask_slices])
    else:
        masks = np.concatenate([to_dense(m) for m in mask_slices])
    return masks
def _concat_same_type(cls, to_concat):
    return SparseExtensionArray(
        coords=sparse.concatenate([array.data for array in to_concat]))
def test_concatenate_noarrays():
    with pytest.raises(ValueError):
        sparse.concatenate([])
def create_single(self, system):
    """Return the many-body tensor representation for the given system.

    Args:
        system (:class:`ase.Atoms` | :class:`.System`): Input system.

    Returns:
        dict | np.ndarray | sparse.COO: The return type is specified by the
        'flatten' and 'sparse'-parameters. If the output is not flattened, a
        dictionary containing the MBTR outputs as numpy arrays is created.
        Each output is under a "kX" key. If the output is flattened, a single
        concatenated output vector is returned, either as a sparse or a dense
        vector.
    """
    # Transform the input system into the internal System-object
    system = self.get_system(system)

    # Ensure variables are re-initialized when a new system is introduced
    self.system = system
    self._interaction_limit = len(system)

    # Check that the system does not have elements that are not in the list
    # of atomic numbers
    self.check_atomic_numbers(system.get_atomic_numbers())

    mbtr = {}
    if self.k1 is not None:
        mbtr["k1"] = self._get_k1(system)
    if self.k2 is not None:
        mbtr["k2"] = self._get_k2(system)
    if self.k3 is not None:
        mbtr["k3"] = self._get_k3(system)

    # Handle normalization
    if self.normalization == "l2_each":
        if self.flatten is True:
            for key, value in mbtr.items():
                i_data = np.array(value.data)
                i_norm = np.linalg.norm(i_data)
                mbtr[key] = value / i_norm
        else:
            for key, value in mbtr.items():
                i_data = value.ravel()
                i_norm = np.linalg.norm(i_data)
                mbtr[key] = value / i_norm
    elif self.normalization == "n_atoms":
        n_atoms = len(self.system)
        if self.flatten is True:
            for key, value in mbtr.items():
                mbtr[key] = value / n_atoms
        else:
            for key, value in mbtr.items():
                mbtr[key] = value / n_atoms

    # Flatten output if requested
    if self.flatten:
        keys = sorted(mbtr.keys())
        if len(keys) > 1:
            mbtr = sparse.concatenate([mbtr[key] for key in keys], axis=0)
        else:
            mbtr = mbtr[keys[0]]

    # Make into a dense array if requested
    if not self.sparse:
        mbtr = mbtr.todense()

    return mbtr
def conc(*XS):
    return sp.concatenate(XS, axis=axis) if iscoo(XS[0]) \
        else np.concatenate(XS, axis=axis)
def add_intercept(X):
    return sparse.concatenate(
        [X, sparse.COO(np.ones((X.shape[0], 1)))], axis=1)
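A brief usage sketch of add_intercept with a made-up design matrix: the column of ones is built densely, wrapped in COO, and appended as the last column.

import numpy as np
import sparse

X = sparse.random((5, 3), density=0.5)   # illustrative 5x3 design matrix
X1 = add_intercept(X)

assert X1.shape == (5, 4)
assert np.allclose(X1.todense()[:, -1], 1.0)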
def initialize(self, rate_ion, rate1_cx, rate2_cx, t_ion,
               t1_edge, t2_edge, n1_edge, n2_edge):
    for v in [rate_ion, rate1_cx, rate2_cx, t_ion]:
        if v.shape != self.r.shape:
            raise ValueError('Shape mismatch, {} and {}'.format(
                v.shape, self.r.shape))

    self.rate1.initialize(rate_ion + rate1_cx + rate2_cx, rate1_cx, t_ion)
    self.rate2.initialize(rate_ion + rate1_cx + rate2_cx, rate2_cx, t_ion)
    self.t_ion = t_ion
    self.n1_edge = n1_edge
    self.n2_edge = n2_edge
    self.nt1_edge = n1_edge * t1_edge
    self.nt2_edge = n2_edge * t2_edge
    self.ntt1_edge = n1_edge * t1_edge * t1_edge
    self.ntt2_edge = n2_edge * t2_edge * t2_edge

    n = self.size - 1
    # prepare matrices
    # particle balance
    # atom1
    A_part1 = sparse.concatenate([
        -sparse.tensordot(self.rate1.Rij + self.rate1.Sij,
                          self.rate1.slice_l, axes=(0, 0)),
        sparse.tensordot(self.rate1.Dij, self.rate1.slice_l, axes=(0, 0)),
        sparse.COO([], [], shape=(n, n)),
        -sparse.tensordot(self.rate1.Sij, self.rate1.slice_l, axes=(0, 0)),
        sparse.COO([], [], shape=(n, n)),
        sparse.COO([], [], shape=(n, n)),
    ], axis=0).T
    # atom2
    A_part2 = sparse.concatenate([
        -sparse.tensordot(self.rate2.Sij, self.rate2.slice_l, axes=(0, 0)),
        sparse.COO([], [], shape=(n, n)),
        sparse.COO([], [], shape=(n, n)),
        -sparse.tensordot(self.rate2.Rij + self.rate2.Sij,
                          self.rate2.slice_l, axes=(0, 0)),
        sparse.tensordot(self.rate2.Dij, self.rate2.slice_l, axes=(0, 0)),
        sparse.COO([], [], shape=(n, n)),
    ], axis=0).T
    # energy balance
    # atom1
    A_engy1 = sparse.concatenate([
        -1.5 * sparse.tensordot(self.rate1.Eij, self.rate1.slice_l, axes=(0, 0)),
        -1.5 * sparse.tensordot(self.rate1.Rij, self.rate1.slice_l, axes=(0, 0)),
        2.5 * sparse.tensordot(self.rate1.Dij, self.rate1.slice_l, axes=(0, 0)),
        -1.5 * sparse.tensordot(self.rate1.Eij, self.rate2.slice_l, axes=(0, 0)),
        sparse.COO([], [], shape=(n, n)),
        sparse.COO([], [], shape=(n, n)),
    ], axis=0).T
    # atom2
    A_engy2 = sparse.concatenate([
        -1.5 * sparse.tensordot(self.rate2.Eij, self.rate1.slice_l, axes=(0, 0)),
        sparse.COO([], [], shape=(n, n)),
        sparse.COO([], [], shape=(n, n)),
        -1.5 * sparse.tensordot(self.rate2.Eij, self.rate2.slice_l, axes=(0, 0)),
        -1.5 * sparse.tensordot(self.rate2.Rij, self.rate2.slice_l, axes=(0, 0)),
        2.5 * sparse.tensordot(self.rate2.Dij, self.rate2.slice_l, axes=(0, 0)),
    ], axis=0).T
    # balance matrix
    self.A = sparse.concatenate([A_part1, A_part2, A_engy1, A_engy2], axis=0)

    # boundary conditions
    b_part1 = (
        -self.n1_edge * sparse.tensordot(self.rate1.Rij + self.rate1.Sij,
                                         self.rate1.slice_last, axes=(0, 0))
        + self.nt1_edge * sparse.tensordot(
            self.rate1.Dij, self.rate1.slice_last, axes=(0, 0))
        - self.n2_edge * sparse.tensordot(
            self.rate1.Sij, self.rate1.slice_last, axes=(0, 0))).todense()
    b_part2 = (
        -self.n2_edge * sparse.tensordot(self.rate2.Rij + self.rate2.Sij,
                                         self.rate2.slice_last, axes=(0, 0))
        + self.nt2_edge * sparse.tensordot(
            self.rate2.Dij, self.rate2.slice_last, axes=(0, 0))
        - self.n1_edge * sparse.tensordot(
            self.rate2.Sij, self.rate2.slice_last, axes=(0, 0))).todense()
    b_engy1 = (
        -1.5 * self.n1_edge * sparse.tensordot(
            self.rate1.Eij, self.rate1.slice_last, axes=(0, 0))
        - 1.5 * self.nt1_edge * sparse.tensordot(
            self.rate1.Rij, self.rate1.slice_last, axes=(0, 0))
        + 2.5 * self.ntt1_edge * sparse.tensordot(
            self.rate1.Dij, self.rate1.slice_last, axes=(0, 0))
        - 1.5 * self.n2_edge * sparse.tensordot(
            self.rate1.Eij, self.rate1.slice_last, axes=(0, 0))).todense()
    b_engy2 = (
        -1.5 * self.n2_edge * sparse.tensordot(
            self.rate2.Eij, self.rate2.slice_last, axes=(0, 0))
        - 1.5 * self.nt2_edge * sparse.tensordot(
            self.rate2.Rij, self.rate2.slice_last, axes=(0, 0))
        + 2.5 * self.ntt2_edge * sparse.tensordot(
            self.rate2.Dij, self.rate2.slice_last, axes=(0, 0))
        - 1.5 * self.n1_edge * sparse.tensordot(
            self.rate2.Eij, self.rate2.slice_last, axes=(0, 0))).todense()
    self.b = -np.concatenate([b_part1, b_part2, b_engy1, b_engy2])

    # matrix for the constraint
    L1 = scipy.sparse.hstack([
        scipy.sparse.identity(n),
        scipy.sparse.identity(n) * (-2.0),
        scipy.sparse.identity(n),
        scipy.sparse.coo_matrix((n, n)),
        scipy.sparse.coo_matrix((n, n)),
        scipy.sparse.coo_matrix((n, n))
    ])
    L2 = scipy.sparse.hstack([
        scipy.sparse.coo_matrix((n, n)),
        scipy.sparse.coo_matrix((n, n)),
        scipy.sparse.coo_matrix((n, n)),
        scipy.sparse.identity(n),
        scipy.sparse.identity(n) * (-2.0),
        scipy.sparse.identity(n)
    ])
    self.L = scipy.sparse.vstack([L1, L2])
def derivatives_parallel(
    self,
    inp,
    func,
    n_jobs,
    derivatives_shape,
    descriptor_shape,
    return_descriptor,
    only_physical_cores=False,
    verbose=False,
    prefer="processes",
):
    """Used to parallelize the descriptor creation across multiple systems.

    Args:
        inp(list): Contains a tuple of input arguments for each processed
            system. These arguments are fed to the function specified by
            "func".
        func(function): Function that outputs the descriptor when given
            input arguments from "inp".
        n_jobs (int): Number of parallel jobs to instantiate. Parallelizes
            the calculation across samples. Defaults to serial calculation
            with n_jobs=1. If a negative number is given, the number of jobs
            will be calculated with n_cpus + n_jobs, where n_cpus is the
            number of CPUs as reported by the OS. With only_physical_cores
            you can control which types of CPUs are counted in n_cpus.
        derivatives_shape(list or None): If a fixed size output is produced
            from each job, this contains its shape. For variable size output
            this parameter is set to None.
        descriptor_shape(list or None): If a fixed size output is produced
            from each job, this contains its shape. For variable size output
            this parameter is set to None.
        only_physical_cores (bool): If a negative n_jobs is given, determines
            which types of CPUs are used in calculating the number of jobs.
            If set to False (default), also virtual CPUs are counted. If set
            to True, only physical CPUs are counted.
        verbose(bool): Controls whether to print the progress of each job
            into the console.
        prefer(str): The parallelization method. Valid options are:

            - "processes": Parallelization based on processes. Uses the
              "loky" backend in joblib to serialize the jobs and run them in
              separate processes. Using separate processes has a bigger
              memory and initialization overhead than threads, but may
              provide better scalability if performance is limited by the
              Global Interpreter Lock (GIL).
            - "threads": Parallelization based on threads. Has very low
              memory and initialization overhead. Performance is limited by
              the amount of pure python code that needs to run. Ideal when
              most of the calculation time is used by C/C++ extensions that
              release the GIL.

    Returns:
        np.ndarray | sparse.COO | list: The descriptor output for each given
        input. The return type depends on the descriptor setup.
    """
    # Determine the number of jobs
    if n_jobs < 0:
        n_jobs = joblib.cpu_count(only_physical_cores) + n_jobs
    if n_jobs <= 0:
        raise ValueError("Invalid number of jobs specified.")

    # Split data into n_jobs (almost) equal jobs
    n_samples = len(inp)
    is_sparse = self._sparse
    k, m = divmod(n_samples, n_jobs)
    jobs = (inp[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
            for i in range(n_jobs))

    def create_multiple_with_descriptor(arguments, func, index, verbose):
        """This is the function that is called by each job but with
        different parts of the data.
        """
        # Initialize output
        n_samples = len(arguments)
        if derivatives_shape:
            shape_der = [n_samples]
            shape_der.extend(derivatives_shape)
            if is_sparse:
                data_der = []
                coords_der = []
            else:
                derivatives = np.empty(shape_der, dtype=self.dtype)
        else:
            derivatives = []
        if descriptor_shape:
            shape_des = [n_samples]
            shape_des.extend(descriptor_shape)
            if is_sparse:
                data_des = []
                coords_des = []
            else:
                descriptors = np.empty(shape_des, dtype=self.dtype)
        else:
            descriptors = []
        old_percent = 0

        # Loop through all samples assigned for this job
        for i_sample, i_arg in enumerate(arguments):
            i_der, i_des = func(*i_arg)
            if descriptor_shape:
                if is_sparse:
                    sample_index = np.full((1, i_des.data.size), i_sample)
                    data_des.append(i_des.data)
                    coords_des.append(np.vstack((sample_index, i_des.coords)))
                else:
                    descriptors[i_sample] = i_des
            else:
                descriptors.append(i_des)
            if derivatives_shape:
                if is_sparse:
                    sample_index = np.full((1, i_der.data.size), i_sample)
                    data_der.append(i_der.data)
                    coords_der.append(np.vstack((sample_index, i_der.coords)))
                else:
                    derivatives[i_sample] = i_der
            else:
                derivatives.append(i_der)

            if verbose:
                current_percent = (i_sample + 1) / n_samples * 100
                if current_percent >= old_percent + 1:
                    old_percent = current_percent
                    print("Process {0}: {1:.1f} %".format(
                        index, current_percent))

        if is_sparse:
            if descriptor_shape is not None:
                data_des = np.concatenate(data_des)
                coords_des = np.concatenate(coords_des, axis=1)
                descriptors = sp.COO(coords_des, data_des, shape=shape_des)
            if derivatives_shape is not None:
                data_der = np.concatenate(data_der)
                coords_der = np.concatenate(coords_der, axis=1)
                derivatives = sp.COO(coords_der, data_der, shape=shape_der)

        return ((derivatives, descriptors), index)

    def create_multiple_without_descriptor(arguments, func, index, verbose):
        """This is the function that is called by each job but with
        different parts of the data.
        """
        # Initialize output
        n_samples = len(arguments)
        if derivatives_shape:
            shape_der = [n_samples]
            shape_der.extend(derivatives_shape)
            if is_sparse:
                data_der = []
                coords_der = []
            else:
                derivatives = np.empty(shape_der, dtype=self.dtype)
        else:
            derivatives = []
        old_percent = 0

        # Loop through all samples assigned for this job
        for i_sample, i_arg in enumerate(arguments):
            i_der = func(*i_arg)
            if derivatives_shape:
                if is_sparse:
                    sample_index = np.full((1, i_der.data.size), i_sample)
                    data_der.append(i_der.data)
                    coords_der.append(np.vstack((sample_index, i_der.coords)))
                else:
                    derivatives[i_sample] = i_der
            else:
                derivatives.append(i_der)

            if verbose:
                current_percent = (i_sample + 1) / n_samples * 100
                if current_percent >= old_percent + 1:
                    old_percent = current_percent
                    print("Process {0}: {1:.1f} %".format(
                        index, current_percent))

        if is_sparse and derivatives_shape is not None:
            data_der = np.concatenate(data_der)
            coords_der = np.concatenate(coords_der, axis=1)
            derivatives = sp.COO(coords_der, data_der, shape=shape_der)

        return ((derivatives, ), index)

    if return_descriptor:
        vec_lists = Parallel(n_jobs=n_jobs, prefer=prefer)(
            delayed(create_multiple_with_descriptor)(i_args, func, index, verbose)
            for index, i_args in enumerate(jobs))
    else:
        vec_lists = Parallel(n_jobs=n_jobs, prefer=prefer)(
            delayed(create_multiple_without_descriptor)(i_args, func, index, verbose)
            for index, i_args in enumerate(jobs))

    # Restore the calculation order. If using the threading backend, the
    # input order may have been lost.
    vec_lists.sort(key=lambda x: x[1])

    # If the results are of the same length, we can simply concatenate them
    # into one numpy array. Otherwise we will return a regular python list.
    der_lists = [x[0][0] for x in vec_lists]
    if derivatives_shape:
        if is_sparse:
            derivatives = sp.concatenate(der_lists, axis=0)
        else:
            derivatives = np.concatenate(der_lists, axis=0)
    else:
        derivatives = []
        for part in der_lists:
            derivatives.extend(part)

    if return_descriptor:
        des_lists = [x[0][1] for x in vec_lists]
        if descriptor_shape:
            if is_sparse:
                descriptors = sp.concatenate(des_lists, axis=0)
            else:
                descriptors = np.concatenate(des_lists, axis=0)
        else:
            descriptors = []
            for part in des_lists:
                descriptors.extend(part)
        return (derivatives, descriptors)

    return derivatives
def main(param_file, runs):
    base_path = Path(__file__).parent  # absolute working directory path

    # Read parameters from file
    params = open(base_path / param_file, 'r')
    parameters = {}  # empty parameter dictionary
    for x in params:
        x = x.rstrip()
        x = x.split(': ')
        try:
            float(x[1])
            parameters[x[0]] = float(x[1])  # creates dictionary with parameters
        except ValueError:
            parameters[x[0]] = x[1]  # creates dictionary with parameters
    params.close()

    sparse_matrix = sparse.load_npz(base_path / parameters['Network'])
    # print(sparse_matrix)  # prints network info (for testing)

    temp = parameters['Output Directory']
    output = base_path / parameters['Output Directory']
    # remove directory if empty
    try:
        os.rmdir(output)
    except OSError as ex:
        pass
    os.mkdir(output)

    # set parameters
    parameters['Output Directory'] = str(output) + '/'
    parameters['Students'] = sparse_matrix.shape[1]  # only used with two networks
    parameters['Adjusted ICU'] = parameters['ICU Rate'] / parameters[
        'Breath Rate']  # adjusts ICU rate for simulation
    parameters['Adjusted ICU 2'] = parameters['ICU Rate 2'] / parameters[
        'Breath Rate 2']  # adjusts ICU rate for simulation

    # get course length and room sizes from csv
    if parameters['Class Mode'] == 1:
        class_file = str(base_path / parameters['Courses'])
        df = pd.read_csv(class_file, usecols=['start', 'end', 'capacity'])
        df['start'] = pd.to_datetime(df['start'])
        df['end'] = pd.to_datetime(df['end'])
        # calculate difference between start and end times in hours
        df['diff'] = df['end'] - df['start']
        df['diff'] = df['diff'] / np.timedelta64(1, 'h')
        # get capacity and class start and end times from course_info csv
        class_length = df['diff'].tolist()
        room_size = df['capacity'].tolist()

    # adds the two networks together, results ONLY report for second network
    if parameters['Other Network'] != 'None':
        temp_matrix = sparse.load_npz(base_path / parameters['Other Network'])
        sparse_matrix = sparse.concatenate((sparse_matrix, temp_matrix), axis=1)
        # print(sparse_matrix)  # prints network info (for testing)

    # In person and Online class distribution
    classes_type = [0] * (sparse_matrix.shape[2])
    classes_size = [0] * (sparse_matrix.shape[2])
    inPerson_classes = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: []}
    inPerson_people = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: []}
    # inPerson_infectability = {0:[], 1:[], 2:[], 3:[], 4:[], 5:[], 6:[]}

    for day in range(5):
        sizes = sparse_matrix[day].sum(axis=0)
        for class_id in range(sparse_matrix.shape[2]):
            classes_size[class_id] = max(classes_size[class_id], sizes[class_id])

    for class_id in range(sparse_matrix.shape[2]):
        if classes_size[class_id] <= parameters[
                'Upper Limit Offline Class Size'] and classes_size[class_id] > 1:
            classes_type[class_id] = 1

    for day in range(5):
        sizes = sparse_matrix[day].sum(axis=0)
        for course in range(sparse_matrix.shape[2]):
            if (sizes[course] <= parameters['Upper Limit Offline Class Size']
                    and sizes[course] > 1):
                inPerson_classes[day].append(course)
                inPerson_people[day] = list(
                    set(inPerson_people[day])
                    | set(sparse_matrix[day, :, course].nonzero()[0]))

    # calculate infectability
    infectability = [0] * (sparse_matrix.shape[1])
    for person in range(sparse_matrix.shape[1]):
        enrolled_courses = [
            course_id for day in range(5)
            for course_id in sparse_matrix[day, person].nonzero()[0]
        ]
        infection = sum([
            classes_size[course_id] for course_id in enrolled_courses
            if classes_type[course_id] == 1
        ])
        if infection == 0:
            infectability[person] = 0
        else:
            infectability[person] = math.log(
                infection, 2)  # infectability = log(weekly interactions, 2)
        # print("person:", person)
        # print("courses:", enrolled_courses)
        # print("Infectability:", infectability[person])

    # ds = [infection for infection in infectability if infection > 0]
    # num_bins = 100
    # a = plt.hist(ds, num_bins, facecolor='blue', alpha=0.5)
    # with open('person_infectability.csv', 'w+', newline = '') as f:
    #     writer = csv.writer(f)
    #     l = []
    #     writer.writerow(l)
    #     l.append('NumPerson')
    #     l.append('Infectability')
    #     writer.writerow(l)
    #     for i in range(len(a[0])):
    #         l = []
    #         l.append(a[0][i])
    #         l.append(a[1][i])
    #         writer.writerow(l)
    #     f.close()
    # n, bins, patches = plt.hist(ds, num_bins, facecolor='blue', alpha=0.5)
    # plt.show()

    for day in range(5):
        print("Day:", day, "In person Classes", len(inPerson_classes[day]),
              "In person People", len(inPerson_people[day]))

    # for day in range(5):
    #     course_people_infectability = {person_: infectability[person_] for person_ in inPerson_people[day]}
    #     inPerson_infectability[day] = [(k, v) for k, v in sorted(course_people_infectability.items(), key=lambda item: item[1])][::-1]

    # set more parameters
    parameters['Threshold'] = 0.0034 * sparse_matrix.shape[
        1]  # used for calculating likelihood of catching disease, scaled with population size
    parameters['Patient Zero'] = sparse_matrix.shape[1] * parameters[
        'Initial Infected Population']
    # if parameters['Test Upon Entry']:
    #     Initially_infected = sparse_matrix.shape[1] * parameters['Initial Infected Population']
    #     False_positve_cases = (sparse_matrix.shape[1] - Initially_infected) * parameters['Asymptomatic False Positive Rate']
    #     False_negative_cases = Initially_infected * parameters['Asymptomatic False Negative Rate']
    #     parameters['Patient Zero'] = Initially_infected - False_negative_cases + False_positve_cases  # number of seeded infected, scaled with population size
    # else:
    #     parameters['Patient Zero'] = sparse_matrix.shape[1] * parameters['Initial Infected Population']
    print("Seeds:", parameters['Patient Zero'])

    p = Pool(processes=1)  # max 10 processes
    for i in range(runs):
        # iteration seeding start
        seed = scipy.sparse.lil_matrix(
            (sparse_matrix.shape[0], sparse_matrix.shape[1]))  # modifiable time person matrix
        test_state = scipy.sparse.lil_matrix(
            (sparse_matrix.shape[0], sparse_matrix.shape[1]))
        # test_state[0,:] = 0
        days_infected = [0] * sparse_matrix.shape[1]  # days infected for each person
        uniqueDay0 = np.unique(
            sparse_matrix[0, :, :].nonzero())  # gets unique users from first day
        z = 0  # tracks number seeded (to prevent seeding same person twice)
        while z < parameters['Patient Zero']:  # sets n number of check-ins on day 0 to infection
            pat_zero = random.choice(uniqueDay0)  # get random person
            if (z == 0):  # guarantees a spreader
                seed[0, pat_zero] = 2  # asymptomatic spreader
                days_infected[pat_zero] = int(parameters['Incubation Mean'])
                # days_infected[pat_zero] = int(parameters['Incubation Mean']) - 2
                z += 1
            elif (seed[0, pat_zero] == 0):
                seed[0, pat_zero] = 1  # start as incubating
                days_infected[pat_zero] = int(
                    (parameters['Incubation Mean'] - 1)
                    - (z % (parameters['Incubation Mean'] - 1)))
                # days_infected[pat_zero] = int((parameters['Incubation Mean'] - 3) - (z % (parameters['Incubation Mean'] - 3)))
                z += 1
        # extra_test_capacity = parameters['Test Capacity']
        # run_test(sparse_matrix, 0, seed, test_state, parameters, extra_test_capacity, infectability, inPerson_people, inPerson_classes, test_classes)
        # run_sim(seed, test_state, days_infected, sparse_matrix, classes_type, classes_size, parameters, i, class_length, room_size, infectability, inPerson_people, inPerson_classes)
        p.apply_async(run_sim, args=(
            seed,
            test_state,
            days_infected,
            sparse_matrix,
            classes_type,
            classes_size,
            parameters,
            i,
            class_length,
            room_size,
            infectability,
            inPerson_people,
            inPerson_classes,
        ))
    # scipy.sparse.save_npz(first_save_loc + '/state_matrix.npz', seed.tocoo())
    p.close()
    p.join()

    # find mean, standard error and auto generates an SEIR graph
    getMean(sparse_matrix.shape[0], parameters['Output Directory'], temp)
def to_sparse_array(self, min_days=90, max_days=None, rebase=True, **kwargs):
    if self.aggregated_data is not None:
        clients = []
        if max_days is None:
            max_days = int(self.aggregated_data['rel_day'].max()) + 1  # infer from data
            if max_days < 91:
                max_days = 91
            self.log.info('max_days inferred from data: {}'.format(max_days))
        if rebase:
            assert min_days < max_days, \
                'It is not possible to rebase with min_days={}, max_days={}'.format(
                    min_days, max_days)
            self.log.info('Rebasing observations with min_days={}'.format(min_days))

        num_col = int(self.aggregated_data.shape[-1]) - 1
        client_shape = [1, max_days, num_col]
        agg_by_id = self.aggregated_data.groupby('cl_id')

        for cl_id, cl_group in agg_by_id:
            client_array = np.zeros(client_shape)
            client_values = cl_group.values
            client_index = client_values[:, 1].astype(
                int)  # second column - rel_day values --> to 0_dim values
            if rebase:
                # Rebase but allow no less than 90 days observation period:
                client_max_day = client_index.max()
                if client_max_day < min_days:
                    client_max_day = min_days
                rebase_index = max_days - client_max_day - 1
                client_index += rebase_index

            # remove cl_id (gone to new dim) and rel_day
            client_array[:, client_index, :] = client_values[:, 1:]

            # Fill all records for single client:
            client_array[..., 0] = int(cl_id)  # id
            if np.isnan(cl_group.target_sum).any():
                client_array[..., -1] = float('NaN')
                client_array[..., -2] = float('NaN')
            else:
                client_array[..., -1] = cl_group.target_sum.mean()
                client_array[..., -2] = cl_group.target_flag.all()

            # Save as sparse 3d array:
            clients.append(sparse.COO(client_array.astype('float32')))

        full_array_sparse = sparse.concatenate(clients, axis=0)
    else:
        self.log.warning(
            'No aggregated data found, call .aggregate_by_daily_sums() method first.')
        full_array_sparse = None
    return full_array_sparse
def test_upcast_2(a, b):
    assert sparse.concatenate((a, a)).indptr.dtype == np.uint16
    assert sparse.stack((b, b)).indptr.dtype == np.uint16
def create_single(
    self,
    system,
    positions=None,
):
    """Return the local many-body tensor representation for the given system
    and positions.

    Args:
        system (:class:`ase.Atoms` | :class:`.System`): Input system.
        positions (iterable): Positions or atom indices of points from which
            the local MBTR is created. Can be a list of integer numbers or a
            list of xyz-coordinates. If integers are provided, the atoms at
            those indices are used as centers. If positions are provided, new
            atoms are added at those positions. If no positions are provided,
            all atoms in the system will be used as centers.

    Returns:
        1D ndarray: The local many-body tensor representations of given
        positions, for k terms, as an array. These are ordered as given in
        positions.
    """
    # Transform the input system into the internal System-object
    system = self.get_system(system)

    # Check that the system does not have elements that are not in the list
    # of atomic numbers
    atomic_number_set = set(system.get_atomic_numbers())
    self.check_atomic_numbers(atomic_number_set)
    self._interaction_limit = len(system)
    system_positions = system.get_positions()
    system_atomic_numbers = system.get_atomic_numbers()

    # Ensure that the atomic number 0 is not present in the system
    if 0 in atomic_number_set:
        raise ValueError(
            "Please do not use the atomic number 0 in local MBTR as it "
            "is reserved to mark the atoms used as analysis centers.")

    # Form a list of indices, positions and atomic numbers for the local
    # centers. k=3 and k=2 use a slightly different approach, so two
    # versions are built
    i_new = len(system)
    indices_k2 = []
    new_pos_k2 = []
    new_atomic_numbers_k2 = []
    indices_k3 = []
    new_pos_k3 = []
    new_atomic_numbers_k3 = []
    n_atoms = len(system)
    if positions is not None:
        n_loc = len(positions)

        # Check validity of position definitions and create final cartesian
        # position list
        if len(positions) == 0:
            raise ValueError(
                "The argument 'positions' should contain a non-empty set of"
                " atomic indices or cartesian coordinates with x, y and z "
                "components.")
        for i in positions:
            if np.issubdtype(type(i), np.integer):
                i_len = len(system)
                if i >= i_len or i < 0:
                    raise ValueError(
                        "The provided index {} is not valid for the system "
                        "with {} atoms.".format(i, i_len))
                indices_k2.append(i)
                indices_k3.append(i)
                new_pos_k2.append(system_positions[i])
                new_atomic_numbers_k2.append(system_atomic_numbers[i])
            elif isinstance(i, (list, tuple, np.ndarray)):
                if len(i) != 3:
                    raise ValueError(
                        "The argument 'positions' should contain a "
                        "non-empty set of atomic indices or cartesian "
                        "coordinates with x, y and z components.")
                new_pos_k2.append(np.array(i))
                new_pos_k3.append(np.array(i))
                new_atomic_numbers_k2.append(0)
                new_atomic_numbers_k3.append(0)
                i_new += 1
            else:
                raise ValueError(
                    "Create method requires the argument 'positions', a "
                    "list of atom indices and/or positions.")
    # If positions are not supplied, it is assumed that each atom is used
    # as a center
    else:
        n_loc = n_atoms
        indices_k2 = np.arange(n_atoms)
        indices_k3 = np.arange(n_atoms)
        new_pos_k2 = system.get_positions()
        new_atomic_numbers_k2 = system.get_atomic_numbers()

    # Calculate the "raw" outputs for each term.
    mbtr = {}
    if self.k2 is not None:
        new_system_k2 = System(
            symbols=new_atomic_numbers_k2,
            positions=new_pos_k2,
        )
        mbtr["k2"] = self._get_k2(system, new_system_k2, indices_k2)

    if self.k3 is not None:
        new_system_k3 = System(
            symbols=new_atomic_numbers_k3,
            positions=new_pos_k3,
        )
        mbtr["k3"] = self._get_k3(system, new_system_k3, indices_k3)

    # Handle normalization
    if self.normalization == "l2_each":
        if self.flatten is True:
            for key, value in mbtr.items():
                norm = np.linalg.norm(value.data)
                value /= norm
        else:
            for key, value in mbtr.items():
                for array in value:
                    i_data = array.ravel()
                    i_norm = np.linalg.norm(i_data)
                    array /= i_norm

    # Flatten output if requested
    if self.flatten:
        keys = sorted(mbtr.keys())
        if len(keys) > 1:
            result = sparse.concatenate([mbtr[key] for key in keys], axis=1)
        else:
            result = mbtr[keys[0]]

        # Make into a dense array if requested
        if not self.sparse:
            result = result.todense()
    # Otherwise return a list of dictionaries, each dictionary containing
    # the requested unflattened tensors
    else:
        result = np.empty((n_loc), dtype="object")
        for i_loc in range(n_loc):
            i_dict = {}
            for key in mbtr.keys():
                tensor = mbtr[key]
                i_dict[key] = tensor[i_loc]
            result[i_loc] = i_dict

    return result