def add_contacts(self, contacts, lkey=None, beta=None):
    '''
    Append new contacts to the existing contact layers. See also contacts.add_layer().

    Args:
        contacts: the contacts to add; may be a Contacts object, a single Layer,
            an array, a dict, or a list (treated as contacts-by-person and
            converted with self.make_edgelist())
        lkey: key of the layer to add to when the input describes a single layer;
            if None, the first key from self.layer_keys() is used
        beta: value used to fill the 'beta' column when a layer does not supply one
            of the right length; if None, 1.0 is used

    Raises:
        TypeError: if the type of contacts is not one of the above
    '''

    # With no layer key given, fall back to the first available layer
    if lkey is None:
        lkey = self.layer_keys()[0]

    # Convert whatever was supplied into a mapping of layer key -> layer data
    if isinstance(contacts, Contacts):
        new_contacts = contacts
    elif isinstance(contacts, Layer):
        new_contacts = {lkey: contacts}
    elif sc.checktype(contacts, 'array'):
        new_contacts = {lkey: pd.DataFrame(data=contacts)}
    elif isinstance(contacts, dict):
        new_contacts = {lkey: pd.DataFrame.from_dict(contacts)}
    elif isinstance(contacts, list):
        # A list is taken to be contacts by person, not an edgelist; the
        # result is assumed to already contain the layer keys
        new_contacts = self.make_edgelist(contacts)
    else:  # pragma: no cover
        errormsg = f'Cannot understand contacts of type {type(contacts)}; expecting dataframe, array, or dict'
        raise TypeError(errormsg)

    # Fill in missing columns, then append each incoming layer to its stored counterpart
    for lkey, incoming in new_contacts.items():
        n_new = len(incoming['p1'])

        # Supply beta when it is absent or has the wrong length
        has_valid_beta = 'beta' in incoming.keys() and len(incoming['beta']) == n_new
        if not has_valid_beta:
            if beta is None:
                beta = 1.0
            beta = spu.default_float(beta)
            incoming['beta'] = np.ones(n_new, dtype=spu.default_float) * beta

        # Make sure there is a layer to append to
        if lkey not in self.contacts:
            self.contacts[lkey] = Layer(label=lkey)

        # Extend every column of the stored layer with the new values
        target = self.contacts[lkey]
        for col in target.keys():
            target[col] = np.concatenate([target[col], incoming[col]])
        target.validate()

    return
def scaled_norm(test, train, quantiles='IQR'):
    '''
    Calculate Euclidean distances between a test set of points and a training
    set of points. Before calculating distances, each dimension (column) is
    divided by a scale computed from the training set: the difference between
    two of its quantiles (default: the interquartile range).

    Args:
        test: a single point (1D, length npars) or an array of points (ntest x npars)
        train: an array of training points (ntrain x npars)
        quantiles: None, 'iqr' or 'IQR' for the interquartile range, or an
            array-like of the two quantiles (between 0 and 1) to take the difference of

    Returns:
        An (ntest x ntrain) array of distances, or a vector of length ntrain if
        there is only a single test point.

    Raises:
        TypeError: if quantiles is neither a recognised string/None nor array-like
        ValueError: if test and train have a different number of columns

    Note: the inputs are never modified; they are copied to float arrays first.
    A column whose two quantiles coincide has zero scale, so the distances
    involving it will be inf/nan.
    '''

    # Handle inputs. None and strings are tested explicitly rather than with
    # "quantiles in [...]", since that comparison is ambiguous (and raises
    # ValueError) when quantiles is a NumPy array.
    if quantiles is None or (isinstance(quantiles, str) and quantiles in ('iqr', 'IQR')):
        quantiles = [0.25, 0.75]  # Default quantiles to compute scale from
    elif not sc.checktype(quantiles, 'arraylike'):
        raise TypeError(
            f'Could not understand quantiles {type(quantiles)}: should be "IQR" or array-like'
        )

    # Copy, so the caller's arrays are not modified in place; converting to
    # float also lets integer inputs be divided in place below
    test = np.array(test, dtype=float)
    train = np.array(train, dtype=float)

    # Dimension checking
    if test.ndim == 1:
        test = np.array([test])  # Ensure it's 2-dimensional
    ntest, npars = test.shape
    ntrain, npars2 = train.shape
    if npars != npars2:
        raise ValueError(
            f'Array shape appears to be incorrect: {npars2} should be {npars}')

    # Normalize each dimension by the scale of the training data
    for p in range(npars):
        scale = np.diff(np.quantile(train[:, p], quantiles))
        train[:, p] /= scale  # Transform to be of comparable scale
        test[:, p] /= scale  # For test points too

    # The actual calculation: one row of distances per test point
    distances = np.zeros((ntest, ntrain))
    for i in range(ntest):
        distances[i, :] = np.linalg.norm(train - test[i, :], axis=1)

    # If we have only a single point, return a vector of distances
    if len(distances) == 1:
        distances = distances.flatten()

    return distances
def add_contacts(self, contacts, lkey=None, beta=None):
    '''
    Add new contacts to the array.

    Args:
        contacts: the contacts to add; may be a Contacts object, a single Layer,
            an array, a dict, or a list (treated as contacts-by-person and
            converted with self.make_edgelist())
        lkey: key of the layer to add to when the input describes a single layer;
            if None, the first key from self.layer_keys() is used
        beta: value used to fill the 'beta' column when a layer does not supply one
            of the right length; if None, each layer uses its own entry in
            self.pars['beta_layer']

    Raises:
        TypeError: if the type of contacts is not one of the above
    '''

    if lkey is None:
        lkey = self.layer_keys()[0]
    if lkey not in self.contacts:
        self.contacts[lkey] = Layer()

    # Validate the supplied contacts. This must be a single if/elif chain:
    # otherwise a Contacts object is picked up by the first test and then
    # handled a second time (or rejected) by the tests that follow.
    if isinstance(contacts, Contacts):
        new_contacts = contacts
    elif isinstance(contacts, Layer):
        new_contacts = {lkey: contacts}
    elif sc.checktype(contacts, 'array'):
        new_contacts = {lkey: pd.DataFrame(data=contacts)}
    elif isinstance(contacts, dict):
        new_contacts = {lkey: pd.DataFrame.from_dict(contacts)}
    elif isinstance(contacts, list):
        # Assume it's a list of contacts by person, not an edgelist;
        # the result is assumed to contain the layer key info
        new_contacts = self.make_edgelist(contacts)
    else:
        errormsg = f'Cannot understand contacts of type {type(contacts)}; expecting dataframe, array, or dict'
        raise TypeError(errormsg)

    # Ensure the columns are right and add values if supplied
    for layer_key, new_layer in new_contacts.items():
        n = len(new_layer['p1'])
        if 'layer' not in new_layer:
            new_layer['layer'] = np.array([layer_key] * n)
        if 'beta' not in new_layer or len(new_layer['beta']) != n:
            # Resolve the default per layer, without overwriting the beta
            # argument, so one layer's default is not reused for the next
            layer_beta = self.pars['beta_layer'][layer_key] if beta is None else beta
            layer_beta = np.float32(layer_beta)
            new_layer['beta'] = np.ones(n, dtype=np.float32) * layer_beta

        # The supplied contacts may use layer keys other than lkey, so create
        # any layer that doesn't yet exist before appending to it
        if layer_key not in self.contacts:
            self.contacts[layer_key] = Layer()

        # Actually include them, and update properties if supplied
        for col in self.contacts[layer_key].keys():
            self.contacts[layer_key][col] = np.concatenate(
                [self.contacts[layer_key][col], new_layer[col]])
        self.contacts[layer_key].validate()

    return
def parse_data(self, datakey=None, datatype=None):
    '''
    Check that self.data is laid out as expected and record its keys on the object.

    The expected layout is a dict of result types, each a dict of scenarios,
    each a dict containing 'best', 'low' and 'high' entries. Only the first
    result type and its first scenario are inspected.

    Args:
        datakey: which result to use; defaults to 'cum_exposed'
        datatype: the kind of data; defaults to 'cumulative'

    Sets self.datakey, self.datatype, self.datakeys, self.scenkeys, self.blh,
    self.npts and self.scenlabels.

    Raises:
        TypeError: if any level of the data has the wrong type
        KeyError: if datakey is not one of the keys of the data
        ValueError: if 'best', 'low' or 'high' is missing
    '''

    # By default, use cumulative exposures
    self.datakey = 'cum_exposed' if datakey is None else datakey  # TODO: make this less hard-coded?
    self.datatype = 'cumulative' if datatype is None else datatype

    # Top level: a dict keyed by result type
    data = self.data
    if not isinstance(data, dict):
        raise TypeError(
            f'Data must be dict with keys for different results, but you supplied {type(data)}'
        )
    self.datakeys = list(data.keys())
    if self.datakey not in self.datakeys:
        raise KeyError(
            f'Could not find supplied datakey {self.datakey} in supplied datakeys {self.datakeys}'
        )

    # Second level: a dict keyed by scenario (checked on the first result only)
    first_result = data[self.datakeys[0]]
    if not isinstance(first_result, dict):
        raise TypeError(
            f'The second level in the data must also be a dict, but you supplied {type(first_result)}'
        )
    self.scenkeys = list(first_result.keys())

    # Third level: a dict holding best, low and high (checked on the first scenario only)
    first_scen = first_result[self.scenkeys[0]]
    if not isinstance(first_scen, dict):
        raise TypeError(
            f'The third level in the data must also be a dict, but you supplied {type(first_scen)}'
        )
    self.blh = ['best', 'low', 'high']
    if not all(key in first_scen for key in self.blh):
        raise ValueError(
            f'The required keys {self.blh} could not be found in {first_scen.keys()}'
        )

    # Finally, the values themselves should be numeric arrays
    best = first_scen.best
    if not sc.checktype(best, 'arraylike'):
        raise TypeError(
            f'Was expecting a numeric array, but got {type(best)}'
        )

    # Number of points, taken from the first "best" array
    self.npts = len(best)

    # Human-readable label for each scenario
    self.scenlabels = {key: first_result[key].name for key in self.scenkeys}

    return