    def add_contacts(self, contacts, lkey=None, beta=None):
        '''
        Add new contacts to the array. See also contacts.add_layer().
        '''

        # If no layer key is supplied and it can't be worked out from defaults, use the first layer
        if lkey is None:
            lkey = self.layer_keys()[0]

        # Validate the supplied contacts
        if isinstance(contacts, Contacts):
            new_contacts = contacts
        elif isinstance(contacts, Layer):
            new_contacts = {}
            new_contacts[lkey] = contacts
        elif sc.checktype(contacts, 'array'):
            new_contacts = {}
            new_contacts[lkey] = pd.DataFrame(data=contacts)
        elif isinstance(contacts, dict):
            new_contacts = {}
            new_contacts[lkey] = pd.DataFrame.from_dict(contacts)
        elif isinstance(contacts, list):  # Assume it's a list of contacts by person, not an edgelist
            new_contacts = self.make_edgelist(contacts)  # Assume contains key info
        else:  # pragma: no cover
            errormsg = f'Cannot understand contacts of type {type(contacts)}; expecting a Contacts object, Layer, array, dict, or list'
            raise TypeError(errormsg)

        # Ensure the columns are right and add values if supplied
        for lkey, new_layer in new_contacts.items():
            n = len(new_layer['p1'])
            if 'beta' not in new_layer.keys() or len(new_layer['beta']) != n:
                if beta is None:
                    beta = 1.0
                beta = spu.default_float(beta)
                new_layer['beta'] = np.ones(n, dtype=spu.default_float) * beta

            # Create the layer if it doesn't yet exist
            if lkey not in self.contacts:
                self.contacts[lkey] = Layer(label=lkey)

            # Actually include them, and update properties if supplied
            for col in self.contacts[lkey].keys():  # Loop over the supplied columns
                self.contacts[lkey][col] = np.concatenate([self.contacts[lkey][col], new_layer[col]])
            self.contacts[lkey].validate()

        return
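A minimal usage sketch for the dict input path (hypothetical: 'people' stands in for whatever object exposes this method, and the layer key 'a' is made up):

import numpy as np

# Hypothetical: 'people' is an instance of the class defining add_contacts() above
contacts = {
    'p1': np.array([0, 1, 2]),  # source person indices
    'p2': np.array([3, 4, 5]),  # target person indices
}
people.add_contacts(contacts, lkey='a', beta=0.5)  # dict input is converted via pd.DataFrame.from_dict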
Example 2
import numpy as np
import sciris as sc


def scaled_norm(test, train, quantiles='IQR'):
    '''
    Calculation of distances between a test set of points and a training set of
    points -- was going to use Numba but plenty fast without.
    
    Before calculating distances, normalize each dimension to have the same "scale"
    (default: interquartile range).
    
    "test" can be a single point or an array of points.
    '''

    # Handle inputs
    if quantiles in [None, 'iqr', 'IQR']:
        quantiles = [0.25, 0.75]  # Default quantiles to compute scale from
    elif not sc.checktype(quantiles, 'arraylike'):
        raise TypeError(
            f'Could not understand quantiles {type(quantiles)}: should be "IQR" or array-like'
        )

    # Copy; otherwise, these get modified in place
    test = sc.dcp(test)
    train = sc.dcp(train)

    # Dimension checking
    if test.ndim == 1:
        test = np.array([test])  # Ensure it's 2-dimensional

    ntest, npars = test.shape
    ntrain, npars2 = train.shape
    if npars != npars2:
        raise ValueError(
            f'Array shape appears to be incorrect: {npars2} should be {npars}')

    # Normalize
    for p in range(npars):
        scale = np.diff(np.quantile(train[:, p], quantiles))
        train[:, p] /= scale  # Transform to be of comparable scale
        test[:, p] /= scale  # For test points too

    # The actual calculation
    distances = np.zeros((ntest, ntrain))
    for i in range(ntest):
        distances[i, :] = np.linalg.norm(train - test[i, :], axis=1)

    if len(distances) == 1:
        distances = distances.flatten()  # If we have only a single point, return a vector of distances

    return distances
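A quick self-contained check of scaled_norm, assuming only numpy, sciris, and the function above:

import numpy as np

np.random.seed(1)
train = np.random.rand(100, 3)         # 100 training points in 3 dimensions
test = np.random.rand(5, 3)            # 5 test points
dists = scaled_norm(test, train)       # IQR-scaled Euclidean distances
print(dists.shape)                     # (5, 100): one row of distances per test point
single = scaled_norm(train[0], train)  # a single 1-D point comes back as a flat vector
print(single.shape)                    # (100,)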
Example 3
    def add_contacts(self, contacts, lkey=None, beta=None):
        ''' Add new contacts to the array '''

        if lkey is None:
            lkey = self.layer_keys()[0]
        if lkey not in self.contacts:
            self.contacts[lkey] = Layer()

        # Validate the supplied contacts
        if isinstance(contacts, Contacts):
            new_contacts = contacts
        elif isinstance(contacts, Layer):
            new_contacts = {}
            new_contacts[lkey] = contacts
        elif sc.checktype(contacts, 'array'):
            new_contacts = {}
            new_contacts[lkey] = pd.DataFrame(data=contacts)
        elif isinstance(contacts, dict):
            new_contacts = {}
            new_contacts[lkey] = pd.DataFrame.from_dict(contacts)
        elif isinstance(contacts, list):  # Assume it's a list of contacts by person, not an edgelist
            new_contacts = self.make_edgelist(contacts)  # Assume contains key info
        else:
            errormsg = f'Cannot understand contacts of type {type(contacts)}; expecting a Contacts object, Layer, array, dict, or list'
            raise TypeError(errormsg)

        # Ensure the columns are right and add values if supplied
        for lkey, new_layer in new_contacts.items():
            n = len(new_layer['p1'])
            if 'layer' not in new_layer:
                new_layer['layer'] = np.array([lkey] * n)
            if 'beta' not in new_layer or len(new_layer['beta']) != n:
                if beta is None:
                    beta = self.pars['beta_layer'][lkey]
                beta = np.float32(beta)
                new_layer['beta'] = np.ones(n, dtype=np.float32) * beta

            # Create the layer if it doesn't yet exist (the check above only covers the default lkey)
            if lkey not in self.contacts:
                self.contacts[lkey] = Layer()

            # Actually include them, and update properties if supplied
            for col in self.contacts[lkey].keys():
                self.contacts[lkey][col] = np.concatenate([self.contacts[lkey][col], new_layer[col]])
            self.contacts[lkey].validate()

        return
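In both versions above, the list branch defers to make_edgelist, which is not shown; a rough sketch of what such a conversion might look like (hypothetical: per-person contact lists to edge arrays):

import numpy as np

contacts_by_person = [[1, 2], [2], []]  # person 0 contacts 1 and 2; person 1 contacts 2; person 2 has none
p1 = np.array([i for i, targets in enumerate(contacts_by_person) for _ in targets])
p2 = np.array([t for targets in contacts_by_person for t in targets])
# p1 -> [0, 0, 1] and p2 -> [1, 2, 2]: edge arrays matching the 'p1'/'p2' columns used above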
Example 4
    def parse_data(self, datakey=None, datatype=None):
        '''
        Ensure the data object has the right structure, and store the keys in the object.
        '''

        # Choose the data type -- by default, cumulative exposures
        if datakey is None:
            datakey = 'cum_exposed'  # TODO: make this less hard-coded?
        if datatype is None:
            datatype = 'cumulative'
        self.datakey = datakey
        self.datatype = datatype

        # Check that the data is a dict of results types
        D = self.data  # Shortcut
        if not isinstance(D, dict):
            raise TypeError(
                f'Data must be dict with keys for different results, but you supplied {type(D)}'
            )

        # ...and then a dict of scenarios
        self.datakeys = list(D.keys())
        if self.datakey not in self.datakeys:
            raise KeyError(
                f'Could not find supplied datakey {self.datakey} in supplied datakeys {self.datakeys}'
            )
        dk0 = self.datakeys[0]  # For "data key 0"
        if not isinstance(D[dk0], dict):
            raise TypeError(
                f'The second level in the data must also be a dict, but you supplied {type(D[dk0])}'
            )

        # ...and then a dict of best, high, low
        self.scenkeys = list(D[dk0].keys())
        sk0 = self.scenkeys[0]
        if not isinstance(D[dk0][sk0], dict):
            raise TypeError(
                f'The third level in the data must also be a dict, but you supplied {type(D[dk0][sk0])}'
            )

        # ...and a numeric array
        self.blh = ['best', 'low', 'high']
        if not all([(key in D[dk0][sk0]) for key in self.blh]):
            raise ValueError(
                f'The required keys {self.blh} could not be found in {D[dk0][sk0].keys()}'
            )
        if not sc.checktype(D[dk0][sk0]['best'], 'arraylike'):
            raise TypeError(
                f"Was expecting a numeric array, but got {type(D[dk0][sk0]['best'])}"
            )

        # Figure out how many points are in this thing
        self.npts = len(D[dk0][sk0]['best'])

        # Store labels
        self.scenlabels = {
            scenkey: D[dk0][scenkey]['name']
            for scenkey in self.scenkeys
        }

        return
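A sketch of the nested structure parse_data expects, with hypothetical values (any results key and scenario names would do):

import numpy as np

data = {
    'cum_exposed': {                     # level 1: results key (the default datakey)
        'baseline': {                    # level 2: scenario key
            'best': np.array([0, 10, 50, 120]),  # level 3: best/low/high arrays plus a label
            'low': np.array([0, 5, 30, 90]),
            'high': np.array([0, 15, 80, 160]),
            'name': 'Baseline scenario',  # used to build scenlabels
        },
    },
}
# After parse_data(), self.npts would be 4 and self.scenlabels {'baseline': 'Baseline scenario'}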