Example 1
def compute_overlap(mat1, mat2):
    s1 = mat1.shape[0]
    s2 = mat2.shape[0]
    area1 = (mat1[:, 2] - mat1[:, 0]) * (mat1[:, 3] - mat1[:, 1])
    if mat2.shape[1] == 5:
        area2 = mat2[:, 4]
    else:
        area2 = (mat2[:, 2] - mat2[:, 0]) * (mat2[:, 3] - mat2[:, 1])
    x1 = cartesian([mat1[:, 0], mat2[:, 0]])
    x1 = np.amax(x1, axis=1)
    x2 = cartesian([mat1[:, 2], mat2[:, 2]])
    x2 = np.amin(x2, axis=1)
    com_zero = np.zeros(x2.shape[0])
    w = x2 - x1
    w = w - 1
    w = np.maximum(com_zero, w)
    y1 = cartesian([mat1[:, 1], mat2[:, 1]])
    y1 = np.amax(y1, axis=1)
    y2 = cartesian([mat1[:, 3], mat2[:, 3]])
    y2 = np.amin(y2, axis=1)
    h = y2 - y1
    h = h - 1
    h = np.maximum(com_zero, h)
    oo = w * h
    aa = cartesian([area1[:], area2[:]])
    aa = np.sum(aa, axis=1)
    ooo = oo / (aa - oo)
    overlap = np.transpose(ooo.reshape(s1, s2), (1, 0))
    return overlap
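compute_overlap relies on cartesian to pair every box in mat1 with every box in mat2 before the intersection-over-union is formed. A minimal sketch of just that pairing step, assuming NumPy and sklearn.utils.extmath.cartesian are available (the box coordinates below are invented for illustration):

import numpy as np
from sklearn.utils.extmath import cartesian

# Two boxes in mat1 and three in mat2, stored as (x1, y1, x2, y2) rows.
mat1 = np.array([[0, 0, 10, 10], [5, 5, 15, 15]], dtype=float)
mat2 = np.array([[0, 0, 5, 5], [2, 2, 12, 12], [8, 8, 20, 20]], dtype=float)

# Pair every x1 of mat1 with every x1 of mat2: shape (2 * 3, 2), with the
# mat2 coordinate varying fastest along the rows.
pairs = cartesian([mat1[:, 0], mat2[:, 0]])
print(pairs.shape)             # (6, 2)
print(np.amax(pairs, axis=1))  # left edge of each pairwise intersection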
Example 2
def test_cartesian():
    # Check if cartesian product delivers the right results

    axes = (np.array([1, 2, 3]), np.array([4, 5]), np.array([6, 7]))

    true_out = np.array(
        [
            [1, 4, 6],
            [1, 4, 7],
            [1, 5, 6],
            [1, 5, 7],
            [2, 4, 6],
            [2, 4, 7],
            [2, 5, 6],
            [2, 5, 7],
            [3, 4, 6],
            [3, 4, 7],
            [3, 5, 6],
            [3, 5, 7],
        ]
    )

    out = cartesian(axes)
    assert_array_equal(true_out, out)

    # check single axis
    x = np.arange(3)
    assert_array_equal(x[:, np.newaxis], cartesian((x,)))
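For reference, the behaviour asserted by this test can be reproduced with plain NumPy. The sketch below is a simplified stand-in for sklearn.utils.extmath.cartesian (not the library's actual implementation), shown only to make the expected row ordering explicit:

import numpy as np

def cartesian_like(arrays):
    # Stack the ij-indexed meshgrid of the inputs so that the last input
    # varies fastest, matching the row ordering asserted in true_out above.
    grids = np.meshgrid(*arrays, indexing='ij')
    return np.stack(grids, axis=-1).reshape(-1, len(arrays))

axes = (np.array([1, 2, 3]), np.array([4, 5]), np.array([6, 7]))
print(cartesian_like(axes))  # same 12 x 3 array as true_out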
Example 3
    def _grid_find_neighbors(self, X, k=3):
        n_x = np.shape(X)[0]

        n_dims = len(self.grid_dims)

        n_neighbors = k**n_dims

        neighbors_per_dim_X, distances_per_dim = self._find_k_nearest_per_dim(
            X, k=k)

        #for ii in range(n_x):
        #with Timer() as t:
        #distances_per_grid_dim, neighbor_coordinates = np.reshape(neighbors_per_dim_X[ii,0,:],(-1,1))
        #indices_all = np.zeros((n_x*n_neighbors,))
        distances_all = np.zeros((n_x, n_neighbors, n_dims))
        neighbor_coordinates_all = np.zeros((n_x * n_neighbors, n_dims))

        for ii in range(n_x):

            neighbor_coordinates_all[ii * n_neighbors:(ii + 1) *
                                     n_neighbors, :] = cartesian(
                                         neighbors_per_dim_X[ii, :, :])
            distances_all[ii, :, :] = cartesian(distances_per_dim[ii, :, :])

        indices_all = self._kron_grid_indices_to_matrix_indices(
            neighbor_coordinates_all.astype(int))

        distances_all = np.mean(np.square(distances_all), axis=2)

        return distances_all, indices_all
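Note that _grid_find_neighbors calls cartesian on 2-D slices such as neighbors_per_dim_X[ii, :, :]; iterating such an array yields its rows, so each row acts as one axis of the product and the result has k**n_dims rows. A small standalone illustration of that behaviour, assuming sklearn's cartesian:

import numpy as np
from sklearn.utils.extmath import cartesian

# k = 2 nearest grid indices along each of n_dims = 3 dimensions (toy values).
neighbors_one_point = np.array([[4, 5],    # dimension 0
                                [1, 2],    # dimension 1
                                [7, 8]])   # dimension 2
combos = cartesian(neighbors_one_point)
print(combos.shape)  # (8, 3) == (k**n_dims, n_dims)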
Example 4
def test_cartesian():
    # Check if cartesian product delivers the right results

    axes = (np.array([1, 2, 3]), np.array([4, 5]), np.array([6, 7]))

    true_out = np.array(
        [
            [1, 4, 6],
            [1, 4, 7],
            [1, 5, 6],
            [1, 5, 7],
            [2, 4, 6],
            [2, 4, 7],
            [2, 5, 6],
            [2, 5, 7],
            [3, 4, 6],
            [3, 4, 7],
            [3, 5, 6],
            [3, 5, 7],
        ]
    )

    out = cartesian(axes)
    assert_array_equal(true_out, out)

    # check single axis
    x = np.arange(3)
    assert_array_equal(x[:, np.newaxis], cartesian((x,)))
Example 5
    def predict(self, X, alpha=.05):
        n_sample = self.X_train.shape[0]
        self.n_iter = max(self.n_iter, int(np.sqrt(n_sample)))

        y_hat_b = np.zeros((self.n_iter, X.shape[0]))
        residuals_val = []

        # bootstrap
        for b in range(self.n_iter):
            idx_train = np.random.choice(np.arange(n_sample),
                                         n_sample,
                                         replace=True)
            idx_val = np.setdiff1d(np.arange(n_sample), idx_train)

            self.model.fit(self.X_train[idx_train], self.y_train[idx_train])
            y_hat_train_b = self.model.predict(self.X_train[idx_val])
            residuals_val.append(self.y_train[idx_val] - y_hat_train_b)
            y_hat_b[b] = self.model.predict(X)

        residuals_val = np.concatenate(residuals_val)

        # training residuals
        self.model.fit(self.X_train, self.y_train)
        y_hat_train = self.model.predict(self.X_train)
        residuals_train = self.y_train - y_hat_train

        # take percentiles to allow comparison between train and validation
        # residuals
        residuals_val = np.percentile(residuals_val, q=np.arange(100))
        residuals_train = np.percentile(residuals_train, q=np.arange(100))

        # compute weighted residuals to account for overfitting, since we use
        # the training residual set to estimate prediction intervals
        if n_sample > self.max_samples:
            combs_idx = np.random.choice(np.arange(n_sample), self.max_samples)
            combs = cartesian(
                (self.y_train[combs_idx], y_hat_train[combs_idx]))
        else:
            combs = cartesian((self.y_train, y_hat_train))

        no_info_err_rate = ((combs[:, 0] - combs[:, 1])**2).mean()
        relative_overfit_rate = (residuals_val.mean() -
                                 residuals_train.mean()) / (
                                     no_info_err_rate - residuals_train.mean())
        weight = .632 / (1 - .368 * relative_overfit_rate)
        residuals = (1 - weight) * residuals_train + weight * residuals_val

        # compute the estimate of the noise around the bootstrapped predictions
        # and take percentiles as prediction intervals
        C = np.array([[m + o for m in y_hat_b[:, i] for o in residuals]
                      for i in range(X.shape[0])])
        q = [100 * alpha / 2, 100 * (1 - alpha / 2)]
        percentiles = np.percentile(C, q, axis=1)
        y_hat = self.model.predict(X)

        return y_hat, percentiles
Example 6
def worker(args):
    p, processors, partitions, dataFrame, H0, covariance_class, n, d = args
    list_bins = []
    list_means = []
    list_digitized = []
    for k in range(d):
        partition = partitions[k]
        data = dataFrame[k]
        #min, max, parts count
        dim_bins = linspace(partition[0], partition[1], partition[2] + 1)
        list_means.append((dim_bins[:-1] + dim_bins[1:]) / 2.)
        bin_dig = digitize(data, dim_bins)
        bin_dig[bin_dig == partition[2] + 1] = partition[2]
        list_digitized.append(bin_dig)
        list_bins.append(linspace(1, partition[2], partition[2]))
    digitized = vstack(list_digitized).T
    bins = cartesian(list_bins)
    bin_means = cartesian(list_means)
    selections = []
    H_s = []
    #calculate this processor's chunk of bins
    chunks = arange(len(bins)) % processors == p
    iu = triu_indices(d, 1)
    if covariance_class == 'H3':
        #Square covariance matrix
        h0 = mvn.unrollSigma(H0, iu)
    else:
        h0 = H0**0.5
    for bin, amean in zip(bins[chunks], bin_means[chunks]):
        selection = (digitized == bin).all(axis=1)
        if selection.any():
            res = optimize.minimize(getBinnedUnbiasedIMSE,
                                    x0=h0,
                                    args=(dataFrame, bin_means,
                                          pd.DataFrame(atleast_2d(amean)),
                                          covariance_class, d, iu, selection),
                                    method='BFGS',
                                    options={
                                        'gtol': 1e-4,
                                        'eps': 1e-5
                                    })
            res = res.x
            if covariance_class == 'H3':
                H_s.append(mvn.rollSigma(res, d, iu))
            elif covariance_class == 'H2':
                H_s.append(res**2.)
            else:
                H_s.append(ones(d) * res**2.)
            selections.append(selection)
    return selections, H_s
Example 7
 def __init__(self, appliance_power_dict={}):
     self.power_list = appliance_power_dict
     self.index_to_status = cartesian(
         [i for i in range(len(self.power_list[app]))]
         for app in self.power_list)
     self.MODEL_NAME = "BNILM"
     self.compute_all_state()
Example 8
def weighted_hausdorff_distance(w, h, alpha):
    all_img_locations = tf.convert_to_tensor(cartesian([np.arange(w), np.arange(h)]), dtype=tf.float32)
    max_dist = math.sqrt(w ** 2 + h ** 2)

    def hausdorff_loss(y_true, y_pred):
        def loss(y_true, y_pred):
            eps = 1e-6
            y_true = K.reshape(y_true, [w, h])
            gt_points = K.cast(tf.where(y_true > 0.5), dtype=tf.float32)
            num_gt_points = tf.shape(gt_points)[0]
            y_pred = K.flatten(y_pred)
            p = y_pred
            p_replicated = tf.squeeze(K.repeat(tf.expand_dims(p, axis=-1), num_gt_points))
            d_matrix = cdist(all_img_locations, gt_points)
            num_est_pts = tf.reduce_sum(p)
            term_1 = (1 / (num_est_pts + eps)) * K.sum(p * K.min(d_matrix, 1))

            d_div_p = K.min((d_matrix + eps) / (p_replicated ** alpha + (eps / max_dist)), 0)
            d_div_p = K.clip(d_div_p, 0, max_dist)
            term_2 = K.mean(d_div_p, axis=0)

            return term_1 + term_2

        batched_losses = tf.map_fn(lambda x:
                                   loss(x[0], x[1]),
                                   (y_true, y_pred),
                                   dtype=tf.float32)
        return K.mean(tf.stack(batched_losses))

    return hausdorff_loss
Example 9
 def createStatePoints(self):
     stateDimArrays = []
     for d in range(len(self.minList)):
         dimArray = np.linspace(self.minList[d], self.maxList[d],
                                NUM_POINTS_PER_DIM)
         stateDimArrays.append(dimArray)
     return cartesian(stateDimArrays)
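createStatePoints discretises each state dimension with linspace and takes the cartesian product, so the returned grid has NUM_POINTS_PER_DIM ** n_dims rows. A standalone sketch of the same idea (the bounds and point count below are illustrative, not taken from the original class):

import numpy as np
from sklearn.utils.extmath import cartesian

NUM_POINTS_PER_DIM = 5
min_list = [-1.0, 0.0]
max_list = [1.0, 2.0]

axes = [np.linspace(lo, hi, NUM_POINTS_PER_DIM)
        for lo, hi in zip(min_list, max_list)]
state_points = cartesian(axes)
print(state_points.shape)  # (25, 2) == (NUM_POINTS_PER_DIM ** 2, 2)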
Example 10
def combine_args(**argarrs):#argarrs are [arg name]=[list of values]
    #Get all permutations of the arguments. Returns a pandas data frame with the argument names as the columns and the cartesian product of all their possible values.
    #Note that this can't handle None values (at least not yet)
    arg_keys = argarrs.keys()
    if len(arg_keys) == 0:
        raise ValueError("Must be at least one keyword argument (if you don't want to train multiple models just use lists with single entries")
    arg_tup = ()
    str_lens = []
    type_list = []
    M = 1
    for key in arg_keys:
        str_vals = [str(entry) for entry in argarrs[key]]
        str_lens.extend([len(entry) for entry in str_vals])
        type_list.append(argarrs[key].dtype)
        #print key,str_vals,str_lens
        M *= len(argarrs[key])
        #print str_vals,str_lens
        arg_tup += (str_vals,)
    #print 'debug',type_list
    max_str_lens = max(str_lens)
    all_arg_combos = np.zeros((M,len(arg_keys)),dtype='S{0:d}'.format(max_str_lens))
    all_arg_combos = pd.DataFrame(cartesian(arg_tup,all_arg_combos),columns=arg_keys)
    for i,currtype in enumerate(type_list):
        if currtype == np.bool:
            all_arg_combos[arg_keys[i]] = (all_arg_combos[arg_keys[i]] == 'True')
        else:
            all_arg_combos[arg_keys[i]] = all_arg_combos[arg_keys[i]].astype(currtype)
    return all_arg_combos
Example 11
def combine_args(**argarrs):  #argarrs are [arg name]=[list of values]
    #Get all permutations of the arguments. Returns a pandas data frame with the argument names as the columns and the cartesian product of all their possible values.
    #Note that this can't handle None values (at least not yet)
    arg_keys = argarrs.keys()
    if len(arg_keys) == 0:
        raise ValueError(
            "Must be at least one keyword argument (if you don't want to train multiple models just use lists with single entries"
        )
    arg_tup = ()
    str_lens = []
    type_list = []
    M = 1
    for key in arg_keys:
        str_vals = [str(entry) for entry in argarrs[key]]
        str_lens.extend([len(entry) for entry in str_vals])
        type_list.append(argarrs[key].dtype)
        #print key,str_vals,str_lens
        M *= len(argarrs[key])
        #print str_vals,str_lens
        arg_tup += (str_vals, )
    #print 'debug',type_list
    max_str_lens = max(str_lens)
    all_arg_combos = np.zeros((M, len(arg_keys)),
                              dtype='S{0:d}'.format(max_str_lens))
    all_arg_combos = pd.DataFrame(cartesian(arg_tup, all_arg_combos),
                                  columns=arg_keys)
    for i, currtype in enumerate(type_list):
        if currtype == np.bool:
            all_arg_combos[arg_keys[i]] = (
                all_arg_combos[arg_keys[i]] == 'True')
        else:
            all_arg_combos[arg_keys[i]] = all_arg_combos[arg_keys[i]].astype(
                currtype)
    return all_arg_combos
Example 12
def gen_training_batch(n_input_dims, n_output_dims, n_possible_tasks, task_ids):
    n_input_units = n_input_dims*2
    n_output_units = n_output_dims*2
    n_inputs = 2**n_input_dims
    idx_list = []
    for i in range(n_input_dims):
        idx_list.append([i*2, i*2+1])
    idx_list = cartesian(idx_list)

    inputs_list = np.zeros((idx_list.shape[0], n_input_units))
    for i in range(idx_list.shape[0]):
        inputs_list[i, :][idx_list[i]] = 1

    inputs_list = np.tile(inputs_list, (len(task_ids), 1))

    task_list = np.zeros((n_inputs*len(task_ids), n_possible_tasks))
    for i in range(len(task_ids)):
        task_list[i*n_inputs:(i*n_inputs+n_inputs), task_ids[i]] = 1

    outputs_list = np.zeros((n_inputs*len(task_ids), n_output_units))
    for i in range(len(task_ids)):
        for j in range(len(task_ids[i])):
            input_dim, output_dim = get_task_dims(n_input_dims, n_output_dims, task_ids[i][j])
            input_pattern = inputs_list[i*n_inputs:(i*n_inputs+n_inputs), input_dim*2:input_dim*2+2]
            outputs_list[i*n_inputs:(i*n_inputs+n_inputs), output_dim*2:output_dim*2+2] = input_pattern

    return inputs_list, task_list, outputs_list
Example 13
    def __init__(self,
                 resized_height,
                 resized_width,
                 p=-9,
                 return_2_terms=False,
                 device=torch.device('cpu')):
        """
        :param resized_height: Number of rows in the image.
        :param resized_width: Number of columns in the image.
        :param p: Exponent in the generalized mean. -inf makes it the minimum.
        :param return_2_terms: Whether to return the 2 terms
                               of the WHD instead of their sum.
                               Default: False.
        :param device: Device where all Tensors will reside.
        """
        super(nn.Module, self).__init__()

        # Prepare all possible (row, col) locations in the image
        self.height, self.width = resized_height, resized_width
        self.resized_size = torch.tensor([resized_height, resized_width],
                                         dtype=torch.get_default_dtype(),
                                         device=device)
        self.max_dist = math.sqrt(resized_height**2 + resized_width**2)
        self.n_pixels = resized_height * resized_width
        self.all_img_locations = torch.from_numpy(
            cartesian([np.arange(resized_height),
                       np.arange(resized_width)]))
        # Convert to appropriate type
        self.all_img_locations = self.all_img_locations.to(
            device=device, dtype=torch.get_default_dtype())

        self.return_2_terms = return_2_terms
        self.p = p
Example 14
    def _generate_sample(self, n, sample_type, generate_pars):
        # create the array using the spacing method of choice
        raw_sample = None
        if sample_type == "sobol":
            from sobol_seq import i4_sobol_generate

            raw_sample = i4_sobol_generate(len(generate_pars), n)
        elif sample_type == "saltelli":
            from SALib.sample import saltelli

            problem = {
                "names": generate_pars,
                "bounds": [[0, 1] for x in generate_pars],
                "num_vars": len(generate_pars),
            }
            raw_sample = saltelli.sample(problem, n, True)

        elif sample_type == "grid":
            from sklearn.utils.extmath import cartesian

            temp = np.linspace(0, 1, n)
            raw_sample = cartesian([temp for i in range(len(generate_pars))])

        elif sample_type == "random":
            raw_sample = np.random.random((n, len(generate_pars)))
        assert raw_sample is not None, "something went wrong - check that type is correct"
        print("expected shape is {}".format(raw_sample.shape))
        # map the raw array to bounds, adhering to log scaling rules
        scaled_sample = self.log_scale_matrix(raw_sample)
        return scaled_sample
Example 15
def generate_logistic_parameters(features_num,
                                 step_size=0.2,
                                 min_val=-1.0,
                                 max_val=1.0,
                                 digits=1,
                                 items_as_np=True,
                                 include_zero=False):
    """
       i = round(max_val, digits)
        while i <= min_val:
            if include_zero or i != 0:
                feature_values.append(i)
            i -= step_size
            i = round(i ,digits)

    """

    feature_values = []

    i = round(max_val, digits)
    while i >= min_val:
        if include_zero or i != 0:
            feature_values.append(i)
        i -= step_size
        i = round(i, digits)

    feature_values = np.array(feature_values)
    features = (feature_values for _ in range(features_num))
    grid = cartesian(features)

    if items_as_np:
        grid = [np.array(i) for i in grid]

    return grid
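A short usage sketch for generate_logistic_parameters, assuming the function above and its imports (NumPy and sklearn's cartesian) are available. With two features, a 0.5 step and zero excluded, the admissible values are [1.0, 0.5, -0.5, -1.0], so the grid has 4 ** 2 entries:

grid = generate_logistic_parameters(features_num=2,
                                     step_size=0.5,
                                     min_val=-1.0,
                                     max_val=1.0,
                                     include_zero=False)
print(len(grid))  # 16 combinations
print(grid[0])    # [1. 1.] -- the first row of the cartesian grid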
Example 16
def extract_distances_series(nodes_list: pd.Series,
                             shortest_paths_matrix: np.matrix,
                             nodes_mapping: Dict) -> pd.Series:
    """
    Given a list of two lists of nodes, return the shortest paths between all
    pairs of nodes drawn from each list.
    e.g. Given [[node_1, node_2], [node_3]] this will output the shortest paths
    between nodes 1 and 3 and between nodes 2 and 3.
    :param nodes_list:
    :param shortest_paths_matrix: matrix generated by extract_shortest_paths_matrix
    :param nodes_mapping: dict
    {
        node_name found in node_list : id used to encode the node in the matrix
    }
    e.g
    { node1: 1, node2: 2, node3: 3}
    :return:
    """
    if isinstance(
            nodes_list,
            list) and len(nodes_list) == 2 and nodes_list[0] and nodes_list[1]:
        mapped_nodes_1 = np.array(
            [nodes_mapping[node] for node in nodes_list[0]])
        mapped_nodes_2 = np.array(
            [nodes_mapping[node] for node in nodes_list[1]])
        c = cartesian((mapped_nodes_1, mapped_nodes_2))
        return shortest_paths_matrix[c[:, 0], c[:, 1]].tolist()[0]
Example 17
 def weights(dim, degree):
     # 1D sigma-points (x) and weights (w)
     x, w = hermegauss(degree)
     # hermegauss() provides weights that cause posdef errors
     w = factorial(degree) / (degree**2 * hermeval(x, [0] *
                                                   (degree - 1) + [1])**2)
     return np.prod(cartesian([w] * dim), axis=1)
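The final line turns the 1-D quadrature weights into dim-dimensional ones by multiplying every combination of them. A minimal illustration of that step alone, with made-up weights rather than the hermegauss-derived ones above:

import numpy as np
from sklearn.utils.extmath import cartesian

w = np.array([0.25, 0.5, 0.25])   # toy 1-D weights that sum to 1
dim = 2
w_nd = np.prod(cartesian([w] * dim), axis=1)
print(w_nd.shape)  # (9,): one weight per pair of 1-D sigma-points
print(w_nd.sum())  # 1.0, since the 1-D weights sum to 1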
Example 18
def compute_reward(grid_map, cell_list, passenger_list, rew):
    """
    Compute the reward matrix.

    Args:
        grid_map (list): list containing the grid structure;
        cell_list (list): list of non-wall cells;
        passenger_list (list): list of passenger cells;
        rew (tuple): rewards obtained in goal states.

    Returns:
        The reward matrix.

    """
    g = np.array(grid_map)
    c = np.array(cell_list)
    n_states = len(cell_list) * 2**len(passenger_list)
    r = np.zeros((n_states, 4, n_states))
    directions = [[-1, 0], [1, 0], [0, -1], [0, 1]]
    passenger_states = cartesian([[0, 1]] * len(passenger_list))

    for goal in np.argwhere(g == 'G'):
        for a in range(len(directions)):
            prev_state = goal - directions[a]
            if prev_state in c:
                for i in range(len(passenger_states)):
                    i_idx = np.where((c == prev_state).all(axis=1))[0] + len(
                        cell_list) * i
                    j_idx = j = np.where((c == goal).all(axis=1))[0] + len(
                        cell_list) * i

                    r[i_idx, a, j_idx] = rew[np.sum(passenger_states[i])]

    return r
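Here cartesian([[0, 1]] * len(passenger_list)) enumerates every on-board/not-on-board combination of the passengers. For example, with two passengers (a standalone sketch assuming sklearn's cartesian):

from sklearn.utils.extmath import cartesian

passenger_states = cartesian([[0, 1]] * 2)
print(passenger_states)
# [[0 0]
#  [0 1]
#  [1 0]
#  [1 1]]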
Example 19
def compute_reward(grid_map, cell_list, passenger_list, rew):
    """
    Compute the reward matrix.

    Args:
        grid_map (list): list containing the grid structure;
        cell_list (list): list of non-wall cells;
        passenger_list (list): list of passenger cells;
        rew (tuple): rewards obtained in goal states.

    Returns:
        The reward matrix.

    """
    g = np.array(grid_map)
    c = np.array(cell_list)
    n_states = len(cell_list) * 2**len(passenger_list)
    r = np.zeros((n_states, 4, n_states))
    directions = [[-1, 0], [1, 0], [0, -1], [0, 1]]
    passenger_states = cartesian([[0, 1]] * len(passenger_list))

    for goal in np.argwhere(g == 'G'):
        for a in range(len(directions)):
            prev_state = goal - directions[a]
            if prev_state in c:
                for i in range(len(passenger_states)):
                    i_idx = np.where(
                        (c == prev_state).all(axis=1))[0] + len(cell_list) * i
                    j_idx = j = np.where(
                        (c == goal).all(axis=1))[0] + len(cell_list) * i

                    r[i_idx, a, j_idx] = rew[np.sum(passenger_states[i])]

    return r
Example 20
def zrand_convolve(labelgrid, neighbors='edges'):
    """
    Calculates the avg and std z-Rand index using kernel over `labelgrid`

    Kernel is determined by `neighbors`, which can include all entries with
    touching edges (i.e., 4 neighbors) or corners (i.e., 8 neighbors).

    Parameters
    ----------
    labelgrid : (S, K, N) array_like
        Array containing cluster labels for each of `N` samples, where `S` is mu
        and `K` is K.
    neighbors : str, optional
        How many neighbors to consider when calculating Z-rand kernel. Must be
        in ['edges', 'corners']. Default: 'edges'

    Returns
    -------
    zrand_avg : (S, K) np.ndarray
        Array containing average of the z-Rand index calculated using provided
        neighbor kernel
    zrand_std : (S, K) np.ndarray
        Array containing standard deviation of the z-Rand index
    """

    inds = cartesian([range(labelgrid.shape[0]), range(labelgrid.shape[1])])
    zrand = np.empty(shape=labelgrid.shape[:-1] + (2, ))
    for x, y in inds:
        ninds = get_neighbors(x, y, neighbors=neighbors, shape=labelgrid.shape)
        zrand[x, y] = zrand_partitions(labelgrid[ninds].T)

    return zrand[..., 0], zrand[..., 1]
Example 21
def calc_cartesian_group_assignment_p(group_assignment_p_list):
    cart = np.array(
        [[
           np.prod(values)
           for values in cartesian(line)]
         for line in zip(*group_assignment_p_list)])
    return cart
Example 22
    def search(self):
        arg_keywords = np.array([key for key, _ in self.data.items()])
        combinations = cartesian([self.data[x] for x in arg_keywords])
        print(combinations)
        print(combinations.shape[0], 'combinations.')
        results = []
        dictionaries_list = []

        for parameter_combination in combinations:
            # Initializing Population
            optimization = Optimization(
                self.image_path,
                population_size=self.population_size,
                polygons_count=self.polygons_count
            )

            dict = {}
            for column, value in enumerate(parameter_combination):
                dict[arg_keywords[column]] = value
            dictionaries_list.append(dict)
            results.append(optimization.evolve_during(**dict)[1])

        # Ordering results
        order = sorted(range(len(results)), key=lambda k: results[k], reverse=True)
        dictionaries_list = [dictionaries_list[i] for i in order]
        results = [results[i] for i in order]

        for index, result in enumerate(results):
            print('Improvement ' + str(result), '\tParameters: ' + str(dictionaries_list[index]))
Example 23
def rectangles_to_states(rectangles):
    states = []
    for rect in rectangles:
        rect_axes = [np.arange(rect_d[0], rect_d[1]) for rect_d in rect]
        states += list(cartesian(rect_axes))
    states = np.array(states)
    return states
Example 24
    def CRS(self, UpperLimit, LowerLimit, SampleNum, ParaMode=None):
        try:
            if len(SampleNum) == 1:
                raise ValueError
        except ValueError as e:
            print(e.args)
            print('The parameter sets have to be at least two-dimensional.')
            exit()

        paratmp = np.zeros(len(UpperLimit), dtype=object)
        for itr, mode in enumerate(ParaMode):
            if mode == 'log scale':
                tmpfunc = np.logspace
            else:
                tmpfunc = np.linspace

            ParaSet = tmpfunc(UpperLimit[itr], LowerLimit[itr], SampleNum[itr])
            paratmp[itr] = ParaSet

        para = cartesian([paratmp[0], paratmp[1]])

        with open('CRS_Parameter.pickle', 'wb') as picklefile:
            cPickle.dump(para, picklefile, True)
            #==========LHS_Parameter.pickle==============#
            #[[Prod, Deg, Bind, Diffu
            #........................
            #........................
            # .......................]]
        print(para)
        return para
Example 25
def factorial_design(n,d,plot_=False):
    # n is the number of points per dimension
    # d is the number of factors (dimensions)
    # a full factorial design is when the number of levels = the number of factors
    #otmp=[]
    #if type(d) is int:
    otmp=d*[np.arange(n)]
    #else:
    #        for i in n:
    #        otmp.add(range(i))

    o=cartesian(otmp)

    D=(o-np.min(o))/(np.max(o)-np.min(o))
    if plot_:
        plt.close()
        fig1=plt.figure()
        ax=fig1.add_subplot(111)
        #fig,ax = plt.subplots()
        ax.scatter(D[:,0].reshape((D.shape[0],1)),D[:,1].reshape((D.shape[0],1)))
        ax.set_title('dim-1,dim-2 full factorial design')
        ax.set_xlabel('dim-1')
        ax.set_ylabel('dim-2')
        return(D,fig1)
        #fig.show()
    return(D)
Example 26
    def _bspline_direct_elementwise(self):
        # important later for reshapes into elem colloc matrices
        self.B = [ Bk.tocsr() for Bk in self.B ]
        Bel = [None] * self.domain.dim

        nip_el = self.quadrature.deg
        gridshape = [ self.domain.nelem(k) for k in range(0, self.domain.dim) ]

        # get elementwise univariate colloc matrices
        for k in range(0, self.domain.dim):
            Bel[k] = [None] * self.domain.nelem(k)
            for el in range(0, self.domain.nelem(k)):
                Bel[k][el] = self.B[k][:, nip_el[k]*el:nip_el[k]*el+nip_el[k]].data.reshape(-1, nip_el[k])

        # get kronecker jacobian on each element as a view of self.J
        Jel = [None] * self.domain.nelem()
        for el in range(0, self.domain.nelem()):
            el_mulidx = np.unravel_index(el, gridshape)
            slices = tuple([slice(nip_el[k]*el_mulidx[k], nip_el[k]*el_mulidx[k]+nip_el[k]) for k in range(0, self.domain.dim)])
            Jel[el] = self.J[slices].view().reshape(-1)
        
        # allocate and initialize global matrices
        A = np.zeros((np.prod(self.domain.nbfuns), np.prod(self.domain.nbfuns)))
        B = np.zeros((np.prod(self.domain.nbfuns), np.prod(self.domain.nbfuns)))

        # compute tensorproduct bsplines collocation matrix at greville abs
        Bi = self.Bgp[0]
        for k in range(1, self.domain.dim):
            Bi = kron(Bi, self.Bgp[k])
        Bi = Bi.tocsr()

        # compute element matrices and add contribution to A
        for el in range(0, self.domain.nelem()):
            # get kronecker xips on each element 
            el_mulidx = np.unravel_index(el, gridshape)
            slices = tuple([slice(nip_el[d]*el_mulidx[d], nip_el[d]*el_mulidx[d]+nip_el[d]) for d in range(0, self.domain.dim)])
            el_ip = [ self.quadrature.ip[d][slices[d]] for d in range(0, self.domain.dim) ]
            el_xip = [ self.domain.eval(el_ip, d) for d in range(self.domain.dim) ]

            # define index maps 
            ldofs = [ self.B[d][:,nip_el[d]*el_mulidx[d]].nonzero()[0] for d in range(0, self.domain.dim) ]
            gdofs = np.ravel_multi_index(cartesian(ldofs).transpose(), self.domain.nbfuns)

            # compute the kernel on the element
            Gel = self.kernel(
              _kern_pts_to_mulidx(self.gpp),
              _kern_pts_to_mulidx(el_xip), self.data)
        
            # precompute element basis matrices
            Bj = Bel[0][el_mulidx[0]] * self.quadrature.weights[0][slices[0]]
            for k in range(1, self.domain.dim):
                Bj = np.kron(Bj, Bel[k][el_mulidx[k]] * self.quadrature.weights[k][slices[k]])
            Bj = Bj * Jel[el]

            # assemble A on the element
            Apart = Gel @ Bj.transpose() # I have to sum over the elements twice here!!!!
                
            A[:,gdofs] += Apart

        return A, Bi.transpose()
Example 27
 def _initialize_event_orders(self,timestamps,order_type):
     symb_matrix  = mth.cartesian([np.array(timestamps),self.event_matrix.columns.values])
     symb_matrix  = symb_matrix.reshape(len(timestamps),len(self.event_matrix.columns.values),2)
     order_timestamps  = symb_matrix[~np.isnan(self.event_matrix.values),0]
     order_dataframe   = pd.DataFrame(symb_matrix[~np.isnan(self.event_matrix.values),1], columns=['Symbol'] )
     order_dataframe['Buy'] = order_type
     return (order_dataframe,order_timestamps)
Example 28
def get_representations(dataset, postprocess_dir, dataset_name):
    batch_size = 32
    module_path = os.path.join(postprocess_dir, "tfhub")
    reps = []

    with hub.eval_function_for_module(module_path) as f:

        def _representation_function(x):
            """Computes representation vector for input images."""
            output = f(dict(images=x),
                       signature="representation",
                       as_dict=True)
            return np.array(output["default"])

        for index in range(0, len(dataset.images), batch_size):
            batch = dataset.images[
                index:min(index + batch_size, dataset.images.shape[0]), :]
            if dataset_name == "smallnorb":
                batch = np.expand_dims(batch, axis=3)

            rep = _representation_function(batch)
            reps.append(rep)
        reps = np.vstack(reps)

    # factors
    factors = cartesian(
        [np.array(list(range(i))) for i in dataset.factors_num_values])
    return factors, reps
Example 29
def find_weights(df, max_depth, init_step=0.1):
    trmse_weights = []
    for i in range(max_depth):
        curr_step = init_step / (2.0 ** i)
        if i == 0: 
            tbase_weights = [np.arange(0., 1, init_step) for i in range(n_dfs - 1)]
        else:
            tbase_weights = [np.arange(max(0., trmse_weights[0][1][i] - curr_step),
                                       min(1., trmse_weights[0][1][i] + curr_step * 2),
                                       curr_step) for i in range(n_dfs - 1)]
        tcartesian_w = cartesian(tbase_weights)
        tsummed_weights = np.sum(tcartesian_w, axis=1)
        tcartesian_w = tcartesian_w[tsummed_weights <= 1.0, :]
        tsummed_weights = tsummed_weights[tsummed_weights <= 1.0]
        tsummed_weights = tsummed_weights.reshape(-1, 1)
        tcartesian_w = np.hstack((tcartesian_w, 1. - tsummed_weights))
        
        print('Current depth:', str(i) + ';', tcartesian_w.shape[0], 'weight combinations')
        for j in range(tcartesian_w.shape[0]):
            if j % 100 == 0 and j > 100:
                print(j)
            trmse = calc_rmse(tcartesian_w[j, :], full_df)
            trmse_weights.append((trmse, tcartesian_w[j, :]))
            trmse_weights.sort(key=lambda x: x[0])
        print('Best result:', trmse_weights[0][0], '\n')
    return trmse_weights
Example 30
def minimum_cost_flow_problem_graph(X, C, D, size_min, size_max):

    # Setup minimum cost flow formulation graph
    # Vertices indexes:
    # X-nodes: [0, n(x)-1], C-nodes: [n(X), n(X)+n(C)-1], C-dummy nodes:[n(X)+n(C), n(X)+2*n(C)-1],
    # Artificial node: [n(X)+2*n(C), n(X)+2*n(C)+1-1]

    # Create indices of nodes
    n_X = X.shape[0]
    n_C = C.shape[0]
    X_ix = np.arange(n_X)
    C_dummy_ix = np.arange(X_ix[-1] + 1, X_ix[-1] + 1 + n_C)
    C_ix = np.arange(C_dummy_ix[-1] + 1, C_dummy_ix[-1] + 1 + n_C)
    art_ix = C_ix[-1] + 1

    # Edges
    edges_X_C_dummy = cartesian(
        [X_ix, C_dummy_ix])  # All X's connect to all C dummy nodes (C')
    edges_C_dummy_C = np.stack(
        [C_dummy_ix, C_ix],
        axis=1)  # Each C' connects to a corresponding C (centroid)
    edges_C_art = np.stack([C_ix, art_ix * np.ones(n_C)],
                           axis=1)  # All C connect to artificial node

    edges = np.concatenate([edges_X_C_dummy, edges_C_dummy_C, edges_C_art])

    # Costs
    costs_X_C_dummy = D.reshape(D.size)
    costs = np.concatenate(
        [costs_X_C_dummy,
         np.zeros(edges.shape[0] - len(costs_X_C_dummy))])

    # Capacities - can set for max-k
    capacities_C_dummy_C = size_max * np.ones(n_C)
    cap_non = n_X  # The total supply and therefore won't restrict flow
    capacities = np.concatenate([
        np.ones(edges_X_C_dummy.shape[0]), capacities_C_dummy_C,
        cap_non * np.ones(n_C)
    ])

    # Sources and sinks
    supplies_X = np.ones(n_X)
    supplies_C = -1 * size_min * np.ones(n_C)  # Demand node
    supplies_art = -1 * (n_X - n_C * size_min)  # Demand node
    supplies = np.concatenate([
        supplies_X,
        np.zeros(n_C),  # C_dummies
        supplies_C,
        [supplies_art]
    ])

    # All arrays must be of int dtype for `SimpleMinCostFlow`
    edges = edges.astype('int32')
    costs = np.around(costs * 1000, 0).astype(
        'int32')  # Times by 1000 to give extra precision
    capacities = capacities.astype('int32')
    supplies = supplies.astype('int32')

    return edges, costs, capacities, supplies, n_C, n_X
Example 31
 def __init__(self, W, H, alpha=2):
     self.W = W
     self.H = H
     self.alpha = alpha
     self.all_img_locations = tf.convert_to_tensor(cartesian(
         [np.arange(W), np.arange(H)]),
                                                   dtype=tf.float32)
     self.max_dist = math.sqrt(W**2 + H**2)
Example 32
 def _set_state_combinations_if_necessary(self):
     """Get centroids"""
     # If we import sklearn at the top of the file then auto doc fails.
     if (self.state_combinations is None or
             self.state_combinations.shape[1] != len(self.model)):
         from sklearn.utils.extmath import cartesian
         centroids = [model['states'] for model in self.model]
         self.state_combinations = cartesian(centroids)
Example 33
 def _set_state_combinations_if_necessary(self):
     """Get centroids"""
     # If we import sklearn at the top of the file then auto doc fails.
     if (self.state_combinations is None
             or self.state_combinations.shape[1] != len(self.model)):
         from sklearn.utils.extmath import cartesian
         centroids = [model['states'] for model in self.model]
         self.state_combinations = cartesian(centroids)
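In these combinatorial-optimisation snippets each centroid array holds the power states learnt for one appliance, and cartesian enumerates every joint combination across appliances. A toy sketch of what state_combinations ends up containing (the appliance states below are invented):

import numpy as np
from sklearn.utils.extmath import cartesian

centroids = [np.array([0, 100]),       # appliance 1 states (watts)
             np.array([0, 50, 200])]   # appliance 2 states (watts)
state_combinations = cartesian(centroids)
print(state_combinations)
# [[  0   0]
#  [  0  50]
#  [  0 200]
#  [100   0]
#  [100  50]
#  [100 200]]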
Example 34
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles of ``X``.
    The grid is a cartesian product between the columns of Z. The ith column of
    Z consists in ``grid_resolution`` equally-spaced points between the
    percentiles of the ith column of X.
    If ``grid_resolution`` is bigger than the number of unique values in the
    ith column of X, then those unique values will be used instead.
    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid.
    grid_resolution : int
        The number of equally spaced points to be placed on the grid for a
        given column.
    Returns
    -------
    grid : ndarray, shape=(n_points, X.shape[1])
        All data points on the grid. n_points is always ``<= grid_resolution **
        X.shape[1]``.
    Z: list of ndarray
        The values with which the grid has been created. The ndarrays may be of
        different shape: either (grid_resolution,) or (n_unique_values,).
    """
    try:
        assert len(percentiles) == 2
    except (AssertionError, TypeError):
        raise ValueError('percentiles must be a sequence of 2 elements.')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentiles values must be in [0, 1].')
    if percentiles[0] >= percentiles[1]:
        raise ValueError('percentiles[0] must be strictly less '
                         'than percentiles[1].')

    if grid_resolution <= 1:
        raise ValueError('grid_resolution must be strictly greater than 1.')

    values = []
    for feature in range(X.shape[1]):
        uniques = np.unique(X[:, feature])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
            if np.allclose(emp_percentiles[0, feature],
                           emp_percentiles[1, feature]):
                raise ValueError('percentiles are too close to each other, '
                                 'unable to build the grid.')
            axis = np.linspace(emp_percentiles[0, feature],
                               emp_percentiles[1, feature],
                               num=grid_resolution, endpoint=True)
        values.append(axis)

    return cartesian(values), values
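A brief usage sketch for _grid_from_X, assuming the function above is importable together with its dependencies (NumPy, scipy.stats.mstats.mquantiles and sklearn's cartesian):

import numpy as np

# Hypothetical two-feature data; with grid_resolution=5 each axis gets 5
# equally spaced points between the 5th and 95th percentiles, and the
# returned grid is their cartesian product.
X = np.random.RandomState(0).uniform(size=(200, 2))
grid, values = _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=5)
print(grid.shape)                 # (25, 2)
print([v.shape for v in values])  # [(5,), (5,)]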
Example 35
 def create_bias_data(self, layer_index):
     is_weight = np.asarray([0])
     layer = np.asarray([layer_index])
     weight_row = np.asarray([0])
     weight_column = np.asarray([0])
     bias = np.arange(self.layer_sizes[layer_index][1])
     bias_data = cartesian([is_weight, layer, weight_row, weight_column, bias])
     bias_data = bias_data.astype(np.float32)
     return torch.autograd.Variable(torch.from_numpy(bias_data))
Example 36
def generate_CIELab_space(rgb_space=aRGB, axis_stride=0.1):
    # 3 axes, equal strides along each
    axes = [np.arange(0, 1+axis_stride, axis_stride)]*3
    rgb_points = cartesian(axes)
    lab_points = []
    for row in range(len(rgb_points)):
        lab_points.append(RGB_to_Lab(rgb_space, rgb_points[row, :]))
    # dist is squared euclidean, so JND threshold is 0.23^2
    return np.array(lab_points)
Example 37
    def setUp(self):
        self.useLocal = False

        if self.useLocal:
            self.tempdir = tempdir = '.'
        else:
            self.tempdir = tempdir = mkdtemp(prefix='patty-analytics')

        self.drivemapLas = os.path.join(tempdir, 'testDriveMap.las')
        self.sourcelas = os.path.join(tempdir, 'testSource.las')
        self.footprint_csv = os.path.join(tempdir, 'testFootprint.csv')
        self.foutlas = os.path.join(tempdir, 'testOutput.las')

        self.min = -10
        self.max = 10
        self.num_rows = 1000

        # Create plane with a pyramid
        dm_pct = 0.5
        dm_rows = np.round(self.num_rows * dm_pct)
        dm_min = self.min * dm_pct
        dm_max = self.max * dm_pct

        delta = dm_max / dm_rows
        shape_side = dm_max - dm_min

        dm_offset = [0, 0, 0]
        self.dense_obj_offset = [3, 2, -(1 + shape_side / 2)]

        # make drivemap
        plane_row = np.linspace(
            start=self.min, stop=self.max, num=self.num_rows)
        plane_points = cartesian((plane_row, plane_row, [0]))

        shape_points, footprint = make_tri_pyramid_with_base(
            shape_side, delta, dm_offset)
        np.savetxt(self.footprint_csv, footprint, fmt='%.3f', delimiter=',')

        dm_points = np.vstack([plane_points, shape_points])
        plane_grid = np.zeros((dm_points.shape[0], 6), dtype=np.float32)
        plane_grid[:, 0:3] = dm_points

        self.drivemap_pc = pcl.PointCloudXYZRGB(plane_grid)
        self.drivemap_pc = downsample_voxel(self.drivemap_pc,
                                            voxel_size=delta * 20)
        # utils.set_registration(self.drivemap_pc)
        utils.save(self.drivemap_pc, self.drivemapLas)

        # Create a simple pyramid
        dense_grid = np.zeros((shape_points.shape[0], 6), dtype=np.float32)
        dense_grid[:, 0:3] = shape_points + self.dense_obj_offset

        self.source_pc = pcl.PointCloudXYZRGB(dense_grid)
        self.source_pc = downsample_voxel(self.source_pc, voxel_size=delta * 5)
        utils.save(self.source_pc, self.sourcelas)
Example 38
def interpolate_image(image_data, zoom_factor):

    X = np.arange(image_data.shape[0])
    Y = np.arange(image_data.shape[1])

    rgi = RegularGridInterpolator((X, Y), image_data)

    grid_x, grid_y = (np.linspace(0, len(X)-1, zoom_factor*len(X)), 
                      np.linspace(0, len(Y)-1, zoom_factor*len(Y)))

    return rgi(cartesian([grid_x, grid_y])).reshape(grid_x.shape[0], grid_y.shape[0])
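A brief usage sketch for interpolate_image, assuming NumPy, scipy.interpolate.RegularGridInterpolator and sklearn's cartesian are imported as in the snippet above; a 2 x 2 image upsampled by a factor of 2 comes back as a 4 x 4 array:

import numpy as np

# Hypothetical 2 x 2 grayscale image.
image = np.array([[0.0, 1.0],
                  [2.0, 3.0]])
zoomed = interpolate_image(image, zoom_factor=2)
print(zoomed.shape)                  # (4, 4)
print(zoomed[0, 0], zoomed[-1, -1])  # corners stay 0.0 and 3.0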
Example 39
def test_cgauss_likelihood():
    mu = np.array([0], dtype='float')
    sigma = np.array([2], dtype='float')
    x = np.linspace(-1, 2, 2)
    lapse = np.array([0], dtype='float')
    parameters = cartesian((mu, sigma, lapse, x))
    proportionMethod = PsiMarginal.pf(parameters, psyfun='cGauss')
    samples = np.random.normal(mu, sigma, (200000, 1))
    proportionSamples = np.empty([2, ])
    proportionSamples[0] = np.mean(samples <= x[0])  # cdf is p(X<=x), compute this through sampling to check likelihood
    proportionSamples[1] = np.mean(samples <= x[1])
    np.testing.assert_almost_equal(proportionSamples, proportionMethod, decimal=2) == 1
Example 40
def voxel2voxels_in_volume(x, y, z, stepX, stepY, stepZ):
    """
    Returns a numpy array with all the voxels in the volume corresponding to representative (x, y, z).

    Here we assume that the representative is the upper, left, front pixel of a (stepX, stepY, stepZ) sized volume.
    """
    # This is what Andrew originally used. Probably not fully correct, but practical.
    # We could also just return slices and let numpy do its tiling magic...
    # This should be hidden in an up/down sampler object
    return cartesian((np.arange(x, x + stepX),
                      np.arange(y, y + stepY),
                      np.arange(z, z + stepZ)))
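voxel2voxels_in_volume simply expands a representative voxel into the full block of coordinates it stands for; a quick standalone check of the shape, assuming NumPy and sklearn's cartesian:

import numpy as np
from sklearn.utils.extmath import cartesian

x, y, z = 10, 20, 30
stepX, stepY, stepZ = 2, 2, 3
block = cartesian((np.arange(x, x + stepX),
                   np.arange(y, y + stepY),
                   np.arange(z, z + stepZ)))
print(block.shape)  # (12, 3): stepX * stepY * stepZ voxel coordinates
print(block[0])     # [10 20 30], the representative itself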
Example 41
    def stateact_to_feature(self, state, act, onlyindex=True):
        zedaind = []
        for nm, xs in sorted(self.feature_tiles.items()):
            val = None
            if nm == 'speedx':
                val = state.getSpeedX()
            elif nm == 'trackpos':
                val = state.getTrackPos()
            elif nm == 'angle':
                val = state.getAngle()
            
            #print val, nm
            inds = []
            if not val == None:
                # on of the above
                for i in range(len(xs) - 1):
                    if xs[i][0] <= val < xs[i + 1][1]:
                        inds.append(i)

                zedaind.append(inds)

            elif nm == 'track':
                # remaning are trackpositions, lets get them
                tracks = np.array(state.getTrack()) / 200.
                sensors = []

                sensors.append(tracks[3]) # -40
                sensors.append((tracks[4] + tracks[5] + tracks[6])/3.)
                sensors.append((tracks[9] + tracks[8] + tracks[10]) / 3.) # 0
                sensors.append((tracks[12] + tracks[13] + tracks[14])/3.)
                sensors.append(tracks[15])
                if self.arguments.show_sensors:
                    print(sensors)
                for val in sensors:
                    for i in range(len(xs) - 1):
                        if xs[i] <= val <= xs[i + 1]:
                            inds.append(i)
                            break
            else:
                assert False
        zedaind.append([act]) 
        #print 'feature shape-', self.w.shape,'index length-',  len(ind)
        #print ind
        assert len(zedaind) == len(self.w.shape), 'ind %s, w %s' % (str(zedaind), str(self.w.shape))
        if onlyindex:
            return tuple(zedaind)
        else:
            ft = np.zeros_like(self.w)

            for tot in cartesian(zedaind):
                ft[tuple(tot)] = 1
            return ft
Example 42
 def optimize(self):
     best_sharpe_ratio = 0
     best_allocation = []
     num_symbols = len(self.portfolio.get_symbols())
     steps = numpy.linspace(0, 1, int(round(1 / self.stepsize)) + 1)
     allocations = cartesian([steps]*num_symbols)
     legal_allocations = allocations[numpy.where(allocations.sum(1)==1)]
     for allocation in legal_allocations:
         sharpe = self.portfolio.simulate(allocation)[2]
         if sharpe > best_sharpe_ratio:
             best_sharpe_ratio = sharpe
             best_allocation = allocation
     return (best_allocation, best_sharpe_ratio)
     
Example 43
    def train(self, metergroup, num_states_dict={}, **load_kwargs):
        """Train using 1D CO. Places the learnt model in the `model` attribute.

        Parameters
        ----------
        metergroup : a nilmtk.MeterGroup object

        Notes
        -----
        * only uses first chunk for each meter (TODO: handle all chunks).
        """

        if self.model:
            raise RuntimeError(
                "This implementation of Combinatorial Optimisation"
                " does not support multiple calls to `train`.")

        num_meters = len(metergroup.meters)
        if num_meters > 12:
            max_num_clusters = 2
        else:
            max_num_clusters = 3

        for i, meter in enumerate(metergroup.submeters().meters):
            print("Training model for submeter '{}'".format(meter))
            for chunk in meter.power_series(**load_kwargs):
                num_total_states = num_states_dict.get(meter)
                if num_total_states is not None:
                    num_on_states = num_total_states - 1
                else:
                    num_on_states = None
                states = cluster(chunk, max_num_clusters, num_on_states)
                self.model.append({
                    'states': states,
                    'training_metadata': meter})
                break  # TODO handle multiple chunks per appliance

        # Get centroids
        # If we import sklearn at the top of the file then auto doc fails.
        from sklearn.utils.extmath import cartesian
        centroids = [model['states'] for model in self.model]
        self.state_combinations = cartesian(centroids)
        # self.state_combinations is a 2D array
        # each column is a chan
        # each row is a possible combination of power demand values e.g.
        # [[0, 0, 0, 0], [0, 0, 0, 100], [0, 0, 50, 0],
        #  [0, 0, 50, 100], ...]

        print("Done training!")
Example 44
def spread_points_in_hypercube(point_count, dimension_count):  # TODO rename points_spread_in_hypercube
    """
    Place points in a unit hypercube such that the minimum distance between
    points is approximately maximal.
    
    Euclidean distance is used.
    
    .. note:: Current implementation simply puts the points in a hypergrid
    
    Parameters
    ----------
    point_count : int
        Number of points to pick
    dimension_count : int
        Number of dimensions of the hypercube
        
    Returns
    -------
    np.array(shape=(point_count, dimension_count))
        Points spread approximately optimally across the hypercube.
        
    Raises
    ------
    ValueError
        When ``point_count < 0 or dimension_count < 1``
        
    Notes
    -----
    The exact solution to this problem is known for only a few `n`.
    
    References
    ----------
    .. [1] http://stackoverflow.com/a/2723764/1031434
    """
    # Current implementation simply puts points in a grid
    if point_count < 0:
        raise ValueError("point_count must be at least 0")
    if dimension_count < 1:
        raise ValueError("dimension_count must be at least 1")
    if point_count == 0:
        return np.empty(shape=(0, dimension_count))
    side_count = int(np.ceil(point_count ** (1 / dimension_count)))  # number of points per side
    points = np.linspace(0, 1, side_count)
    points = cartesian([points] * dimension_count)
    return np.random.permutation(points)[:point_count]  # XXX permutation is unnecessary
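A usage sketch, assuming spread_points_in_hypercube is defined as above with NumPy imported; asking for 5 points in 2 dimensions builds a 3 x 3 grid and returns 5 of its 9 points in random order:

import numpy as np

pts = spread_points_in_hypercube(point_count=5, dimension_count=2)
print(pts.shape)       # (5, 2)
print(np.unique(pts))  # a subset of [0. , 0.5, 1. ]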
Example 45
def cartesian_prod_dicts_lists(the_dict):
    #takes a dictionary and produces a dictionary of the cartesian product of the input
    if not type(the_dict) is type(ordDict()):
        warnings.warn('An ordered dict was not used. Thus if this function is called again with the same dict it might not produce the same results.')

    from sklearn.utils.extmath import cartesian

    stim_list = []
    stim_list = tuple([ list(the_dict[ key_name ]) for key_name in the_dict ])

    #cartesian has the last column change the fastest, thus is like c-indexing
    stim_cart_array = cartesian(stim_list)

    cart_dict = ordDict()
    #load up the vectors assosciated with keys to cart_dict
    for key_name, key_num in zip(the_dict, range(len(the_dict))):
        cart_dict[key_name] = stim_cart_array[:, key_num]

    return cart_dict
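A usage sketch for cartesian_prod_dicts_lists; it assumes ordDict is an alias for collections.OrderedDict (as the warning in the function suggests) and that the function above is importable:

from collections import OrderedDict as ordDict

stim = ordDict([('contrast', [0.1, 0.5]), ('orientation', [0, 45, 90])])
cart = cartesian_prod_dicts_lists(stim)
print(list(cart.keys()))      # ['contrast', 'orientation']
print(len(cart['contrast']))  # 6 == 2 * 3 rows in the product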
Example 46
 def generate_predictor_data(self):
     from sklearn.utils.extmath import cartesian
     ps = np.linspace(*self.train_p_range)
     Ts = np.linspace(*self.train_T_range)
     rhs = atanspace(*self.train_rh_range, scaling=2.5)
     data = cartesian([ps, Ts, rhs])
     # Remove some (for Innsbruck) unrealistic data
     remove = (
             # Lower atmosphere is rather warm
             ((data[:,0] > 700) & (data[:,1] < 230))
             # Middle atmosphere
             | ((data[:,0] < 700) & (data[:,0] > 400)
                 & (data[:,1] > 300) | (data[:,1] < 200))
             # Upper atmosphere is rather cold
             | ((data[:,0] < 400) & (data[:,1] > 270))
             )
     data = data[~remove]
     # Calculate q
     data[:,2] = data[:,2] * qsat(p=data[:,0], T=data[:,1])
     return data
Example 47
    def __init__(self, T, N, eta, tau0, kappa, lambda_init=np.asarray([])):
        """
        Arguments:
        T: Length of SNP sequence
        N: Total number of people in the population.
        eta: Hyperparameter for prior on haplotype weights pi
        tau0: A (positive) learning parameter that downweights early iterations
        kappa: Learning rate: exponential decay rate---should be between
             (0.5, 1.0] to guarantee asymptotic convergence.

        Note that if you pass the same data in every time and
        set kappa=0 this class can also be used to do batch VB.
        """

        self._K = pow(2,T)
        self._T = T
        self._N = N

        # pi dist hyperparams
        self._eta = eta

        self._tau0 = tau0 + 1
        self._kappa = kappa

        # iteration counter, used for updating rho
        self._updatect = 0

        # Initialize the variational distribution q(pi|lambda)
        if (lambda_init.shape==(self._K,)):
            self._lambda = lambda_init
        else:
            # todo: not totally sure this is a sensible initialization
            self._lambda = np.random.gamma(10, 1. / 10, self._K)
        self._E_log_pi = dirichlet_expectation(self._lambda)
        self._exp_E_log_pi = np.exp(self._E_log_pi)

        #all theta values
        theta = cartesian(np.repeat(np.array([[0.01,0.99]]),T,0))
        self.logs_theta = np.zeros([self._K, self._T, 2])
        self.logs_theta[:,:,0] = np.log(theta)
        self.logs_theta[:,:,1] = np.log(1-theta)
Example 48
 def generate_state_combinations_all(self):
     mains = self.loc.elec.mains()
     
     from sklearn.utils.extmath import cartesian
     centroids = [model['states'] for model in self.co.model]
     state_combinations = cartesian(centroids)
             
     baseline = self.vampire_power
     if baseline is None:
         vampire_power = mains.vampire_power() 
     else:
         vampire_power = self.vampire_power
     n_rows = state_combinations.shape[0]
     vampire_power_array = np.zeros((n_rows, 1)) + vampire_power
     state_combinations = np.hstack((state_combinations, vampire_power_array))
     summed_power_of_each_combination = np.sum(state_combinations, axis=1)
             
     self.vampire_power = vampire_power
     self.state_combinations = state_combinations
     self.summed_power_of_each_combination = summed_power_of_each_combination
     return vampire_power, state_combinations, summed_power_of_each_combination
Example 49
def constructTensor(med_file, diag_file):    
    diag_med_comb = diag_cross_med(med_file, diag_file)
    ## create index map for subject_id, icdcode, and med_name
    patDict = createIndexMap(diag_med_comb.subject_id)
    medDict = createIndexMap(np.hstack(diag_med_comb.med_name))
    diagDict = createIndexMap(np.hstack(diag_med_comb.code))
    
    tensorIdx = np.array([[0,0,0]])
    tensorVal = np.array([[0]])
    for i in xrange(diag_med_comb.shape[0]):
        curDiag = [diagDict[x] for x in diag_med_comb.iloc[i,0]]
        curMed = [medDict[x] for x in diag_med_comb.iloc[i,1]]
        curPatId = patDict[diag_med_comb.iloc[i,2]]
        dmCombo = extmath.cartesian((curDiag, curMed))
        tensorIdx = np.append(tensorIdx,np.column_stack((np.repeat(curPatId, dmCombo.shape[0]), dmCombo)),axis=0)
        tensorVal = np.append(tensorVal, np.ones((dmCombo.shape[0],1), dtype=np.int), axis=0)

    tensorIdx = np.delete(tensorIdx, (0), axis=0)
    tensorVal = np.delete(tensorVal, (0), axis=0)
    tenX = sptensor.sptensor(tensorIdx, tensorVal, np.array([len(patDict), len(diagDict), len(medDict)]))
    axisDict = {0: patDict, 1: diagDict, 2: medDict}
    
    return tenX, axisDict
Example 50
def symmetry_score(transformation, left, right, stepz=100, ignore_value=0):
    """Counts how many elements in reflected img2 are equal in img1."""
    sizex, sizey, sizez = left.shape
    score = 0
    for zstart in range(0, sizez, stepz):
        # Generate original coordinates
        coords = cartesian((np.arange(sizex),
                            np.arange(sizey),
                            np.arange(zstart, min(sizez, zstart + stepz))))
        # Reflect coordinates
        reflected_coords = transform_coords(transformation, coords)
        # Find valid transformations
        valid_coords = ((reflected_coords >= 0) &
                        (reflected_coords < (sizex, sizey, sizez))).all(axis=1)
        coords = coords[valid_coords]
        reflected_coords = reflected_coords[valid_coords]
        # print('There were %d of %d reflected points out of boundaries' %
        #       ((~valid_coords).sum(), len(valid_coords)))
        # Compute score
        equal = left[tuple(coords.T)] == right[tuple(reflected_coords.T)]
        valid = (left[tuple(coords.T)] != ignore_value) & (right[tuple(reflected_coords.T)] != ignore_value)
        score += np.sum(equal & valid)

    return score
Example 51
    def get_constrained_state_combinations(self, valid_locations, last_combination_appliances, loc, vampire_power):
        #This method constructs only the valid state combinations from the beginning.
        
        #TODO any or all
        appliances_in_valid_locations_temp = [app for app in loc.metadata.appliances_location if all(locs in loc.metadata.appliances_location[app] for locs in valid_locations)]
        appliances_in_valid_locations_temp.extend(last_combination_appliances)
        
        #Fridge may always start running
        #TODO append 5 
        #TODO include always consuming appliances
        appliances_in_valid_locations_temp.append(5)
        
        appliances_in_valid_locations = list(set(appliances_in_valid_locations_temp))
        
        #Take care of REDDs tuples names (3,4) and (10,20)
        if loc.name == 'REDD':
            if 10 in appliances_in_valid_locations:
                appliances_in_valid_locations.remove(10)
                appliances_in_valid_locations.remove(20)
                appliances_in_valid_locations.append((10,20))
            if 3 in appliances_in_valid_locations:
                appliances_in_valid_locations.remove(3)
                appliances_in_valid_locations.remove(4)
                appliances_in_valid_locations.append((3,4))
           
        centroids = [model['states'] for model in self.model if  model['training_metadata'].instance() in appliances_in_valid_locations]
        ordering  = [model['training_metadata'].instance() for model in self.model if  model['training_metadata'].instance() in appliances_in_valid_locations]

        from sklearn.utils.extmath import cartesian
        state_combinations = cartesian(centroids)
        n_rows = state_combinations.shape[0]
        vampire_power_array = np.zeros((n_rows, 1)) + vampire_power
        state_combinations = np.hstack((state_combinations, vampire_power_array))
        summed_power_of_each_combination = np.sum(state_combinations, axis=1)

        return state_combinations, summed_power_of_each_combination, ordering
Example n. 52
0
def apc370models(nMeans=10, nSD=10, perc=5):
    #the parameters of the shapes

    mat = l.loadmat(top_dir + 'data/models/PC2001370Params.mat')
    s = mat['orcurv'][0]

    #adjustment for repeats [ 14, 15, 16,17, 318, 319, 320, 321]
    a = np.hstack((range(14), range(18,318)))
    a = np.hstack((a, range(322, 370)))
    s = s[a]


    nStim = np.size(s,0)

    angularPosition = []
    curvature = []
    paramLens = []

    for shapeInd in range(nStim):
        angularPosition.append(s[shapeInd][:, 0])
        curvature.append(s[shapeInd][:, 1])
        paramLens.append(np.size(s[shapeInd],0))

    angularPosition = np.array(list(itertools.chain.from_iterable(angularPosition)))
    angularPosition.shape = (np.size(angularPosition),1)

    curvature = np.array(list(itertools.chain.from_iterable(curvature)))
    curvature.shape = (np.size(curvature),1)

    #variable section length striding
    inds = np.empty((2,np.size(paramLens)),dtype = np.intp)
    inds[1,:] = np.cumsum(np.array(paramLens), dtype = np.intp) #ending index
    inds[0,:] = np.concatenate(([0,], inds[1,:-1])) #beginning index

    maxAngSD = np.deg2rad(171)
    minAngSD = np.deg2rad(23)
    maxCurSD = 0.98
    minCurSD = 0.09

    #make this into a pyramid based on d-prime
    orMeans = np.linspace(0, 2*pi-2*pi/nMeans, nMeans)
    orSDs = np.logspace(np.log10(minAngSD),  np.log10(maxAngSD),  nSD)
    curvMeans = np.linspace(-0.5, 1,nMeans)
    curvSDs = np.logspace(np.log10(minCurSD),  np.log10(maxCurSD),  nSD)
    modelParams = cartesian([orMeans,curvMeans,orSDs,curvSDs])
    nModels = np.size( modelParams, 0)

    a = st.vonmises.pdf(angularPosition, kappa = modelParams[:,2]**-1 , loc =  modelParams[:,0]) #
    b = st.norm.pdf(curvature, modelParams[:,1],  modelParams[:,3])
    temp = a * b

    models = np.empty(( 362, nModels ))

    for shapeInd in range(nStim):
        models[ shapeInd, : ] = np.max( temp[ inds[ 0, shapeInd ] : inds[ 1 , shapeInd ] , : ] ,  axis = 0 )

    models = models - np.mean(models,axis = 0)
    magnitude = np.linalg.norm( models, axis = 0)
    magnitude.shape=(1,nModels)
    models = models / magnitude
    del a,b, temp
    return models, modelParams
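A reduced sketch of the parameter-grid construction in apc370models, with smaller, made-up grid sizes; each row of modelParams is one candidate (orientation mean, curvature mean, orientation SD, curvature SD) model.

import numpy as np
from sklearn.utils.extmath import cartesian

orMeans = np.linspace(0, 2 * np.pi, 4, endpoint=False)   # 4 orientation means
curvMeans = np.linspace(-0.5, 1, 4)                       # 4 curvature means
orSDs = np.logspace(np.log10(0.1), np.log10(3.0), 3)      # 3 orientation SDs
curvSDs = np.logspace(np.log10(0.09), np.log10(0.98), 3)  # 3 curvature SDs

modelParams = cartesian([orMeans, curvMeans, orSDs, curvSDs])
print(modelParams.shape)    # (144, 4): 4 * 4 * 3 * 3 rows, one per model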
Example n. 53
0
    def __init__(self, stimRange, Pfunction='cGauss', nTrials=50, threshold=None, thresholdPrior=('uniform', None),
                 slope=None, slopePrior=('uniform', None),
                 guessRate=None, guessPrior=('uniform', None), lapseRate=None, lapsePrior=('uniform', None),
                 marginalize=True, thread=True):

        # Psychometric function parameters
        self.stimRange = stimRange  # range of stimulus intensities
        self.version = 1.0
        self.threshold = np.arange(-10, 10, 0.1)
        self.slope = np.arange(0.005, 20, 0.1)
        self.guessRate = np.arange(0.0, 0.11, 0.05)
        self.lapseRate = np.arange(0.0, 0.11, 0.05)
        self.marginalize = marginalize  # marginalize out nuisance parameters gamma and lambda?
        self.psyfun = Pfunction
        self.thread = thread

        if threshold is not None:
            self.threshold = threshold
            if np.shape(self.threshold) == ():
                self.threshold = np.expand_dims(self.threshold, 0)
        if slope is not None:
            self.slope = slope
            if np.shape(self.slope) == ():
                self.slope = np.expand_dims(self.slope, 0)
        if guessRate is not None:
            self.guessRate = guessRate
            if np.shape(self.guessRate) == ():
                self.guessRate = np.expand_dims(self.guessRate, 0)
        if lapseRate is not None:
            self.lapseRate = lapseRate
            if np.shape(self.lapseRate) == ():
                self.lapseRate = np.expand_dims(self.lapseRate, 0)

        # Priors
        self.thresholdPrior = thresholdPrior
        self.slopePrior = slopePrior
        self.guessPrior = guessPrior
        self.lapsePrior = lapsePrior

        self.priorMu = self.__genprior(self.threshold, *thresholdPrior)
        self.priorSigma = self.__genprior(self.slope, *slopePrior)
        self.priorGamma = self.__genprior(self.guessRate, *guessPrior)
        self.priorLambda = self.__genprior(self.lapseRate, *lapsePrior)

        # if guess rate equals lapse rate, and they have equal priors,
        # then gamma can be left out, as the distributions will be the same
        self.gammaEQlambda = all((all(self.guessRate == self.lapseRate), all(self.priorGamma == self.priorLambda)))
        # likelihood: table of conditional probabilities p(response | alpha,sigma,gamma,lambda,x)
        # prior: prior probability over all parameters p_0(alpha,sigma,gamma,lambda)
        if self.gammaEQlambda:
            self.dimensions = (len(self.threshold), len(self.slope), len(self.lapseRate), len(self.stimRange))
            self.likelihood = np.reshape(
                pf(cartesian((self.threshold, self.slope, self.lapseRate, self.stimRange)), psyfun=Pfunction), self.dimensions)
            # row-wise products of prior probabilities
            self.prior = np.reshape(
                np.prod(cartesian((self.priorMu, self.priorSigma, self.priorLambda)), axis=1), self.dimensions[:-1])
        else:
            self.dimensions = (len(self.threshold), len(self.slope), len(self.guessRate), len(self.lapseRate), len(self.stimRange))
            self.likelihood = np.reshape(
                pf(cartesian((self.threshold, self.slope, self.guessRate, self.lapseRate, self.stimRange)), psyfun=Pfunction), self.dimensions)
            # row-wise products of prior probabilities
            self.prior = np.reshape(
                np.prod(cartesian((self.priorMu, self.priorSigma, self.priorGamma, self.priorLambda)), axis=1), self.dimensions[:-1])

        # normalize prior
        self.prior = self.prior / np.sum(self.prior)

        # Set probability density function to prior
        self.pdf = np.copy(self.prior)

        # settings
        self.iTrial = 0
        self.nTrials = nTrials
        self.stop = 0
        self.response = []
        self.stim = []

        # Generate the first stimulus intensity
        self.minEntropyStim()
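A simplified sketch of the likelihood-table idea used in the constructor above, with a plain logistic standing in for the pf()/psyfun machinery; the point is the cartesian-then-reshape pattern that maps a flat evaluation back onto the parameter grid.

import numpy as np
from sklearn.utils.extmath import cartesian

threshold = np.linspace(-2, 2, 5)
slope = np.linspace(0.5, 3, 4)
lapse = np.array([0.0, 0.05])
stim = np.linspace(-3, 3, 7)

grid = cartesian((threshold, slope, lapse, stim))   # one row per parameter/stimulus combination
alpha, beta, lam, x = grid.T
p = lam + (1 - 2 * lam) / (1 + np.exp(-beta * (x - alpha)))   # stand-in psychometric function

likelihood = p.reshape(len(threshold), len(slope), len(lapse), len(stim))
print(likelihood.shape)    # (5, 4, 2, 7)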
Example n. 54
0
    def getStartingColors(self, hueFilters=[], lightnessRange=[25,85],
        onlyUseRGB=True):
        """Randomly select a starting color from a subset of CIE Lab space.

        This function returns a set of highly preferable colors within a
        subspace of the typical 8,325-color CIE Lab space that fall within the
        range of any hue filters. Rather than the normal every-5 interval, the
        subspace specifies an every-15 interval along the L, a, and b axes starting
        at the origin.

        Args:
            hueFilters (np.array): an n-by-2 ndarray specifying lower and upper
                hue filter bounds that fall within [0,360) degrees.
            lightnessRange (list): a two-element list that sets the lightness
                range for filtering for color space before sampling.
            onlyUseRGB (bool): whether color space should be restricted to RGB.

        Returns:
            startingColors (np.array): an n x 3 array of n highly preferable CIE
                Lab D65 starting colors.
        """
        hueFilters = np.array(hueFilters)

        lIntervals = CIE_LAB_STARTING_SUBSPACE_INTERVALS["L"]
        aIntervals = CIE_LAB_STARTING_SUBSPACE_INTERVALS["a"]
        bIntervals = CIE_LAB_STARTING_SUBSPACE_INTERVALS["b"]

        isInterval = np.zeros((self.colorSpaces.shape[0], 3))
        isInterval[:,0] = np.in1d(self.colorSpaces[:,0], lIntervals)
        isInterval[:,1] = np.in1d(self.colorSpaces[:,1], aIntervals)
        isInterval[:,2] = np.in1d(self.colorSpaces[:,2], bIntervals)
        isIntervalMask = np.all(isInterval, axis=1)

        startColors = self.colorSpaces[isIntervalMask]

        isRGB = np.logical_and(startColors[:,[6,7,8]] >= 0, startColors[:,[6,7,8]] <= 255)
        isRGB = np.all(isRGB, axis=1)

        if lightnessRange[0] <= 10:
            minLightness = 0
        else:
            minLightness = lightnessRange[0] + 0.01
        if lightnessRange[1] <= 15:
            maxLightness = 15
        else:
            maxLightness = lightnessRange[1]

        inLightness = np.logical_not(np.logical_or(startColors[:,0] <
            minLightness, startColors[:,0] > maxLightness))

        startColors = startColors[np.logical_and(isRGB, inLightness)]

        if hueFilters.size > 0:
            hueFilters = convert.convertHueRanges(hueFilters)
            okHue = [np.logical_and(startColors[:,3] >= low,
                    startColors[:,3] <= high) for low,high in hueFilters]
            okHue = np.any(np.array(okHue), axis=0)
            startColors = startColors[okHue]

        # With the remaining subspace, enumerate all unique color pairs.
        # For efficiency, unique pairs are calculated via one of the triangles
        # of the cartesian product of all remaining colors.
        labs = startColors[:,:3]
        # Note: color_product (the full cartesian product) is not used below;
        # the upper-triangle indexing that follows yields the unique pairs.
        color_col_products = [cartesian((labs[:,i], labs[:,i]))
                                for i in range(labs.shape[1])]
        productSize = (color_col_products[0].shape[0], 2*len(color_col_products))
        color_product = np.zeros(productSize)
        for i, d in enumerate(color_col_products):
            color_product[:,i] = d[:,0]
            color_product[:,i+len(color_col_products)] = d[:,1]
        idxs = np.transpose(np.array(np.triu_indices(len(labs),1)))
        colorPairs = np.ascontiguousarray(labs[idxs,].reshape((-1, 6)))
        colorPairPreferenceScores = npc.score(colorPairs)[:,2]

        # Penalize preference scores for colors that are ``ugly''.
        labs1 = np.ascontiguousarray(colorPairs[:,:3])
        labs2 = np.ascontiguousarray(colorPairs[:,3:6])
        penalties = np.minimum(npc.scorePenalty(labs1)[:,0],
                                npc.scorePenalty(labs2)[:,0])
        colorPairPreferenceScores = colorPairPreferenceScores * penalties

        maxPref = np.max(colorPairPreferenceScores)
        stdPref = np.std(colorPairPreferenceScores)
        prefThreshold = maxPref - 0.75*stdPref

        colorPairs = colorPairs[colorPairPreferenceScores > prefThreshold,]

        # Extract the unique colors from color combination list
        # http://stackoverflow.com/questions/16970982
        def getUnique(a):
            b = np.ascontiguousarray(a).view(np.dtype((np.void, a.dtype.itemsize * a.shape[1])))
            _, idx = np.unique(b, return_index=True)
            return a[idx]

        uniq1 = getUnique(colorPairs[:,:3])
        uniq2 = getUnique(colorPairs[:,3:])
        startingColors = getUnique( np.vstack(( uniq1, uniq2 )) )

        return startingColors
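A tiny sketch of the "one triangle of the cartesian product" idea mentioned in the comments above: for unordered colour pairs it is enough to index with np.triu_indices, which skips the (b, a) and (a, a) rows a full cartesian product would contain (toy Lab values).

import numpy as np

labs = np.array([[50., 10., 20.],     # made-up Lab colours
                 [60., -5., 15.],
                 [70., 0., -30.]])

idxs = np.transpose(np.triu_indices(len(labs), 1))   # (i, j) pairs with i < j
colorPairs = labs[idxs].reshape(-1, 6)               # each row: Lab1 followed by Lab2
print(colorPairs.shape)    # (3, 6): pairs (0,1), (0,2), (1,2)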
Example n. 55
0
def compute_probabilities(grid_map, cell_list, passenger_list, prob):
    """
    Compute the transition probability matrix.

    Args:
        grid_map (list): list containing the grid structure;
        cell_list (list): list of non-wall cells;
        passenger_list (list): list of passenger cells;
        prob (float): probability of success of an action.

    Returns:
        The transition probability matrix;

    """
    g = np.array(grid_map)
    c = np.array(cell_list)
    n_states = len(cell_list) * 2**len(passenger_list)
    p = np.zeros((n_states, 4, n_states))
    directions = [[-1, 0], [1, 0], [0, -1], [0, 1]]
    passenger_states = cartesian([[0, 1]] * len(passenger_list))

    for i in range(n_states):
        idx = i // len(cell_list)
        collected_passengers = np.array(
            passenger_list)[np.argwhere(passenger_states[idx] == 1).ravel()]
        state = c[i % len(cell_list)]

        if g[tuple(state)] in ['.', 'S', 'F']:
            if g[tuple(state)] in ['F']\
                    and state.tolist() not in collected_passengers.tolist():
                continue
            for a in range(len(directions)):
                new_state = state + directions[a]

                j = np.where((c == new_state).all(axis=1))[0]
                if j.size > 0:
                    assert j.size == 1

                    if g[tuple(new_state)] == 'F' and new_state.tolist()\
                            not in collected_passengers.tolist():
                        current_passenger_state = np.zeros(len(passenger_list))
                        current_passenger_idx = np.where(
                            (new_state == passenger_list).all(axis=1))[0]
                        current_passenger_state[current_passenger_idx] = 1
                        new_passenger_state = passenger_states[
                            idx] + current_passenger_state
                        new_idx = np.where((
                            passenger_states == new_passenger_state).all(
                            axis=1))[0]

                        j += len(cell_list) * new_idx
                    else:
                        j += len(cell_list) * idx
                else:
                    j = i

                p[i, a, j] = prob

                for d in [1 - np.abs(directions[a]),
                          np.abs(directions[a]) - 1]:
                    slip_state = state + d
                    k = np.where((c == slip_state).all(axis=1))[0]
                    if k.size > 0:
                        assert k.size == 1

                        if g[tuple(slip_state)] == 'F' and slip_state.tolist()\
                                not in collected_passengers.tolist():
                            current_passenger_state = np.zeros(
                                len(passenger_list))
                            current_passenger_idx = np.where(
                                (slip_state == passenger_list).all(axis=1))[0]
                            current_passenger_state[current_passenger_idx] = 1
                            new_passenger_state = passenger_states[
                                idx] + current_passenger_state
                            new_idx = np.where((
                                passenger_states == new_passenger_state).all(
                                axis=1))[0]

                            k += len(cell_list) * new_idx
                        else:
                            k += len(cell_list) * idx
                    else:
                        k = i

                    p[i, a, k] += (1. - prob) * .5

    return p
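A small sketch of the state bookkeeping in compute_probabilities: cartesian([[0, 1]] * k) enumerates the passenger pickup states, and the flat state index interleaves the cell index with the passenger-state index (toy sizes).

import numpy as np
from sklearn.utils.extmath import cartesian

n_cells = 6
passenger_states = cartesian([[0, 1]] * 2)     # 2 passengers -> 4 pickup states
print(passenger_states)
# [[0 0]
#  [0 1]
#  [1 0]
#  [1 1]]

state_index = 13                               # one of n_cells * 4 flat states
cell_index = state_index % n_cells             # which grid cell
passenger_index = state_index // n_cells       # which row of passenger_states
print(cell_index, passenger_index, passenger_states[passenger_index])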
Example n. 56
0
def design_matrix(sample_labels, interaction_indices=None):
    """
    Parameters
    ---------
    sample_labels: 
        a numpy matrix, for each sample a vector with the conditions
        which we would like to model.
        cols represent the type of conditions we want to model,
        row represent a combination of conditions that are represented by the row-variable.
        if we have a 2x3 design we build this matrix:
        [[0,0],
         [0,1],
         [0,2],
         [1,0],
         [1,1],
         [1,2]]
        
        
    
    Returns
    -------
    X: the design matrix.
    factor_labels: the labels of the design-matrix columns
    factor_num : number of factors for each condition
    
    """
        
    factor_num = []
    n_factors = 0
    
    for i in range(sample_labels.shape[1]):
        unique_labels = np.unique(sample_labels[:,i])
        if len(unique_labels) == 1:
            label_factors = 0
        else:
            label_factors = len(unique_labels)
        
        n_factors+=label_factors
        factor_num.append(label_factors)
    
    n_interactions = 0
    if interaction_indices is not None:
        interaction_factors = np.array(factor_num)[list(interaction_indices)]
        n_interactions = np.prod(interaction_factors)
        Xint = np.zeros((sample_labels.shape[0], n_interactions))
    
    
    X = np.zeros((sample_labels.shape[0], n_factors))
    
    lb = LabelEncoder()
    factor_labels = []
    offset = 0
    for i, factor in enumerate(factor_num):
        if factor == 0:
            continue
        index = lb.fit_transform(sample_labels.T[i])
        for j in range(sample_labels.shape[0]):
            X[j,index[j]+offset] = 1
        
        factor_labels.append(lb.classes_)
        
        offset += factor
    
    if interaction_indices is not None:
        interaction_product = [np.arange(v).tolist() for v in interaction_factors]
        interaction_gen = cartesian(interaction_product)
        
        # This is buggy!!
        Xint = np.zeros((sample_labels.shape[0], n_interactions))
        offset = interaction_indices[0] * np.sum(factor_num[:interaction_indices[0]])
        offset = int(offset)
        for i, int_indices in enumerate(interaction_gen):
            
            index1 = offset + int_indices[0]
            index2 = offset + int_indices[1] + factor_num[interaction_indices[0]]
            
            Xint[:,i] = X[:,index1] * X[:,index2]
            
            factor1 = interaction_indices[0]
            factor2 = interaction_indices[1]

            new_label = factor_labels[factor1][int_indices[0]] + "_" + \
                        factor_labels[factor2][int_indices[1]]
                        
            factor_labels.append(new_label)
        
        X = np.hstack((X, Xint))
        
    return X, np.hstack(factor_labels), factor_num
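A compact sketch of the interaction step in design_matrix for a hypothetical 2x3 design: cartesian over the level indices lists every level pairing, and each interaction column is the product of the matching dummy columns.

import numpy as np
from sklearn.utils.extmath import cartesian

# one-hot blocks for a 2-level factor and a 3-level factor
sample_levels = np.array([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2]])
X_a = np.eye(2)[sample_levels[:, 0]]     # 6 x 2 dummy block
X_b = np.eye(3)[sample_levels[:, 1]]     # 6 x 3 dummy block

pairs = cartesian([np.arange(2), np.arange(3)])   # (0,0), (0,1), ..., (1,2)
Xint = np.column_stack([X_a[:, i] * X_b[:, j] for i, j in pairs])
print(Xint.shape)    # (6, 6): one interaction column per level pairing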
Example n. 57
0
# Show confusion matrix in a separate window
plt.matshow(cm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()


#for the plots, create arrays of values of the continuous predictor variables
h=50
balance_=np.linspace(Xdf['balance'].min(),Xdf['balance'].max(),h)
h=100
income_=np.linspace(Xdf['income'].min(),Xdf['income'].max(),h)
#create combinations of the predictors using the arrays above
combos = pd.DataFrame(cartesian([balance_,[0.0,1.0],income_,[1.]]))
combos.columns=['balance','student','income','intercept']
#run the fitted model on all the predictor combinations to obtain predicted probabilities of default
combos['predict']=result.predict(combos)

#return the predicted probability of default for the mean income level, 
#and for each level of balance and student status
grouped = pd.pivot_table(combos,values=['predict'],index=['balance','student'],aggfunc=np.mean)
#select only data with 'student'=1
plt.figure()
plt_data=grouped.loc[grouped.index.get_level_values(1)==1]
#plot predicted probability of default for 'student'=1
plt.plot(plt_data.index.get_level_values(0),plt_data['predict'],color='b')
#select only data with 'student'=0
plt_data=grouped.loc[grouped.index.get_level_values(1)==0]
#plot predicted probability of default for 'student'=0
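A self-contained sketch of the prediction-grid pattern above, with made-up ranges for balance and income and a toy logistic standing in for the fitted statsmodels result.

import numpy as np
import pandas as pd
from sklearn.utils.extmath import cartesian

balance_ = np.linspace(0, 2500, 5)
income_ = np.linspace(10000, 70000, 4)
combos = pd.DataFrame(cartesian([balance_, [0.0, 1.0], income_, [1.0]]),
                      columns=['balance', 'student', 'income', 'intercept'])
print(combos.shape)    # (40, 4): 5 * 2 * 4 * 1 predictor combinations

# stand-in for result.predict(combos): any row-wise function of the grid works
combos['predict'] = 1 / (1 + np.exp(-(0.002 * combos['balance'] - 3)))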
Example n. 58
0
    def disaggregate(self, mains, output_datastore, **load_kwargs):
        '''Disaggregate mains according to the model learnt previously.

        Parameters
        ----------
        mains : nilmtk.ElecMeter or nilmtk.MeterGroup
        output_datastore : instance of nilmtk.DataStore subclass
            For storing power predictions from disaggregation algorithm.
        output_name : string, optional
            The `name` to use in the metadata for the `output_datastore`.
            e.g. some sort of name for this experiment.  Defaults to 
            "NILMTK_CO_<date>"
        resample_seconds : number, optional
            The desired sample period in seconds.
        **load_kwargs : key word arguments
            Passed to `mains.power_series(**kwargs)`
        '''
        MIN_CHUNK_LENGTH = 100

        if not self.model:
            raise RuntimeError("The model needs to be instantiated before"
                               " calling `disaggregate`.  For example, the"
                               " model can be instantiated by running `train`.")

        # If we import sklearn at the top of the file then auto doc fails.
        from sklearn.utils.extmath import cartesian

        # sklearn produces lots of DeprecationWarnings with PyTables
        import warnings
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # Extract optional parameters from load_kwargs
        date_now = datetime.now().isoformat().split('.')[0]
        output_name = load_kwargs.pop('output_name', 'NILMTK_CO_' + date_now)
        resample_seconds = load_kwargs.pop('resample_seconds', 60)

        # Get centroids
        centroids = [model['states'] for model in self.model]
        state_combinations = cartesian(centroids)
        # state_combinations is a 2D array
        # each column is a chan
        # each row is a possible combination of power demand values e.g.
        # [[0, 0, 0, 0], [0, 0, 0, 100], [0, 0, 50, 0], [0, 0, 50, 100], ...]

        # Add vampire power to the model
        vampire_power = mains.vampire_power()
        if printing:
            print("vampire_power = {} watts".format(vampire_power))
        n_rows = state_combinations.shape[0]
        vampire_power_array = np.zeros((n_rows, 1)) + vampire_power
        state_combinations = np.hstack((state_combinations, vampire_power_array))

        summed_power_of_each_combination = np.sum(state_combinations, axis=1)
        # summed_power_of_each_combination is now an array where each
        # value is the total power demand for each combination of states.

        load_kwargs['sections'] = load_kwargs.pop('sections',
                                                  mains.good_sections())
        resample_rule = '{:d}S'.format(resample_seconds)
        timeframes = []
        building_path = '/building{}'.format(mains.building())
        mains_data_location = '{}/elec/meter1'.format(building_path)

        for chunk in mains.power_series(**load_kwargs):

            # Check that chunk is sensible size before resampling
            if len(chunk) < MIN_CHUNK_LENGTH:
                continue

            # Record metadata
            timeframes.append(chunk.timeframe)
            measurement = chunk.name

            chunk = chunk.resample(rule=resample_rule)
            # Check chunk size *again* after resampling
            if len(chunk) < MIN_CHUNK_LENGTH:
                continue

            # Start disaggregation
            indices_of_state_combinations, residual_power = find_nearest(
                summed_power_of_each_combination, chunk.values)

            for i, model in enumerate(self.model):
                if printing:
                    print("Estimating power demand for '{}'".format(model['training_metadata']))
                predicted_power = state_combinations[
                    indices_of_state_combinations, i].flatten()
                cols = pd.MultiIndex.from_tuples([chunk.name])
                meter_instance = model['training_metadata'].instance()
                output_datastore.append('{}/elec/meter{}'
                                        .format(building_path, meter_instance),
                                        pd.DataFrame(predicted_power,
                                                     index=chunk.index,
                                                     columns=cols))

            # Copy mains data to disag output
            output_datastore.append(key=mains_data_location,
                                    value=pd.DataFrame(chunk, columns=cols))

        ##################################
        # Add metadata to output_datastore

        # TODO: `preprocessing_applied` for all meters
        # TODO: split this metadata code into a separate function
        # TODO: submeter measurement should probably be the mains
        #       measurement we used to train on, not the mains measurement.

        # DataSet and MeterDevice metadata:
        
        #Add metadata for main meter
        mains_meter = mains.metadata['device_model'] if hasattr(mains, 'metadata') else 'mains'
        
        meter_devices = {
            'CO': {
                'model': 'CO',
                'sample_period': resample_seconds,
                'max_sample_period': resample_seconds,
                'measurements': [{
                    'physical_quantity': measurement[0],
                    'type': measurement[1]
                }]
            },
            'mains': {
                'model': mains_meter,
                'sample_period': resample_seconds,
                'max_sample_period': resample_seconds,
                'measurements': [{
                    'physical_quantity': measurement[0],
                    'type': measurement[1]
                }]
            }
        }

        merged_timeframes = merge_timeframes(timeframes, gap=resample_seconds)
        total_timeframe = TimeFrame(merged_timeframes[0].start,
                                    merged_timeframes[-1].end)

        dataset_metadata = {'name': output_name, 'date': date_now,
                            'meter_devices': meter_devices,
                            'timeframe': total_timeframe.to_dict()}
        output_datastore.save_metadata('/', dataset_metadata)

        # Building metadata

        # Mains meter:
        elec_meters = {
            1: {
                'device_model': mains_meter,
                'site_meter': True,
                'data_location': mains_data_location,
                'preprocessing_applied': {},  # TODO
                'statistics': {
                    'timeframe': total_timeframe.to_dict(),
                    'good_sections': list_of_timeframe_dicts(merged_timeframes)
                }
            }
        }

        # Appliances and submeters:
        appliances = []
        for model in self.model:
            meter = model['training_metadata']

            meter_instance = meter.instance()

            for app in meter.appliances:
                meters = app.metadata['meters']
                appliance = {
                    'meters': [meter_instance], 
                    'type': app.identifier.type,
                    'instance': app.identifier.instance
                    # TODO this `instance` will only be correct when the
                    # model is trained on the same house as it is tested on.
                    # https://github.com/nilmtk/nilmtk/issues/194
                }
                appliances.append(appliance)

            elec_meters.update({
                meter_instance: {
                    'device_model': 'CO',
                    'submeter_of': 1,
                    'data_location': ('{}/elec/meter{}'
                                      .format(building_path, meter_instance)),
                    'preprocessing_applied': {},  # TODO
                    'statistics': {
                        'timeframe': total_timeframe.to_dict(),
                        'good_sections': list_of_timeframe_dicts(merged_timeframes)
                    }
                }
            })

        building_metadata = {
            'instance': mains.building(),
            'elec_meters': elec_meters,
            'appliances': appliances
        }

        output_datastore.save_metadata(building_path, building_metadata)
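A condensed sketch of the combinatorial-optimisation core of disaggregate(), with invented two-appliance centroids: cartesian enumerates the appliance state combinations, vampire power is appended as an extra column, and each mains sample is matched to the combination whose total is nearest.

import numpy as np
from sklearn.utils.extmath import cartesian

centroids = [np.array([0., 100.]),            # hypothetical fridge states (watts)
             np.array([0., 50., 2000.])]      # hypothetical kettle states (watts)
vampire_power = 35.0

state_combinations = cartesian(centroids)                          # (6, 2)
state_combinations = np.hstack(
    [state_combinations,
     np.full((state_combinations.shape[0], 1), vampire_power)])    # (6, 3)
summed_power = state_combinations.sum(axis=1)

mains = np.array([40., 190., 2120.])          # three mains samples (watts)
indices = np.argmin(np.abs(summed_power[:, None] - mains[None, :]), axis=0)
print(state_combinations[indices, 0])         # estimated fridge power per sample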
Example n. 59
0
def calc_cartesian_alpha(alpha, index, n_groups_list):
    if index < 0:
        return np.array([alpha for _ in range(np.prod(n_groups_list))])
    else:
        cart = cartesian([range(n_groups) for n_groups in n_groups_list])
        return np.array([alpha[i] for i in cart[:, index]])
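A small usage sketch for calc_cartesian_alpha's indexing, with made-up group counts: the cartesian product lists every group combination and alpha is replicated along whichever axis `index` selects.

import numpy as np
from sklearn.utils.extmath import cartesian

n_groups_list = [2, 3]
cart = cartesian([range(n) for n in n_groups_list])
print(cart)
# [[0 0]
#  [0 1]
#  [0 2]
#  [1 0]
#  [1 1]
#  [1 2]]

alpha = np.array([0.1, 0.5, 0.9])                 # one value per group on axis 1
print(np.array([alpha[i] for i in cart[:, 1]]))   # [0.1 0.5 0.9 0.1 0.5 0.9]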
Example n. 60
0
    def disaggregate(self, mains, output_datastore, location_data=None, mains_values=None, baseline=None, **load_kwargs):

        from sklearn.utils.extmath import cartesian
        import warnings
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # Get centroids
        centroids = [model['states'] for model in self.model]
        state_combinations = cartesian(centroids)

        try:
            timezone = location_data.dataset.metadata.get('timezone')
        except Exception:
            timezone = ''

        vampire_power = baseline
        if baseline is None:
            vampire_power = mains.vampire_power() #- correction
        n_rows = state_combinations.shape[0]
        vampire_power_array = np.zeros((n_rows, 1)) + vampire_power
        state_combinations = np.hstack((state_combinations, vampire_power_array))
        print("vampire_power = {} watts".format(vampire_power))        
        summed_power_of_each_combination = np.sum(state_combinations, axis=1)
        
        self.vampire_power = vampire_power
        self.state_combinations_all = state_combinations
        self.summed_power_of_each_combination_all = summed_power_of_each_combination

                
        resample_seconds = load_kwargs.pop('resample_seconds', 60)
        load_kwargs.setdefault('resample', True)
        load_kwargs.setdefault('sample_period', resample_seconds)
        timeframes = []
        building_path = '/building{}'.format(mains.building())
        mains_data_location = '{}/elec/meter1'.format(building_path)

        if mains_values is None:
            load_kwargs['sections'] = load_kwargs.pop('sections', mains.good_sections())
            mains_values = mains.power_series(**load_kwargs)
            using_series = False
        else:
            mains_values = [mains_values]
            using_series = True
        
        self.mains_used = mains_values        
        
        self.location_used = 0
        self.location_loop = 0
        self.co_indices_original = []
        self.co_indices_location = [] #No longer applies since indices constantly change after each iteration. We now return the combo
        self.co_residuals_original = []
        self.co_residuals_location = []
        self.co_combos_location = []
        for chunk in mains_values:


            # Record metadata
            if using_series:
                timeframes.append(TimeFrame(start=chunk.index[0], end=chunk.index[-1]))
                measurement = ('power', 'apparent')
            else:
                timeframes.append(chunk.timeframe)
                measurement = chunk.name

            # Start disaggregation
            print('Calculating original indices of state combinations...')
            indices_of_state_combinations_original, residuals_power_original = find_nearest(
                summed_power_of_each_combination, chunk.values)
            
            self.co_indices_original.extend(indices_of_state_combinations_original)
            self.co_residuals_original.extend(residuals_power_original)
            
            print('Calculating indices of state combinations...')
            state_combinations_location, residuals_power_location = self.find_nearest(
                chunk, location_data, vampire_power, resample_seconds)
            
            self.co_combos_location.extend(state_combinations_location)
            self.co_residuals_location.extend(residuals_power_location)
            
            #Write results
            for i, model in enumerate(self.model):
                print("Estimating power demand for '{}'".format(model['training_metadata']))
                predicted_power = state_combinations_location[:, i].flatten()
                cols = pd.MultiIndex.from_tuples([measurement])
                meter_instance = model['training_metadata'].instance()
                output_datastore.append('{}/elec/meter{}'
                                        .format(building_path, meter_instance),
                                        pd.DataFrame(predicted_power,
                                                     index=chunk.index,
                                                     columns=cols))

            # Copy mains data to disag output
            output_datastore.append(key=mains_data_location,
                                    value=pd.DataFrame(chunk, columns=cols))
        
        
        ##################################
        # Add metadata to output_datastore
        self.add_metadata(output_datastore, measurement, timeframes, mains, timezone, load_kwargs)