Ejemplo n.º 1
0
    def impute_and_confidence(self, M_c, X_L, X_D, Y, Q, n):
        """Impute the queried cells and report a confidence for the result.

        The computation is delegated to ``su.impute_and_confidence``, which
        is given ``self.get_next_seed`` as its seed source.

        :param M_c: The column metadata
        :type M_c: dict
        :param X_L: the latent variables associated with the latent state.
                    A list or tuple is also accepted; X_D must then be a
                    list or tuple as well.
        :type X_L: dict
        :param X_D: the particular cluster assignments of each row in each view
        :type X_D: list of lists
        :param Y: A list of constraints to apply when sampling.  Each constraint
                  is a triplet of (r, d, v): r is the row index, d is the column
                  index and v is the value of the constraint
        :type Y: list of lists
        :param Q: A list of values to sample.  Each value is doublet of (r, d):
                  r is the row index, d is the column index
        :type Q: list of lists
        :param n: the number of samples to use in the imputation
        :type n: int
        :returns: the ``(e, confidence)`` pair produced by
                  ``su.impute_and_confidence``
        :raises AssertionError: if X_L is a list or tuple but X_D is not

        """
        given_sequence = isinstance(X_L, (list, tuple))
        if given_sequence:
            # TODO: multistate impute doesn't exist yet
            # (su.impute_and_confidence_multistate); until it does, this
            # input goes through the same call as a single latent state.
            assert isinstance(X_D, (list, tuple))
        imputed, conf = su.impute_and_confidence(
            M_c, X_L, X_D, Y, Q, n, self.get_next_seed)
        return imputed, conf
Ejemplo n.º 2
0
    def impute_and_confidence(self, M_c, X_L, X_D, Y, Q, n):
        """Impute the cells named in Q and return them with a confidence.

        This is a thin wrapper around ``su.impute_and_confidence`` that
        supplies ``self.get_next_seed`` as the seed source.

        :param M_c: The column metadata
        :type M_c: dict
        :param X_L: the latent variables associated with the latent state
                    (a list or tuple is accepted too, provided X_D is also a
                    list or tuple)
        :type X_L: dict
        :param X_D: the particular cluster assignments of each row in each view
        :type X_D: list of lists
        :param Y: A list of constraints to apply when sampling.  Each constraint
                  is a triplet of (r, d, v): r is the row index, d is the column
                  index and v is the value of the constraint
        :type Y: list of lists
        :param Q: A list of values to sample.  Each value is doublet of (r, d):
                  r is the row index, d is the column index
        :type Q: list of lists
        :param n: the number of samples to use in the imputation
        :type n: int
        :returns: tuple ``(e, confidence)`` exactly as unpacked from
                  ``su.impute_and_confidence``
        :raises AssertionError: when X_L is a list/tuple and X_D is not

        """
        sequence_types = (list, tuple)
        if isinstance(X_L, sequence_types):
            # TODO: multistate impute doesn't exist yet; once
            # su.impute_and_confidence_multistate is available it should be
            # called here instead of the single-state routine below.
            assert isinstance(X_D, sequence_types)
        result_value, result_confidence = su.impute_and_confidence(
            M_c, X_L, X_D, Y, Q, n, self.get_next_seed)
        return (result_value, result_confidence)
Ejemplo n.º 3
0
    def impute_and_confidence(self, M_c, X_L, X_D, Y, Q, seed, n):
        """Impute the cells named in Q and return them with a confidence.

        A seed source is built from ``seed`` with ``make_get_next_seed`` and
        handed, together with the other arguments, to
        ``su.impute_and_confidence``.

        :param M_c: forwarded unchanged to ``su.impute_and_confidence``
        :param X_L: forwarded unchanged; may be a list or tuple, in which
            case X_D must be a list or tuple as well
        :param X_D: forwarded unchanged to ``su.impute_and_confidence``
        :param Y: A list of constraints to apply when sampling.  Each constraint
            is a triplet of (r, d, v): r is the row index, d is the column
            index and v is the value of the constraint
        :type Y: list of lists
        :param Q: A list of values to sample.  Each value is doublet of (r, d):
            r is the row index, d is the column index
        :type Q: list of lists
        :param seed: value passed to ``make_get_next_seed``
        :param n: the number of samples to use in the imputation
        :type n: int

        :returns: the ``(e, confidence)`` pair produced by
            ``su.impute_and_confidence``
        :raises AssertionError: if X_L is a list or tuple but X_D is not
        """
        seed_source = make_get_next_seed(seed)
        if isinstance(X_L, (list, tuple)):
            # TODO: multistate impute doesn't exist yet
            # (su.impute_and_confidence_multistate); for now this input takes
            # the same call as a single latent state.
            assert isinstance(X_D, (list, tuple))
        imputed, conf = su.impute_and_confidence(
            M_c, X_L, X_D, Y, Q, n, seed_source)
        return imputed, conf
# Module-level generator seeded from inf_seed; get_next_seed draws from it.
random_state = numpy.random.RandomState(inf_seed)


# FIXME: getting weird error on conversion to int: too large from inside pyx,
# hence the small default bound instead of sys.maxint.
def get_next_seed(max_val=32767):
    """Draw the next integer seed from the shared ``random_state``.

    :param max_val: argument passed to ``random_state.randint``; with a
        single argument numpy draws from the half-open range [0, max_val)
    :returns: the value returned by ``random_state.randint(max_val)``
    """
    next_seed = random_state.randint(max_val)
    return next_seed

# resume from saved name
save_dict = fu.unpickle(pkl_name)
M_c = save_dict['M_c']
X_L = save_dict['X_L']
X_D = save_dict['X_D']
T = save_dict['T']
num_cols = len(X_L['column_partition']['assignments'])

# Impute a single cell, identified by (row, column).
row_idx = 205
col_idx = 13
Q = [(row_idx, col_idx)]
imputed, confidence = su.impute_and_confidence(
    M_c, X_L, X_D, Y=None, Q=Q, n=400, get_next_seed=get_next_seed)

# Compare the imputed value with the values found in the same column:
# first among the rows sharing the queried row's cluster (within the view
# that the column is assigned to), then among all rows.
T_array = numpy.array(T)
which_view_idx = X_L['column_partition']['assignments'][col_idx]
X_D_i = numpy.array(X_D[which_view_idx])
which_cluster_idx = X_D_i[row_idx]
which_rows_match_indices = numpy.nonzero(X_D_i == which_cluster_idx)[0]
cluster_vals = T_array[which_rows_match_indices, col_idx]
all_vals = T_array[:, col_idx]
# Fraction of the cluster's values that equal the imputed value.
cluster_counter = Counter(cluster_vals)
cluster_ratio = float(cluster_counter[imputed]) / sum(cluster_counter.values())
# Fraction of the whole column's values that equal the imputed value.
all_counter = Counter(all_vals)
all_ratio = float(all_counter[imputed]) / sum(all_counter.values())
# Single-argument parenthesised prints emit the same text under Python 2's
# print statement and Python 3's print function; print('') stands in for the
# bare `print`, which would be a no-op expression on Python 3.
print('')
print('imputed: %s' % imputed)
print('all_ratio: %s' % all_ratio)
Ejemplo n.º 5
0

# resume from saved name
save_dict = fu.unpickle(pkl_name)
M_c, X_L, X_D, T = (save_dict[key] for key in ('M_c', 'X_L', 'X_D', 'T'))
num_cols = len(X_L['column_partition']['assignments'])

# The single cell to impute, as a (row, column) query.
row_idx, col_idx = 205, 13
Q = [(row_idx, col_idx)]
imputed, confidence = su.impute_and_confidence(
    M_c, X_L, X_D, Y=None, Q=Q, n=400, get_next_seed=get_next_seed)

# Find the rows that share the queried row's cluster in the view that the
# queried column is assigned to.
T_array = numpy.array(T)
which_view_idx = X_L['column_partition']['assignments'][col_idx]
X_D_i = numpy.array(X_D[which_view_idx])
which_cluster_idx = X_D_i[row_idx]
which_rows_match_indices = numpy.nonzero(X_D_i == which_cluster_idx)[0]

# Column values: the whole column, and the same-cluster subset of it.
all_vals = T_array[:, col_idx]
cluster_vals = T_array[which_rows_match_indices, col_idx]

# Fraction of each group of values that equals the imputed value.
cluster_counter = Counter(cluster_vals)
all_counter = Counter(all_vals)
cluster_ratio = (
    float(cluster_counter[imputed]) / sum(cluster_counter.values()))
all_ratio = float(all_counter[imputed]) / sum(all_counter.values())
Ejemplo n.º 6
0
def predict_and_confidence(M_c, X_L, X_D, Y, Q, n, get_next_seed):
    """Predict the single cell named in Q and return it with a confidence.

    All arguments are forwarded unchanged to ``su.impute_and_confidence``.

    :param Q: the query list; it must contain exactly one entry
    :returns: the ``(e, confidence)`` pair produced by
        ``su.impute_and_confidence``
    :raises AssertionError: if ``len(Q)`` is not 1
    """
    # FIXME: allow more than one cell to be predicted
    num_queries = len(Q)
    assert num_queries == 1
    prediction, conf = su.impute_and_confidence(
        M_c, X_L, X_D, Y, Q, n, get_next_seed)
    return prediction, conf
Ejemplo n.º 7
0
def predict_and_confidence(M_c, X_L, X_D, Y, Q, n, get_next_seed):
    """Return the prediction and confidence for the one cell queried in Q.

    This is a thin wrapper: after checking that Q holds a single query,
    every argument is passed straight to ``su.impute_and_confidence``.

    :param Q: list of queries; exactly one entry is required
    :returns: tuple ``(e, confidence)`` unpacked from
        ``su.impute_and_confidence``
    :raises AssertionError: when Q does not contain exactly one entry
    """
    # FIXME: allow more than one cell to be predicted
    assert len(Q) == 1
    outcome = su.impute_and_confidence(M_c, X_L, X_D, Y, Q, n, get_next_seed)
    value, value_confidence = outcome
    return value, value_confidence