def impute_and_confidence(self, M_c, X_L, X_D, Y, Q, n):
    """Impute values, with a confidence, from the predictive distribution
    of the given latent state.

    :param M_c: The column metadata
    :type M_c: dict
    :param X_L: the latent variables associated with the latent state; a
        list or tuple is tolerated as well (see the note below)
    :type X_L: dict
    :param X_D: the particular cluster assignments of each row in each view
    :type X_D: list of lists
    :param Y: A list of constraints to apply when sampling.  Each constraint
        is a triplet of (r, d, v): r is the row index, d is the column index
        and v is the value of the constraint
    :type Y: list of lists
    :param Q: A list of values to sample.  Each value is doublet of (r, d):
        r is the row index, d is the column index
    :type Q: list of lists
    :param n: the number of samples to use in the imputation
    :type n: int
    :returns: tuple -- the ``(e, confidence)`` pair unpacked from
        ``su.impute_and_confidence``.  Earlier documentation describes the
        result as (value, confidence) entries in the order given by Q;
        confirm against ``su.impute_and_confidence``.

    Note: when ``X_L`` is a list or tuple, ``X_D`` is asserted to be one as
    well, and the call is otherwise the same as for a single ``X_L``.
    """
    sequence_types = (list, tuple)
    if isinstance(X_L, sequence_types):
        assert isinstance(X_D, sequence_types)
        # TODO: multistate impute doesn't exist yet; once
        # su.impute_and_confidence_multistate is available it should be
        # called here instead of falling through to the call below.
    imputed, conf = su.impute_and_confidence(
        M_c, X_L, X_D, Y, Q, n, self.get_next_seed)
    return (imputed, conf)
def impute_and_confidence(self, M_c, X_L, X_D, Y, Q, n):
    """Impute values and their confidence from the predictive distribution
    of the given latent state.

    :param M_c: The column metadata
    :type M_c: dict
    :param X_L: the latent variables associated with the latent state (a
        list or tuple is also accepted; ``X_D`` must then be one too)
    :type X_L: dict
    :param X_D: the particular cluster assignments of each row in each view
    :type X_D: list of lists
    :param Y: A list of constraints to apply when sampling.  Each constraint
        is a triplet of (r, d, v): r is the row index, d is the column index
        and v is the value of the constraint
    :type Y: list of lists
    :param Q: A list of values to sample.  Each value is doublet of (r, d):
        r is the row index, d is the column index
    :type Q: list of lists
    :param n: the number of samples to use in the imputation
    :type n: int
    :returns: tuple -- ``(e, confidence)`` as unpacked from
        ``su.impute_and_confidence``; previously documented as (value,
        confidence) results in the same order as specified by Q (confirm
        against ``su.impute_and_confidence``)
    """
    if isinstance(X_L, (list, tuple)):
        # A list/tuple X_L must be paired with a list/tuple X_D.
        assert isinstance(X_D, (list, tuple))
    # TODO: multistate impute doesn't exist yet
    # (su.impute_and_confidence_multistate), so list/tuple and single X_L
    # inputs both go through the same routine.
    result_value, result_confidence = su.impute_and_confidence(
        M_c, X_L, X_D, Y, Q, n, self.get_next_seed)
    return (result_value, result_confidence)
def impute_and_confidence(self, M_c, X_L, X_D, Y, Q, seed, n):
    """Impute values and confidence of the value from the predictive
    distribution of the given latent state.

    :param Y: A list of constraints to apply when sampling.  Each constraint
        is a triplet of (r, d, v): r is the row index, d is the column index
        and v is the value of the constraint
    :type Y: list of lists
    :param Q: A list of values to sample.  Each value is doublet of (r, d):
        r is the row index, d is the column index
    :type Q: list of lists
    :param seed: passed to ``make_get_next_seed`` to build the seed
        callable handed on to ``su.impute_and_confidence``
    :param n: the number of samples to use in the imputation
    :type n: int
    :returns: tuple -- ``(e, confidence)`` as unpacked from
        ``su.impute_and_confidence``; previously documented as (value,
        confidence) results in the same order as specified by Q (confirm
        against ``su.impute_and_confidence``)

    ``M_c``, ``X_L`` and ``X_D`` are forwarded unchanged.  If ``X_L`` is a
    list or tuple, ``X_D`` is asserted to be one as well.
    """
    # Built first, before any argument checks, so evaluation order is kept.
    seed_source = make_get_next_seed(seed)
    if isinstance(X_L, (list, tuple)):
        assert isinstance(X_D, (list, tuple))
        # TODO: multistate impute doesn't exist yet; a dedicated
        # su.impute_and_confidence_multistate call is intended here.
    imputed, conf = su.impute_and_confidence(
        M_c, X_L, X_D, Y, Q, n, seed_source)
    return (imputed, conf)
random_state = numpy.random.RandomState(inf_seed)


# FIXME: getting weird error on conversion to int: too large from inside pyx
# (presumably why the default bound is 32767 rather than sys.maxint).
def get_next_seed(max_val=32767):
    """Return the next seed, drawn via ``random_state.randint(max_val)``."""
    return random_state.randint(max_val)


# resume from saved name
save_dict = fu.unpickle(pkl_name)
M_c = save_dict['M_c']
X_L = save_dict['X_L']
X_D = save_dict['X_D']
T = save_dict['T']
num_cols = len(X_L['column_partition']['assignments'])

# The single cell (row, column) to impute.
row_idx = 205
col_idx = 13
Q = [(row_idx, col_idx)]
imputed, confidence = su.impute_and_confidence(
    M_c, X_L, X_D, Y=None, Q=Q, n=400, get_next_seed=get_next_seed)

T_array = numpy.array(T)
# Look up the column's entry in the column partition and use it to select
# the matching row-assignment list from X_D.
which_view_idx = X_L['column_partition']['assignments'][col_idx]
X_D_i = numpy.array(X_D[which_view_idx])
# Rows whose entry in X_D_i equals the entry of the imputed row.
which_cluster_idx = X_D_i[row_idx]
which_rows_match_indices = numpy.nonzero(X_D_i == which_cluster_idx)[0]
cluster_vals = T_array[which_rows_match_indices, col_idx]
all_vals = T_array[:, col_idx]

# Fraction of values equal to the imputed value, first among the matching
# rows only and then over every row of the column.
cluster_counter = Counter(cluster_vals)
cluster_ratio = float(cluster_counter[imputed]) / sum(cluster_counter.values())
all_counter = Counter(all_vals)
all_ratio = float(all_counter[imputed]) / sum(all_counter.values())

# Parenthesized single-argument prints give the same output under the
# Python 2 print statement and the Python 3 print function.
print('')
print('imputed: %s' % imputed)
print('all_ratio: %s' % all_ratio)
# resume from saved name
save_dict = fu.unpickle(pkl_name)
M_c = save_dict['M_c']
X_L = save_dict['X_L']
X_D = save_dict['X_D']
T = save_dict['T']
num_cols = len(X_L['column_partition']['assignments'])

# Impute one cell, identified by (row_idx, col_idx), using 400 samples.
row_idx = 205
col_idx = 13
Q = [(row_idx, col_idx)]
imputed, confidence = su.impute_and_confidence(
    M_c,
    X_L,
    X_D,
    Y=None,
    Q=Q,
    n=400,
    get_next_seed=get_next_seed,
)

T_array = numpy.array(T)

# Select the X_D entry indexed by the column's column-partition assignment,
# then find every row sharing the imputed row's value in it.
which_view_idx = X_L['column_partition']['assignments'][col_idx]
X_D_i = numpy.array(X_D[which_view_idx])
which_cluster_idx = X_D_i[row_idx]
which_rows_match_indices = numpy.nonzero(X_D_i == which_cluster_idx)[0]

# Column values restricted to the matching rows, and over all rows.
cluster_vals = T_array[which_rows_match_indices, col_idx]
all_vals = T_array[:, col_idx]

# Share of each value set that equals the imputed value.
cluster_counter = Counter(cluster_vals)
cluster_ratio = (
    float(cluster_counter[imputed]) / sum(cluster_counter.values())
)
all_counter = Counter(all_vals)
all_ratio = float(all_counter[imputed]) / sum(all_counter.values())
def predict_and_confidence(M_c, X_L, X_D, Y, Q, n, get_next_seed):
    """Predict a single cell by delegating to ``su.impute_and_confidence``.

    All arguments are forwarded unchanged, in the same order.  ``Q`` must
    contain exactly one entry; anything else fails the assertion before the
    delegate is called.

    :returns: tuple -- the ``(e, confidence)`` pair unpacked from
        ``su.impute_and_confidence``
    """
    # FIXME: allow more than one cell to be predicted
    query_count = len(Q)
    assert query_count == 1
    predicted, conf = su.impute_and_confidence(
        M_c, X_L, X_D, Y, Q, n, get_next_seed)
    return predicted, conf
def predict_and_confidence(M_c, X_L, X_D, Y, Q, n, get_next_seed):
    """Return the prediction and confidence for the single cell in ``Q``.

    Thin wrapper around ``su.impute_and_confidence``: every argument is
    passed through as given.  ``Q`` is asserted to hold exactly one entry
    before the call is made.

    :returns: tuple -- ``(e, confidence)`` as unpacked from
        ``su.impute_and_confidence``
    """
    # FIXME: allow more than one cell to be predicted
    assert len(Q) == 1
    outcome = su.impute_and_confidence(M_c, X_L, X_D, Y, Q, n, get_next_seed)
    # Unpack into exactly two items so the result is always a 2-tuple.
    value, conf = outcome
    return value, conf