def predict(M_c, X_L, X_D, Y, Q, n, get_next_seed, return_samples=False):
    """Predict the value of a cell lying outside the modeled table.

    Predict is currently the same as impute except that the row id in the
    query must lie outside the length of the table used to generate the
    model.  For now we delegate to ``su.impute`` and leave it to the caller
    to construct the query correctly.

    :param M_c: column metadata
    :param X_L: latent variables associated with the latent state
    :param X_D: cluster assignments of each row in each view
    :param Y: list of (row, col, value) constraints applied when sampling
    :param Q: list of (row, col) cells to predict; exactly one is supported
    :param n: number of samples used for the imputation
    :param get_next_seed: callable producing fresh RNG seeds
    :param return_samples: when True, also return the raw samples
    :returns: the predicted value, or ``(value, samples)`` when
        ``return_samples`` is True
    """
    # FIXME: allow more than one cell to be predicted
    assert len(Q) == 1
    if return_samples:
        e, samples = su.impute(M_c, X_L, X_D, Y, Q, n, get_next_seed,
                               return_samples=True)
        # Bug fix: `samples` was previously computed and then discarded,
        # so the return_samples flag had no effect for the caller.
        return e, samples
    e = su.impute(M_c, X_L, X_D, Y, Q, n, get_next_seed)
    return e
def impute(self, M_c, X_L, X_D, Y, Q, seed, n):
    """Impute values from the predictive distribution of the given latent state

    :param seed: The random seed
    :type seed: int
    :param M_c: The column metadata
    :type M_c: dict
    :param X_L: the latent variables associated with the latent state
    :type X_L: dict
    :param X_D: the particular cluster assignments of each row in each view
    :type X_D: list of lists
    :param Y: A list of constraints to apply when sampling.  Each constraint
        is a triplet of (r, d, v): r is the row index, d is the column index
        and v is the value of the constraint
    :type Y: list of lists
    :param Q: A list of values to sample.  Each value is a doublet of (r, d):
        r is the row index, d is the column index
    :type Q: list of lists
    :param n: the number of samples to use in the imputation
    :type n: int
    :returns: list of floats -- imputed values in the same order as
        specified by Q
    """
    # Derive a seed generator from the caller's integer seed and delegate
    # the actual sampling to the sampling utilities.
    next_seed_fn = make_get_next_seed(seed)
    return su.impute(M_c, X_L, X_D, Y, Q, n, next_seed_fn)
def predict(M_c, X_L, X_D, Y, Q, n, get_next_seed, return_samples=False):
    """Predict the value of a single cell outside the modeled table.

    Predict is currently identical to impute except that the row id in the
    query must lie beyond the length of the table used to build the model;
    constructing a valid query is left to the caller.

    :param M_c: column metadata
    :param X_L: latent variables associated with the latent state
    :param X_D: cluster assignments of each row in each view
    :param Y: list of (row, col, value) sampling constraints
    :param Q: list of (row, col) cells to predict; must contain exactly one
    :param n: number of samples used for the imputation
    :param get_next_seed: callable producing fresh RNG seeds
    :param return_samples: when True, the raw samples are returned as well
    :returns: the predicted value, or ``(value, samples)`` when
        ``return_samples`` is True
    """
    # FIXME: allow more than one cell to be predicted
    assert len(Q) == 1
    if not return_samples:
        return su.impute(M_c, X_L, X_D, Y, Q, n, get_next_seed)
    e, samples = su.impute(M_c, X_L, X_D, Y, Q, n, get_next_seed,
                           return_samples=True)
    # Bug fix: previously only `e` was returned, silently dropping the
    # samples the caller explicitly asked for.
    return e, samples
def impute(self, M_c, X_L, X_D, Y, Q, seed, n):
    """Impute values from predictive distribution of the given latent state.

    :param Y: A list of constraints to apply when sampling.  Each constraint
        is a triplet of (r, d, v): r is the row index, d is the column index
        and v is the value of the constraint
    :type Y: list of lists
    :param Q: A list of values to sample.  Each value is a doublet of (r, d):
        r is the row index, d is the column index
    :type Q: list of lists
    :param n: the number of samples to use in the imputation
    :type n: int
    :returns: list of floats -- imputed values in the same order as
        specified by Q
    """
    # Turn the integer seed into a seed generator, then hand off to the
    # sampling utilities.
    seed_source = make_get_next_seed(seed)
    result = su.impute(M_c, X_L, X_D, Y, Q, n, seed_source)
    return result
def impute_table(T, M_c, X_L_list, X_D_list, numDraws, get_next_seed):
    """Return a copy of data table T with every missing (NaN) cell imputed.

    :param T: the data table as a list of row lists; left unmodified
    :param M_c: column metadata (model types and code/value maps)
    :param X_L_list: list of latent-variable states
    :param X_D_list: list of cluster-assignment states
    :param numDraws: number of samples drawn per imputed cell
    :param get_next_seed: callable producing fresh RNG seeds
    :returns: a new table with all NaN cells replaced by imputed values
    """
    # Bug fix: copy(T) was a shallow copy, so assigning into its rows
    # mutated the caller's table despite the copy's clear intent.  Copy
    # each row so the input table is genuinely untouched.
    T_imputed = [list(row) for row in T]
    num_cols = len(T[0])
    # Identify column types from each column's model type.
    coltype = []
    for col_idx in range(num_cols):
        metadata = M_c['column_metadata'][col_idx]
        if metadata['modeltype'] == 'normal_inverse_gamma':
            coltype.append('continuous')
        else:
            coltype.append('multinomial')
    rows_with_nans = [i for i in range(len(T)) if any(isnan_mixedtype(T[i]))]
    # Bug fix: `print rowsWithNans` was a Python 2 print statement (a
    # syntax error under Python 3); use the print() function instead.
    print(rows_with_nans)
    # Build the query list: one (row, col) pair per missing cell.
    Q = []
    for row in rows_with_nans:
        missing = [c for c in range(num_cols) if isnan_mixedtype([T[row][c]])]
        Q.extend(zip([row] * len(missing), missing))
    # Impute each missing cell independently.
    values_list = []
    for query in Q:
        values = su.impute(M_c, X_L_list, X_D_list, [], [query],
                           numDraws, get_next_seed)
        values_list.append(values)
    # Write the samples back into the table, translating multinomial codes
    # back to their original values.
    for (row, col), imputed_value in zip(Q, values_list):
        if coltype[col] == 'multinomial':
            imputed_value = \
                M_c['column_metadata'][col]['value_to_code'][imputed_value]
        T_imputed[row][col] = imputed_value
    return T_imputed
def impute_table(T, M_c, X_L_list, X_D_list, numDraws, get_next_seed):
    """Return a copy of data table T with every missing (NaN) cell imputed.

    :param T: the data table as a list of row lists; left unmodified
    :param M_c: column metadata (model types and code/value maps)
    :param X_L_list: list of latent-variable states
    :param X_D_list: list of cluster-assignment states
    :param numDraws: number of samples drawn per imputed cell
    :param get_next_seed: callable producing fresh RNG seeds
    :returns: a new table with all NaN cells replaced by imputed values
    """
    # Bug fix: copy(T) was a shallow copy, so assigning into its rows
    # mutated the caller's table despite the copy's clear intent.  Copy
    # each row so the input table is genuinely untouched.
    T_imputed = [list(row) for row in T]
    num_cols = len(T[0])
    # Classify each column from its model type.  (The previous numpy
    # col_names array was built only to take its length; dropped.)
    coltype = []
    for col_idx in range(num_cols):
        modeltype = M_c['column_metadata'][col_idx]['modeltype']
        coltype.append('continuous' if modeltype == 'normal_inverse_gamma'
                       else 'multinomial')
    rows_with_nans = [i for i in range(len(T)) if any(isnan_mixedtype(T[i]))]
    print(rows_with_nans)
    # One (row, col) query per missing cell.
    Q = []
    for row in rows_with_nans:
        missing = [c for c in range(num_cols) if isnan_mixedtype([T[row][c]])]
        Q.extend(zip([row] * len(missing), missing))
    # Impute each missing cell independently.
    values_list = []
    for query in Q:
        values = su.impute(M_c, X_L_list, X_D_list, [], [query],
                           numDraws, get_next_seed)
        values_list.append(values)
    # Write the samples back, translating multinomial codes back to their
    # original values.
    for (row, col), imputed_value in zip(Q, values_list):
        if coltype[col] == 'multinomial':
            imputed_value = \
                M_c['column_metadata'][col]['value_to_code'][imputed_value]
        T_imputed[row][col] = imputed_value
    return T_imputed