コード例 #1
0
def predict(M_c, X_L, X_D, Y, Q, n, get_next_seed, return_samples=False):
    """Predict a single query cell by delegating to ``su.impute``.

    Prediction is currently the same as imputation, except that the row index
    in the query is expected to lie outside the table that was used to
    generate the model.  Nothing here enforces that; building the query
    correctly is left to the caller.

    :param Q: the query; it must contain exactly one entry
    :param return_samples: when true, ``su.impute`` is asked to return its
        samples as well (see the note in the body)
    :returns: the estimate produced by ``su.impute``
    :raises AssertionError: if ``Q`` does not contain exactly one entry

    All other arguments are forwarded to ``su.impute`` unchanged.
    """
    # FIXME: allow more than one cell to be predicted
    assert len(Q) == 1

    if not return_samples:
        return su.impute(M_c, X_L, X_D, Y, Q, n, get_next_seed)

    # NOTE(review): the samples are requested and then dropped, so only the
    # estimate reaches the caller on this path too -- confirm whether the
    # samples were meant to be returned.
    estimate, _samples = su.impute(M_c, X_L, X_D, Y, Q, n, get_next_seed,
                                   return_samples=True)
    return estimate
コード例 #2
0
ファイル: LocalEngine.py プロジェクト: joshnr13/crosscat
    def impute(self, M_c, X_L, X_D, Y, Q, seed, n):
        """Impute values from the predictive distribution of a latent state.

        The seed is turned into a seed generator with ``make_get_next_seed``
        and the work is delegated to ``su.impute``.

        :param M_c: The column metadata
        :type M_c: dict
        :param X_L: the latent variables associated with the latent state
        :type X_L: dict
        :param X_D: the particular cluster assignments of each row in each view
        :type X_D: list of lists
        :param Y: A list of constraints to apply when sampling.  Each
                  constraint is a triplet of (r, d, v): r is the row index,
                  d is the column index and v is the value of the constraint
        :type Y: list of lists
        :param Q: A list of values to sample.  Each value is a doublet of
                  (r, d): r is the row index, d is the column index
        :type Q: list of lists
        :param seed: The random seed
        :type seed: int
        :param n: the number of samples to use in the imputation
        :type n: int
        :returns: list of floats -- imputed values in the same order as
                  specified by Q

        """
        seed_source = make_get_next_seed(seed)
        return su.impute(M_c, X_L, X_D, Y, Q, n, seed_source)
コード例 #3
0
def predict(M_c, X_L, X_D, Y, Q, n, get_next_seed, return_samples=False):
    """Predict one cell; currently a thin wrapper around ``su.impute``.

    Predict is the same as impute except that the row id in the query must
    lie outside the length of the table used to generate the model.  For now
    this simply calls ``su.impute`` and leaves it to the user to generate the
    query correctly.

    :param Q: the query; exactly one entry is supported
    :param return_samples: forwarded to ``su.impute`` as ``return_samples=True``
        when set (see the note in the body)
    :returns: the estimate produced by ``su.impute``
    :raises AssertionError: if ``len(Q) != 1``
    """
    # FIXME: allow more than one cell to be predicted
    assert len(Q) == 1

    impute_args = (M_c, X_L, X_D, Y, Q, n, get_next_seed)
    if return_samples:
        # NOTE(review): the second element (the samples) is discarded here, so
        # the return value has the same form on both paths -- confirm intent.
        e, _ = su.impute(*impute_args, return_samples=True)
        return e
    return su.impute(*impute_args)
コード例 #4
0
ファイル: LocalEngine.py プロジェクト: avinson/crosscat
    def impute(self, M_c, X_L, X_D, Y, Q, seed, n):
        """Impute values from predictive distribution of the given latent state.

        :param M_c: forwarded unchanged to ``su.impute``
        :param X_L: forwarded unchanged to ``su.impute``
        :param X_D: forwarded unchanged to ``su.impute``
        :param Y: A list of constraints to apply when sampling.  Each
            constraint is a triplet of (r, d, v): r is the row index, d is
            the column index and v is the value of the constraint
        :type Y: list of lists
        :param Q: A list of values to sample.  Each value is a doublet of
            (r, d): r is the row index, d is the column index
        :type Q: list of lists
        :param seed: passed to ``make_get_next_seed`` to build the seed
            generator handed to ``su.impute``
        :param n: the number of samples to use in the imputation
        :type n: int

        :returns: list of floats -- imputed values in the same order as
            specified by Q
        """
        next_seed = make_get_next_seed(seed)
        imputed = su.impute(M_c, X_L, X_D, Y, Q, n, next_seed)
        return imputed
コード例 #5
0
def impute_table(T, M_c, X_L_list, X_D_list, numDraws, get_next_seed):
    """Fill the missing cells of table ``T`` with values from ``su.impute``.

    A cell is treated as missing when ``isnan_mixedtype`` flags it.  Every
    missing cell is imputed on its own, with an empty constraint list.  For a
    column whose model type is not ``'normal_inverse_gamma'`` the imputed
    value is mapped through that column's ``value_to_code`` table before it
    is stored.

    :param T: the data table, indexed as ``T[row][col]``; it must have at
        least one row, because the column count is taken from ``T[0]``
    :param M_c: column metadata; ``M_c['column_metadata'][col]`` supplies
        ``'modeltype'`` and, for non-continuous columns, ``'value_to_code'``
    :param X_L_list: forwarded to ``su.impute``
    :param X_D_list: forwarded to ``su.impute``
    :param numDraws: forwarded to ``su.impute`` as its sample-count argument
    :param get_next_seed: forwarded to ``su.impute``
    :returns: the object produced by ``copy(T)``, with the missing cells
        replaced by their imputed values
    """
    # NOTE(review): if ``copy`` is a shallow copy and ``T`` is a list of
    # lists, the rows are shared and the writes below also modify ``T`` --
    # confirm whether a deep copy is wanted.
    T_imputed = copy(T)
    num_cols = len(T[0])

    # Identify column types
    coltype = [
        'continuous'
        if M_c['column_metadata'][col_idx]['modeltype'] == 'normal_inverse_gamma'
        else 'multinomial'
        for col_idx in range(num_cols)
    ]

    rowsWithNans = [i for i, row in enumerate(T) if any(isnan_mixedtype(row))]
    # Call form rather than the Python 2 print statement: it prints the same
    # single value on Python 2 and is valid syntax on Python 3.
    print(rowsWithNans)

    # One (row, col) query per missing cell, in row-major order.
    Q = [
        (row_idx, col_idx)
        for row_idx in rowsWithNans
        for col_idx in range(num_cols)
        if isnan_mixedtype([T[row_idx][col_idx]])
    ]

    # Impute missing values in table; su.impute is called with one cell at a time.
    values_list = [
        su.impute(M_c, X_L_list, X_D_list, [], [cell], numDraws, get_next_seed)
        for cell in Q
    ]

    # Put the samples back into the data table
    for (row_idx, col_idx), imputed_value in zip(Q, values_list):
        if coltype[col_idx] == 'multinomial':
            imputed_value = M_c['column_metadata'][col_idx]['value_to_code'][
                imputed_value]
        T_imputed[row_idx][col_idx] = imputed_value

    return T_imputed
コード例 #6
0
def impute_table(T, M_c, X_L_list, X_D_list, numDraws, get_next_seed):
    """Return ``copy(T)`` with every missing cell replaced by an imputed value.

    Missing cells are those flagged by ``isnan_mixedtype``.  Each one is
    imputed separately through ``su.impute`` with no constraints.  Values for
    columns whose model type is not ``'normal_inverse_gamma'`` are translated
    through the column's ``value_to_code`` mapping before being written back.

    :param T: the data table, indexed as ``T[row][col]``
    :param M_c: column metadata (``idx_to_name`` and ``column_metadata`` are read)
    :param X_L_list: forwarded to ``su.impute``
    :param X_D_list: forwarded to ``su.impute``
    :param numDraws: forwarded to ``su.impute`` as its sample-count argument
    :param get_next_seed: forwarded to ``su.impute``
    :returns: the copied table with imputed values filled in
    """
    T_imputed = copy(T)
    row_count = len(T)
    column_count = len(T[0])

    # Identify column types
    col_names = numpy.array(
        [M_c['idx_to_name'][str(idx)] for idx in range(column_count)])
    coltype = []
    for idx in range(len(col_names)):
        is_continuous = (
            M_c['column_metadata'][idx]['modeltype'] == 'normal_inverse_gamma')
        coltype.append('continuous' if is_continuous else 'multinomial')

    rowsWithNans = [r for r in range(row_count) if any(isnan_mixedtype(T[r]))]
    print(rowsWithNans)

    # Build one (row, col) query per missing cell.
    Q = []
    for r in rowsWithNans:
        missing_cols = [
            c for c in range(column_count) if isnan_mixedtype([T[r][c]])]
        Q.extend((r, c) for c in missing_cols)

    # Impute missing values in table, one cell per su.impute call.
    values_list = [
        su.impute(M_c, X_L_list, X_D_list, [], [cell], numDraws, get_next_seed)
        for cell in Q
    ]

    # Put the samples back into the data table
    for (r, c), imputed_value in zip(Q, values_list):
        if coltype[c] == 'multinomial':
            imputed_value = M_c['column_metadata'][c]['value_to_code'][imputed_value]
        T_imputed[r][c] = imputed_value

    return T_imputed