Example #1
0
 def _calc_affinity(self,data,affinity_type,**kwargs):
     """
     Build an affinity matrix for data with the requested method.

     affinity_type selects the computation:
       run_quest.INIT_AFF_COS_SIM -- affinity.mutual_cosine_similarity
       run_quest.DUAL_EMD         -- dual_affinity.calc_emd followed by
                                     dual_affinity.emd_dual_aff
     Any extra keyword arguments are forwarded unchanged to the first call
     of the selected method.

     Returns the affinity matrix produced by the selected method.
     Raises ValueError when affinity_type is neither of the values above
     (this used to surface as an UnboundLocalError on the return line).
     """
     if affinity_type == run_quest.INIT_AFF_COS_SIM: #cosine similarity
         return affinity.mutual_cosine_similarity(data,**kwargs)
     if affinity_type == run_quest.DUAL_EMD: #EMD
         emd = dual_affinity.calc_emd(data,**kwargs)
         return dual_affinity.emd_dual_aff(emd)
     raise ValueError(
         "Unsupported affinity type: {!r}".format(affinity_type))
Example #2
0
def break_node(train_data,
               col_tree_node,
               row_tree,
               regressors=None,
               k=5,
               alpha=0.0,
               beta=1.0,
               col_emd=None):
    """
    Split col_tree_node in two using a sparse linear predictor of the
    second Markov eigenvector of its columns.

    Steps:
    1. Take the columns of train_data listed in col_tree_node.elements.
    2. Get the EMD between those columns: computed with row_tree, alpha
       and beta when col_emd is None, otherwise sliced out of the supplied
       col_emd. Convert it to an affinity.
    3. Take the second eigenvector of the Markov matrix of that affinity.
    4. Run sklearn's lars_path over the candidate rows (regressors, or all
       of range(row_tree.size) when regressors is None) and keep the first
       k active rows.
    5. Fit a LinearRegression on those rows and split the node by the sign
       of the predicted eigenvector values.

    Returns (rows, lm): rows is an array of the selected row indices in
    train_data numbering, lm is the fitted LinearRegression.
    """
    import sklearn.linear_model as sklm

    col_indices = col_tree_node.elements
    node_data = train_data[:, col_indices].astype(np.float64)

    if col_emd is None:
        col_emd = dual_affinity.calc_emd(node_data, row_tree, alpha, beta)
        col_aff = dual_affinity.emd_dual_aff(col_emd)
    else:
        col_aff = dual_affinity.emd_dual_aff(
            col_emd[:, col_indices][col_indices, :])

    vecs, _ = markov.markov_eigs(col_aff, 2)
    eig = vecs[:, 1]

    if regressors is None:
        regressors = range(row_tree.size)

    _, active, _ = sklm.lars_path(node_data[regressors, :].T, eig, max_iter=50)

    # lars_path was given only the candidate rows, so `active` holds
    # positions within `regressors`. Translate them to real row numbers
    # before indexing node_data; indexing with the raw positions picked the
    # wrong rows whenever regressors was not simply 0..n-1.
    selected_rows = [regressors[x] for x in active[0:k]]

    features = node_data[selected_rows, :].T
    lm = sklm.LinearRegression()
    lm.fit(features, eig)
    pred_eigs = lm.predict(features)

    labels = pred_eigs > 0.0
    partition = labels * np.ones(labels.shape[0])
    col_tree_node.create_subclusters(partition)
    return np.array(selected_rows), lm
Example #3
0
def pyquest_newtree(data,tree_constant=0.25,row_alpha=0.5,col_alpha=0.5,beta=1.0,n_iters=3):
    """
    Questionnaire run that builds every tree with
    tree_building.make_tree_embedding.

    The first row tree comes from a mutual-cosine-similarity affinity that
    is embedded with 12 Markov eigenvectors and turned back into an
    affinity (maximum distance minus pairwise distance). After that,
    column and row trees are rebuilt alternately n_iters times from EMD
    based dual affinities.

    Returns (row_tree, col_tree, row_vecs, col_vecs, row_vals, col_vals),
    where the trees are the last ones built and the vecs/vals are the
    12-eigenvector Markov decompositions of the final dual affinities.
    """
    def grow_tree(matrix, partner_tree, alpha):
        # EMD w.r.t. the partner tree -> affinity -> new tree.
        emd = dual_affinity.calc_emd(matrix,partner_tree,alpha=alpha,beta=beta)
        return tree_building.make_tree_embedding(
            dual_affinity.emd_dual_aff(emd),tree_constant)

    seed_aff = affinity.mutual_cosine_similarity(data.T,False,0,threshold=0.1)

    # Diffusion embedding of the seed affinity; NaN eigenvalues are zeroed
    # so they do not contribute to the embedding.
    seed_vecs,seed_vals = markov.markov_eigs(seed_aff, 12)
    seed_vals[np.isnan(seed_vals)] = 0.0
    embedding = seed_vecs.dot(np.diag(seed_vals))
    pairwise = spsp.distance.squareform(spsp.distance.pdist(embedding))
    embedded_aff = np.max(pairwise) - pairwise

    dual_row_trees = [tree_building.make_tree_embedding(embedded_aff,tree_constant)]
    dual_col_trees = []

    for _ in xrange(n_iters):
        dual_col_trees.append(grow_tree(data,dual_row_trees[-1],col_alpha))
        dual_row_trees.append(grow_tree(data.T,dual_col_trees[-1],row_alpha))

    col_tree = dual_col_trees[-1]
    row_tree = dual_row_trees[-1]

    # Final dual affinities, computed against the last pair of trees.
    final_col_emd = dual_affinity.calc_emd(data,row_tree,alpha=col_alpha,beta=beta)
    final_row_emd = dual_affinity.calc_emd(data.T,col_tree,alpha=row_alpha,beta=beta)
    final_row_aff = dual_affinity.emd_dual_aff(final_row_emd)
    final_col_aff = dual_affinity.emd_dual_aff(final_col_emd)

    row_vecs,row_vals = markov.markov_eigs(final_row_aff, 12)
    col_vecs,col_vals = markov.markov_eigs(final_col_aff, 12)

    return row_tree,col_tree,row_vecs,col_vecs,row_vals,col_vals
Example #4
0
def break_node(train_data,col_tree_node,row_tree,regressors=None,
               k=5,alpha=0.0,beta=1.0,col_emd=None):
    """
    Split col_tree_node in two using a sparse linear predictor of the
    second Markov eigenvector of its columns.

    The EMD between the columns of train_data in col_tree_node.elements is
    computed with row_tree (or sliced from col_emd when one is supplied)
    and converted to an affinity. The second eigenvector of the Markov
    matrix of that affinity is then regressed on the candidate rows:
    sklearn's lars_path picks the first k active rows out of regressors
    (all rows when regressors is None), a LinearRegression is fitted on
    them, and the node is split by the sign of its predictions.

    Returns (rows, lm): rows is an array of the selected row indices in
    train_data numbering, lm is the fitted LinearRegression.
    """
    import sklearn.linear_model as sklm

    col_indices = col_tree_node.elements
    node_data = train_data[:,col_indices].astype(np.float64)

    if col_emd is None:
        col_emd = dual_affinity.calc_emd(node_data,row_tree,alpha,beta)
        col_aff = dual_affinity.emd_dual_aff(col_emd)
    else:
        col_aff = dual_affinity.emd_dual_aff(col_emd[:,col_indices][col_indices,:])

    vecs,_ = markov.markov_eigs(col_aff,2)
    eig = vecs[:,1]

    if regressors is None:
        regressors = range(row_tree.size)

    _,active,_ = sklm.lars_path(node_data[regressors,:].T,eig,max_iter=50)

    # `active` indexes the candidate sub-matrix handed to lars_path, i.e.
    # positions within `regressors`. Map back to real row numbers before
    # touching node_data; using the raw positions fitted on the wrong rows
    # whenever regressors was not simply 0..n-1.
    selected_rows = [regressors[x] for x in active[0:k]]

    features = node_data[selected_rows,:].T
    lm = sklm.LinearRegression()
    lm.fit(features,eig)
    pred_eigs = lm.predict(features)

    labels = pred_eigs > 0.0
    partition = labels*np.ones(labels.shape[0])
    col_tree_node.create_subclusters(partition)
    return np.array(selected_rows),lm
Example #5
0
def pyquest_bintree(data,row_alpha=0.5,col_alpha=0.5,beta=1.0,bal_constant=1.0,n_iters=3):
    """
    Runs the questionnaire with binary trees:
    initial affinity = mutual cosine similarity
    initial tree = bintree_construct.median_tree on 12 Markov eigenvectors
    dual trees = bintree_construct.old_eigen_tree (noise=0.0), alternating
    columns then rows for n_iters iterations.

    row_alpha, col_alpha and beta are passed to every tree construction and
    to the final EMD computations. bal_constant is accepted for interface
    compatibility but is not used by the old_eigen_tree builder called here.

    Returns (row_tree, col_tree, row_vecs, col_vecs, row_vals, col_vals),
    where the trees are the last ones built and the vecs/vals are the
    12-eigenvector Markov decompositions of the final dual affinities.
    """
    #Generate initial affinity
    init_row_aff = affinity.mutual_cosine_similarity(data.T,False,0,threshold=0.1)

    #Compute diffusion embedding of initial affinities
    init_row_vecs,init_row_vals = markov.markov_eigs(init_row_aff, 12)
    #Generate median trees
    init_row_tree = bintree_construct.median_tree(init_row_vecs,init_row_vals,max_levels=12)

    dual_col_trees = []
    dual_row_trees = [init_row_tree]

    for _ in xrange(n_iters):
        dual_col_trees.append(bintree_construct.old_eigen_tree(data,dual_row_trees[-1],alpha=col_alpha,beta=beta,noise=0.0))
        dual_row_trees.append(bintree_construct.old_eigen_tree(data.T,dual_col_trees[-1],alpha=row_alpha,beta=beta,noise=0.0))

    col_tree = dual_col_trees[-1]
    row_tree = dual_row_trees[-1]

    # Use the caller's alpha/beta here as well; these calls previously
    # hard-coded alpha=0.5, beta=1.0 and so ignored non-default arguments.
    col_emd = dual_affinity.calc_emd(data,row_tree,alpha=col_alpha,beta=beta)
    row_emd = dual_affinity.calc_emd(data.T,col_tree,alpha=row_alpha,beta=beta)

    row_aff = dual_affinity.emd_dual_aff(row_emd)
    col_aff = dual_affinity.emd_dual_aff(col_emd)

    row_vecs,row_vals = markov.markov_eigs(row_aff, 12)
    col_vecs,col_vals = markov.markov_eigs(col_aff, 12)

    return row_tree,col_tree,row_vecs,col_vecs,row_vals,col_vals
def eigen_cut_zero(node,emd,eps=1.0):
    """
    Label the elements of node by the sign of the second Markov
    eigenvector of their EMD-based affinity.

    The affinity is built from the rows and columns of emd selected by
    node.elements. Returns a float array with 1.0 where the eigenvector
    entry is strictly positive and 0.0 elsewhere.
    """
    sub_emd = emd[node.elements,:][:,node.elements]
    node_aff = dual_affinity.emd_dual_aff(sub_emd,eps)

    try:
        vecs,_ = markov.markov_eigs(node_aff,2)
    except:
        # Dump the inputs for debugging, then let the error propagate.
        print(node_aff)
        print(emd)
        print(node.elements)
        raise

    eig = vecs[:,1]
    positive_side = eig > 0.0
    labels = np.ones(len(eig))
    labels *= positive_side

    return labels
def bal_eigen_cut(node,emd,bal_constant=1.0,eps=1.0):
    """
    Split the elements of node at a random position along the sorted
    second Markov eigenvector of their EMD-based affinity.

    The affinity is built from the rows and columns of emd selected by
    node.elements. bal_cut(n, bal_constant) supplies the inclusive range
    from which the cut position is drawn uniformly at random.

    Returns an integer array with 1 for the cut_loc elements with the
    smallest eigenvector entries and 0 for the rest.
    """
    affinity = dual_affinity.emd_dual_aff(emd[node.elements,:][:,node.elements]
                                          ,eps)

    try:
        vecs,_ = markov.markov_eigs(affinity,2)
    except:
        # Dump the inputs for debugging, then let the error propagate.
        print(affinity)
        print(emd)
        print(node.elements)
        raise
    eig = vecs[:,1]
    eig_sorted = np.argsort(eig)
    n = len(eig)
    lo,hi = bal_cut(n,bal_constant)
    # randint excludes its upper bound, hence hi+1 to make hi reachable.
    cut_loc = np.random.randint(lo,hi+1)
    # Builtin int instead of np.int: the alias was removed in NumPy 1.24
    # and always meant the builtin int.
    labels = np.zeros(n,int)
    labels[eig_sorted[0:cut_loc]] = 1

    return labels
def eigen_cut(node,emd,noise,eps=1.0):
    """
    Split the elements of node around the middle of the second Markov
    eigenvector of their EMD-based affinity.

    The affinity is built from the rows and columns of emd selected by
    node.elements. When noise is below 1e-8 the int(n/2) elements with the
    smallest eigenvector entries are labelled 1 and the rest 0. Otherwise
    the threshold is the sorted eigenvector entry at the midpoint shifted
    by a uniform random fraction in [-noise, noise) of n, and elements
    strictly above the threshold are labelled 1.

    Returns a float array of 0/1 labels.
    """
    sub_emd = emd[node.elements,:][:,node.elements]
    node_aff = dual_affinity.emd_dual_aff(sub_emd,eps)

    try:
        vecs,_ = markov.markov_eigs(node_aff,2)
    except:
        # Dump the inputs for debugging, then let the error propagate.
        print(node_aff)
        print(emd)
        print(node.elements)
        raise

    eig = vecs[:,1]
    ascending = np.sort(eig)
    n = len(ascending)
    # Drawn unconditionally so the random stream advances on both paths.
    jitter = np.random.uniform(-noise,noise)

    if noise < 1e-8:
        labels = np.zeros(n)
        lower_half = np.argsort(eig)[0:int(n/2)]
        labels[lower_half] = 1
        return labels

    threshold = ascending[int((n/2)+(jitter*n))]
    return np.ones(n)*(eig > threshold)
Example #9
0
def pyquest(data,params):
    """
    Runs the questionnaire on data with params.
    params is a PyQuestParams object.

    An initial row tree is built from the initial row affinity; then, for
    params.n_iters iterations, a column tree is built from the latest row
    tree and a row tree from the latest column tree.

    Returns a PyQuestRun holding every tree built (with descriptions) and
    params, or None when a Gaussian dual affinity is requested, which is
    not supported.
    """

    if params.init_aff_type == INIT_AFF_COS_SIM:
        init_row_aff = affinity.mutual_cosine_similarity(
                            data.T,False,0,threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        init_row_aff = affinity.gaussian_euclidean(
                            data.T, params.init_aff_knn, params.init_aff_epsilon)

    #Initial row tree
    if params.tree_type == TREE_TYPE_BINARY:
        init_row_tree = bin_tree_build.bin_tree_build(init_row_aff,'r_dyadic',
                                                      params.tree_bal_constant)
    elif params.tree_type == TREE_TYPE_FLEXIBLE:
        init_row_tree = flex_tree_build.flex_tree_diffusion(init_row_aff,
                                            params.tree_constant)
    dual_col_trees = []
    dual_row_trees = [init_row_tree]

    row_tree_descs = ["Initial tree"]
    col_tree_descs = []

    # Set before the loop so that n_iters == 0 still returns a run instead
    # of failing on an undefined name; refreshed after every iteration.
    quest_run_desc = "{}".format(datetime.datetime.now())

    for i in xrange(params.n_iters):
        # Column affinity from the latest row tree.
        if params.col_affinity_type == DUAL_EMD:
            col_emd = dual_affinity.calc_emd(data,dual_row_trees[-1],
                     params.col_alpha,params.col_beta)
            col_aff = dual_affinity.emd_dual_aff(col_emd)
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # Column tree.
        if params.tree_type == TREE_TYPE_BINARY:
            col_tree = bin_tree_build.bin_tree_build(col_aff,'r_dyadic',
                                                     params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            col_tree = flex_tree_build.flex_tree_diffusion(col_aff,
                                                           params.tree_constant)
        dual_col_trees.append(col_tree)
        col_tree_descs.append("Iteration {}".format(i))

        # Row affinity from the column tree just built.
        if params.row_affinity_type == DUAL_EMD:
            row_emd = dual_affinity.calc_emd(data.T,dual_col_trees[-1],
                     params.row_alpha,params.row_beta)
            row_aff = dual_affinity.emd_dual_aff(row_emd)
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # Row tree.
        if params.tree_type == TREE_TYPE_BINARY:
            row_tree = bin_tree_build.bin_tree_build(row_aff,'r_dyadic',
                                                     params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            row_tree = flex_tree_build.flex_tree_diffusion(row_aff,
                                                           params.tree_constant)
        dual_row_trees.append(row_tree)
        row_tree_descs.append("Iteration {}".format(i))
        quest_run_desc = "{}".format(datetime.datetime.now())

    return PyQuestRun(quest_run_desc,dual_row_trees,dual_col_trees,
                      row_tree_descs,col_tree_descs,params)
Example #10
0
def pyquest3d(data3d, params):
    """
    Runs the 3d questionnaire on data with params.
    params is a PyQuest3DParams object.
    Order of analysis is initialization for rows and columns
    and then iterating over channels (3rd dimension), rows and columns.

    Each iteration builds the channel tree from the most recent row and
    column trees, then the row tree from the most recent column and channel
    trees, then the column tree from the most recent row and channel trees.

    Returns a PyQuest3DRun holding every tree built (with descriptions),
    params, the initial column/row affinities and the last row, column and
    channel affinities (those three are None when params.n_iters is 0).
    Returns None when a Gaussian dual affinity is requested, which is not
    supported.
    """

    nrows, ncols, nchans = data3d.shape
    # Unfold the cube into an nrows x (ncols*nchans) matrix and an
    # (nrows*nchans) x ncols matrix for the initial 2d affinities.
    data_Y = np.reshape(data3d, (nrows, ncols * nchans), order='F')
    data_X = np.reshape(np.transpose(data3d, (0, 2, 1)),
                        (nrows * nchans, ncols),
                        order='F')

    if params.init_aff_type == INIT_AFF_COS_SIM:
        init_row_aff = affinity.mutual_cosine_similarity(
            data_Y.T, False, 0, threshold=params.init_aff_threshold)
        init_col_aff = affinity.mutual_cosine_similarity(
            data_X, False, 0, threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        init_row_aff = affinity.gaussian_euclidean(data_Y.T,
                                                   params.init_aff_knn,
                                                   params.init_aff_epsilon)
        init_col_aff = affinity.gaussian_euclidean(data_X, params.init_aff_knn,
                                                   params.init_aff_epsilon)

    #Initial row tree
    if params.row_tree_type == TREE_TYPE_BINARY:
        init_row_tree = bin_tree_build.bin_tree_build(
            init_row_aff, 'r_dyadic', params.row_tree_bal_constant)
    elif params.row_tree_type == TREE_TYPE_FLEXIBLE:
        init_row_tree = flex_tree_build.flex_tree_diffusion(
            init_row_aff, params.row_tree_constant)
    # initial column tree
    if params.col_tree_type == TREE_TYPE_BINARY:
        init_col_tree = bin_tree_build.bin_tree_build(
            init_col_aff, 'r_dyadic', params.col_tree_bal_constant)
    elif params.col_tree_type == TREE_TYPE_FLEXIBLE:
        init_col_tree = flex_tree_build.flex_tree_diffusion(
            init_col_aff, params.col_tree_constant)

    # data structure for trees. All trees calculated in the process are exported
    dual_row_trees = [init_row_tree]
    dual_col_trees = [init_col_tree]
    dual_chan_trees = []

    row_tree_descs = ["Initial tree"]
    col_tree_descs = ["Initial tree"]
    chan_tree_descs = []

    # Set before the loop so that n_iters == 0 still returns a run instead
    # of failing on undefined names.
    quest_run_desc = "{}".format(datetime.datetime.now())
    row_aff = None
    col_aff = None
    chan_aff = None

    # iterate over the questionnaire starting with channels and then rows and cols in each iteration
    for i in xrange(params.n_iters):
        # Channel affinity from the latest row and column trees. On the
        # first pass these are the initial trees; later passes must use the
        # refined ones, otherwise the channel tree never changes.
        if params.chan_affinity_type == DUAL_EMD:
            chan_emd2d = dual_affinity.calc_2demd(data3d,
                                                  dual_row_trees[-1],
                                                  dual_col_trees[-1],
                                                  row_alpha=params.row_alpha,
                                                  row_beta=params.row_beta,
                                                  col_alpha=params.col_alpha,
                                                  col_beta=params.col_beta)
            chan_aff = dual_affinity.emd_dual_aff(chan_emd2d)
        elif params.chan_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # constructing channel tree (built once per iteration; any
        # non-binary chan_tree_type gets a flexible tree)
        if params.chan_tree_type == TREE_TYPE_BINARY:
            chan_tree = bin_tree_build.bin_tree_build(
                chan_aff, 'r_dyadic', params.chan_tree_bal_constant)
        else:
            chan_tree = flex_tree_build.flex_tree_diffusion(
                chan_aff, params.chan_tree_constant)
        dual_chan_trees.append(chan_tree)
        chan_tree_descs.append("Iteration {}".format(i))

        # calculate row affinity based on column and channel trees.
        # The data is reordered to (cols, chans, rows), so the first tree
        # is the column tree and takes the column alpha/beta.
        if params.row_affinity_type == DUAL_EMD:
            row_emd2d = dual_affinity.calc_2demd(
                np.transpose(data3d, (1, 2, 0)),
                dual_col_trees[-1],
                dual_chan_trees[-1],
                row_alpha=params.col_alpha,
                row_beta=params.col_beta,
                col_alpha=params.chan_alpha,
                col_beta=params.chan_beta)
            row_aff = dual_affinity.emd_dual_aff(row_emd2d)
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # constructing row tree
        if params.row_tree_type == TREE_TYPE_BINARY:
            row_tree = bin_tree_build.bin_tree_build(
                row_aff, 'r_dyadic', params.row_tree_bal_constant)
        elif params.row_tree_type == TREE_TYPE_FLEXIBLE:
            row_tree = flex_tree_build.flex_tree_diffusion(
                row_aff, params.row_tree_constant)
        dual_row_trees.append(row_tree)
        row_tree_descs.append("Iteration {}".format(i))

        # calculate column affinity based on row and channel trees.
        # The data is reordered to (rows, chans, cols).
        if params.col_affinity_type == DUAL_EMD:
            col_emd2d = dual_affinity.calc_2demd(
                np.transpose(data3d, (0, 2, 1)),
                dual_row_trees[-1],
                dual_chan_trees[-1],
                row_alpha=params.row_alpha,
                row_beta=params.row_beta,
                col_alpha=params.chan_alpha,
                col_beta=params.chan_beta)
            col_aff = dual_affinity.emd_dual_aff(col_emd2d)
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # constructing column tree
        if params.col_tree_type == TREE_TYPE_BINARY:
            col_tree = bin_tree_build.bin_tree_build(
                col_aff, 'r_dyadic', params.col_tree_bal_constant)
        elif params.col_tree_type == TREE_TYPE_FLEXIBLE:
            col_tree = flex_tree_build.flex_tree_diffusion(
                col_aff, params.col_tree_constant)
        dual_col_trees.append(col_tree)
        col_tree_descs.append("Iteration {}".format(i))
        quest_run_desc = "{}".format(datetime.datetime.now())

    # iterations have finished, outputting structures of the tree,
    # parameters
    return PyQuest3DRun(quest_run_desc, dual_row_trees, dual_col_trees,
                        dual_chan_trees, row_tree_descs, col_tree_descs,
                        chan_tree_descs, params, init_col_aff, init_row_aff,
                        row_aff, col_aff, chan_aff)
Example #11
0
def pyquest(data, params):
    """
    Runs the questionnaire on data with params. params is a PyQuestParams object.
    Starts by constructing the initial affinity on the rows of the matrix (default).

    Then, for params.n_iters iterations, a column tree is built from the
    latest row tree and a row tree from the latest column tree, printing a
    progress message before each step. When params.col_weighted or
    params.row_weighted is set, the EMD is computed with alpha=0, beta=0
    and weights equal to the row-wise L2 norm of the data transformed by
    tree_util.tree_transform_mat of the partner tree.

    Returns a PyQuestRun holding every tree built (with descriptions) and
    params, or None when a Gaussian dual affinity is requested, which is
    not supported.
    """

    # construct row affinity
    if params.init_aff_type == INIT_AFF_COS_SIM:
        init_row_aff = affinity.mutual_cosine_similarity(
            data.T, False, 0, threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        init_row_aff = affinity.gaussian_euclidean(data.T, params.init_aff_knn,
                                                   params.init_aff_epsilon)

    #Initial row tree
    if params.row_tree_type == TREE_TYPE_BINARY:
        init_row_tree = bin_tree_build.bin_tree_build(
            init_row_aff, 'r_dyadic', params.row_tree_bal_constant)
    elif params.row_tree_type == TREE_TYPE_FLEXIBLE:
        init_row_tree = flex_tree_build.flex_tree_diffusion(
            init_row_aff, params.row_tree_constant)

    # data structure for trees. All trees calculated in the process are exported
    dual_row_trees = [init_row_tree]
    dual_col_trees = []

    row_tree_descs = ["Initial tree"]
    col_tree_descs = []

    # Set before the loop so that n_iters == 0 still returns a run instead
    # of failing on an undefined name; refreshed after every iteration.
    quest_run_desc = "{}".format(datetime.datetime.now())

    # iterate over the questionnaire starting with columns and then rows in each iteration
    for i in xrange(params.n_iters):
        message = "Iteration {}: calculating column affinity...".format(i)
        print(message)

        # calculating column affinity based on row tree
        if params.col_affinity_type == DUAL_EMD:
            if params.col_weighted == True:
                print("weighted emd")
                row_coefs = tree_util.tree_transform_mat(
                    dual_row_trees[-1]).dot(data)
                row_weights = np.sqrt(np.sum(row_coefs**2, axis=1))
                col_emd = dual_affinity.calc_emd(data,
                                                 dual_row_trees[-1],
                                                 alpha=0,
                                                 beta=0,
                                                 weights=row_weights)
            else:
                col_emd = dual_affinity.calc_emd(data, dual_row_trees[-1],
                                                 params.col_alpha,
                                                 params.col_beta)
            col_aff = dual_affinity.emd_dual_aff(col_emd)
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        message = "Iteration {}: calculating column tree...".format(i)
        print(message)

        # constructing column tree
        if params.col_tree_type == TREE_TYPE_BINARY:
            col_tree = bin_tree_build.bin_tree_build(
                col_aff, 'r_dyadic', params.col_tree_bal_constant)
        elif params.col_tree_type == TREE_TYPE_FLEXIBLE:
            col_tree = flex_tree_build.flex_tree_diffusion(
                col_aff, params.col_tree_constant)
        dual_col_trees.append(col_tree)
        col_tree_descs.append("Iteration {}".format(i))

        # column tree finished, now starting with rows
        message = "Iteration {}: calculating row affinity...".format(i)
        print(message)

        # calculate row affinity based on column tree
        if params.row_affinity_type == DUAL_EMD:
            if params.row_weighted == True:
                print("weighted emd")
                col_coefs = tree_util.tree_transform_mat(
                    dual_col_trees[-1]).dot(data.T)
                col_weights = np.sqrt(np.sum(col_coefs**2, axis=1))
                row_emd = dual_affinity.calc_emd(data.T,
                                                 dual_col_trees[-1],
                                                 alpha=0,
                                                 beta=0,
                                                 weights=col_weights)
            else:
                row_emd = dual_affinity.calc_emd(data.T, dual_col_trees[-1],
                                                 params.row_alpha,
                                                 params.row_beta)
            row_aff = dual_affinity.emd_dual_aff(row_emd)
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        message = "Iteration {}: calculating row tree...".format(i)
        print(message)

        # constructing row tree
        if params.row_tree_type == TREE_TYPE_BINARY:
            row_tree = bin_tree_build.bin_tree_build(
                row_aff, 'r_dyadic', params.row_tree_bal_constant)
        elif params.row_tree_type == TREE_TYPE_FLEXIBLE:
            row_tree = flex_tree_build.flex_tree_diffusion(
                row_aff, params.row_tree_constant)
        dual_row_trees.append(row_tree)
        row_tree_descs.append("Iteration {}".format(i))
        quest_run_desc = "{}".format(datetime.datetime.now())

    # iterations have finished, outputting structures of the tree,
    # parameters
    return PyQuestRun(quest_run_desc, dual_row_trees, dual_col_trees,
                      row_tree_descs, col_tree_descs, params)
Example #12
0
def pyquest(data,params):
    """
    Runs the questionnaire on data with params, publishing progress
    messages on the "status.bar" topic.
    params should be a PyQuestParams object.

    The initial row tree is a median tree on the Markov eigenvectors of the
    initial affinity (binary tree type) or an embedding tree on the
    diffusion-distance affinity (flexible tree type). Then, for
    params.n_iters iterations, a column tree is built from the latest row
    tree and a row tree from the latest column tree.

    Returns a PyQuestRun holding every tree built (with descriptions) and
    params, or None when a Gaussian dual affinity is requested, which is
    not supported.
    """

    Publisher.sendMessage("status.bar", "Calculating initial affinity...")
    if params.init_aff_type == INIT_AFF_COS_SIM:
        init_row_aff = affinity.mutual_cosine_similarity(
                            data.T,False,0,threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        #add KNN to the page
        init_row_aff = affinity.gaussian_euclidean(
                            data.T, 5, params.init_aff_epsilon)

    #Compute diffusion embedding of initial affinities
    init_row_vecs,init_row_vals = markov.markov_eigs(init_row_aff, 12)
    # NaN eigenvalues are zeroed in place; both tree types below read
    # init_row_vals after this point.
    init_row_vals[np.isnan(init_row_vals)] = 0.0

    #Generate initial tree
    Publisher.sendMessage("status.bar", "Calculating initial row tree...")

    if params.tree_type == TREE_TYPE_BINARY:
        init_row_tree = bintree_construct.median_tree(
                                init_row_vecs,init_row_vals,max_levels=12)
    elif params.tree_type == TREE_TYPE_FLEXIBLE:
        # The pairwise distance matrix is only needed for the flexible
        # tree, so it is built here rather than unconditionally.
        row_embedding = init_row_vecs.dot(np.diag(init_row_vals))
        row_distances = spsp.distance.squareform(
                                spsp.distance.pdist(row_embedding))
        row_affinity = np.max(row_distances) - row_distances
        init_row_tree = tree_building.make_tree_embedding(
                                row_affinity,params.tree_constant)
    dual_col_trees = []
    dual_row_trees = [init_row_tree]

    row_tree_descs = ["Initial tree"]
    col_tree_descs = []

    # Set before the loop so that n_iters == 0 still returns a run instead
    # of failing on an undefined name; refreshed after every iteration.
    quest_run_desc = "{}".format(datetime.datetime.now())

    for i in xrange(params.n_iters):
        message = "Iteration {}: calculating column affinity...".format(i)
        Publisher.sendMessage("status.bar", message)

        if params.col_affinity_type == DUAL_EMD:
            col_emd = dual_affinity.calc_emd(data,dual_row_trees[-1],
                     params.col_alpha,params.col_beta)
            col_aff = dual_affinity.emd_dual_aff(col_emd)
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        message = "Iteration {}: calculating column tree...".format(i)
        Publisher.sendMessage("status.bar", message)

        if params.tree_type == TREE_TYPE_BINARY:
            col_tree = bintree_construct.eigen_tree(data,dual_row_trees[-1],
                    params.col_alpha,params.col_beta,params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            col_tree = tree_building.make_tree_embedding(col_aff,
                                     params.tree_constant)
        dual_col_trees.append(col_tree)
        col_tree_descs.append("Iteration {}".format(i))

        message = "Iteration {}: calculating row affinity...".format(i)
        Publisher.sendMessage("status.bar", message)

        if params.row_affinity_type == DUAL_EMD:
            row_emd = dual_affinity.calc_emd(data.T,dual_col_trees[-1],
                     params.row_alpha,params.row_beta)
            row_aff = dual_affinity.emd_dual_aff(row_emd)
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        message = "Iteration {}: calculating row tree...".format(i)
        Publisher.sendMessage("status.bar", message)

        if params.tree_type == TREE_TYPE_BINARY:
            row_tree = bintree_construct.eigen_tree(data.T,dual_col_trees[-1],
                    params.row_alpha,params.row_beta,params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            row_tree = tree_building.make_tree_embedding(row_aff,
                                     params.tree_constant)
        dual_row_trees.append(row_tree)
        row_tree_descs.append("Iteration {}".format(i))
        quest_run_desc = "{}".format(datetime.datetime.now())

    return PyQuestRun(quest_run_desc,dual_row_trees,dual_col_trees,
                      row_tree_descs,col_tree_descs,params)
Example #13
0
def pyquest(data, params):
    """
    Runs the questionnaire on data with params.
    params is a PyQuestParams object.

    An initial row tree is built from the initial row affinity; then, for
    params.n_iters iterations, a column tree is built from the latest row
    tree and a row tree from the latest column tree.

    Returns a PyQuestRun holding every tree built (with descriptions) and
    params, or None when a Gaussian dual affinity is requested, which is
    not supported.
    """

    if params.init_aff_type == INIT_AFF_COS_SIM:
        init_row_aff = affinity.mutual_cosine_similarity(
            data.T, False, 0, threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        init_row_aff = affinity.gaussian_euclidean(data.T, params.init_aff_knn,
                                                   params.init_aff_epsilon)

    #Initial row tree
    if params.tree_type == TREE_TYPE_BINARY:
        init_row_tree = bin_tree_build.bin_tree_build(init_row_aff, 'r_dyadic',
                                                      params.tree_bal_constant)
    elif params.tree_type == TREE_TYPE_FLEXIBLE:
        init_row_tree = flex_tree_build.flex_tree_diffusion(
            init_row_aff, params.tree_constant)
    dual_col_trees = []
    dual_row_trees = [init_row_tree]

    row_tree_descs = ["Initial tree"]
    col_tree_descs = []

    # Set before the loop so that n_iters == 0 still returns a run instead
    # of failing on an undefined name; refreshed after every iteration.
    quest_run_desc = "{}".format(datetime.datetime.now())

    for i in xrange(params.n_iters):
        # Column affinity from the latest row tree.
        if params.col_affinity_type == DUAL_EMD:
            col_emd = dual_affinity.calc_emd(data, dual_row_trees[-1],
                                             params.col_alpha, params.col_beta)
            col_aff = dual_affinity.emd_dual_aff(col_emd)
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # Column tree.
        if params.tree_type == TREE_TYPE_BINARY:
            col_tree = bin_tree_build.bin_tree_build(col_aff, 'r_dyadic',
                                                     params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            col_tree = flex_tree_build.flex_tree_diffusion(
                col_aff, params.tree_constant)
        dual_col_trees.append(col_tree)
        col_tree_descs.append("Iteration {}".format(i))

        # Row affinity from the column tree just built.
        if params.row_affinity_type == DUAL_EMD:
            row_emd = dual_affinity.calc_emd(data.T, dual_col_trees[-1],
                                             params.row_alpha, params.row_beta)
            row_aff = dual_affinity.emd_dual_aff(row_emd)
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # Row tree.
        if params.tree_type == TREE_TYPE_BINARY:
            row_tree = bin_tree_build.bin_tree_build(row_aff, 'r_dyadic',
                                                     params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            row_tree = flex_tree_build.flex_tree_diffusion(
                row_aff, params.tree_constant)
        dual_row_trees.append(row_tree)
        row_tree_descs.append("Iteration {}".format(i))
        quest_run_desc = "{}".format(datetime.datetime.now())

    return PyQuestRun(quest_run_desc, dual_row_trees, dual_col_trees,
                      row_tree_descs, col_tree_descs, params)