def _calc_affinity(self, data, affinity_type, **kwargs):
    """
    Computes an affinity matrix for data with the requested method.

    affinity_type selects the computation:
        run_quest.INIT_AFF_COS_SIM -- affinity.mutual_cosine_similarity
        run_quest.DUAL_EMD         -- dual_affinity.calc_emd followed by
                                      dual_affinity.emd_dual_aff
    kwargs are forwarded unchanged to the routine that is selected.

    Returns the affinity matrix produced by that routine.

    Raises ValueError for any other affinity_type. (Previously such a value
    fell through to the return statement and surfaced as an
    UnboundLocalError.)
    """
    if affinity_type == run_quest.INIT_AFF_COS_SIM:
        # cosine similarity
        return affinity.mutual_cosine_similarity(data, **kwargs)
    if affinity_type == run_quest.DUAL_EMD:
        # EMD, then converted to an affinity
        emd = dual_affinity.calc_emd(data, **kwargs)
        return dual_affinity.emd_dual_aff(emd)
    raise ValueError(
        "Unsupported affinity type: {!r}".format(affinity_type))
def break_node(train_data, col_tree_node, row_tree, regressors=None, k=5,
               alpha=0.0, beta=1.0, col_emd=None):
    """
    Splits col_tree_node in two using a sparse linear predictor of the
    second Markov eigenvector of its columns.

    Steps:
      1. Take the columns of train_data listed in col_tree_node.elements.
      2. Obtain the EMD between those columns: computed with
         dual_affinity.calc_emd(node_data, row_tree, alpha, beta) when
         col_emd is None, otherwise sliced out of the supplied col_emd.
      3. Convert the EMD to an affinity and take the second vector returned
         by markov.markov_eigs.
      4. Run sklearn's lars_path (its default method, max_iter=50) over the
         candidate rows in regressors (all rows of row_tree if None) and
         keep the first k active rows.
      5. Fit a LinearRegression on those k rows and split the node by the
         sign of the predicted eigenvector, via
         col_tree_node.create_subclusters.

    Returns (rows, lm): rows is a numpy array with the entries of regressors
    that were selected, lm is the fitted LinearRegression.

    Side effect: col_tree_node.create_subclusters is called with a 0./1.
    partition vector.
    """
    import sklearn.linear_model as sklm

    col_indices = col_tree_node.elements
    node_data = train_data[:, col_indices].astype(np.float64)

    if col_emd is None:
        col_emd = dual_affinity.calc_emd(node_data, row_tree, alpha, beta)
        col_aff = dual_affinity.emd_dual_aff(col_emd)
    else:
        # A full column EMD was supplied: keep only this node's columns.
        col_aff = dual_affinity.emd_dual_aff(
            col_emd[:, col_indices][col_indices, :])

    vecs, _ = markov.markov_eigs(col_aff, 2)
    eig = vecs[:, 1]

    if regressors is None:
        regressors = range(row_tree.size)

    _, active, _ = sklm.lars_path(node_data[regressors, :].T, eig,
                                  max_iter=50)
    regr_indices = active[0:k]

    # lars_path reports positions within the columns it was given, i.e.
    # positions inside `regressors`. Translate them to row numbers of
    # node_data before fitting; indexing node_data with the raw positions
    # used the wrong rows whenever `regressors` was a subset, and disagreed
    # with the row numbers returned below.
    selected_rows = [regressors[x] for x in regr_indices]

    design = node_data[selected_rows, :].T
    lm = sklm.LinearRegression()
    lm.fit(design, eig)
    pred_eigs = lm.predict(design)

    labels = pred_eigs > 0.0
    partition = labels * np.ones(labels.shape[0])
    col_tree_node.create_subclusters(partition)

    return np.array(selected_rows), lm
def pyquest_newtree(data, tree_constant=0.25, row_alpha=0.5, col_alpha=0.5,
                    beta=1.0, n_iters=3):
    """
    Runs the questionnaire using trees built by
    tree_building.make_tree_embedding.

    The initial row affinity is the mutual cosine similarity of data.T
    (threshold 0.1). Its 12 Markov eigenvectors, scaled by the eigenvalues
    (NaN eigenvalues replaced by 0), give an embedding; the initial row tree
    is built from (max pairwise distance - pairwise distance) in that
    embedding. Each of the n_iters iterations then builds a column tree from
    the EMD under the newest row tree, and a row tree from the EMD under the
    newest column tree.

    Returns row_tree, col_tree, row_vecs, col_vecs, row_vals, col_vals where
    the trees are the last ones built and the vecs/vals come from
    markov.markov_eigs(..., 12) on the final row/column EMD affinities.
    """
    # Initial affinity and its diffusion embedding
    seed_aff = affinity.mutual_cosine_similarity(
        data.T, False, 0, threshold=0.1)
    seed_vecs, seed_vals = markov.markov_eigs(seed_aff, 12)
    seed_vals[np.isnan(seed_vals)] = 0.0
    embedding = seed_vecs.dot(np.diag(seed_vals))
    pairwise = spsp.distance.squareform(spsp.distance.pdist(embedding))
    seed_affinity = np.max(pairwise) - pairwise

    # Initial tree, then alternate column / row trees
    row_trees = [
        tree_building.make_tree_embedding(seed_affinity, tree_constant)]
    col_trees = []
    for _ in range(n_iters):
        emd_cols = dual_affinity.calc_emd(
            data, row_trees[-1], alpha=col_alpha, beta=beta)
        aff_cols = dual_affinity.emd_dual_aff(emd_cols)
        col_trees.append(
            tree_building.make_tree_embedding(aff_cols, tree_constant))

        emd_rows = dual_affinity.calc_emd(
            data.T, col_trees[-1], alpha=row_alpha, beta=beta)
        aff_rows = dual_affinity.emd_dual_aff(emd_rows)
        row_trees.append(
            tree_building.make_tree_embedding(aff_rows, tree_constant))

    col_tree = col_trees[-1]
    row_tree = row_trees[-1]

    # Final affinities and eigen-decompositions under the last trees
    final_col_emd = dual_affinity.calc_emd(
        data, row_tree, alpha=col_alpha, beta=beta)
    final_row_emd = dual_affinity.calc_emd(
        data.T, col_tree, alpha=row_alpha, beta=beta)
    final_row_aff = dual_affinity.emd_dual_aff(final_row_emd)
    final_col_aff = dual_affinity.emd_dual_aff(final_col_emd)
    row_vecs, row_vals = markov.markov_eigs(final_row_aff, 12)
    col_vecs, col_vals = markov.markov_eigs(final_col_aff, 12)

    return row_tree, col_tree, row_vecs, col_vecs, row_vals, col_vals
def break_node(train_data, col_tree_node, row_tree, regressors=None,
               k=5, alpha=0.0, beta=1.0, col_emd=None):
    """
    First calculates the EMD on the columns of train_data in
    col_tree_node.elements using row_tree (or slices it out of col_emd when
    one is supplied). Converts that to an affinity. Calculates the second
    eigenvector of the markov matrix based on that affinity.

    Then runs sklearn's lars_path (default method, max_iter=50) on the rows
    in regressors (all rows of row_tree if it's None), keeps the first k
    active rows and fits a linear model on them. Splits the node using the
    sign of the predicted eigenvector values, by calling
    col_tree_node.create_subclusters.

    Returns a tuple (rows, lm): the selected entries of regressors as a
    numpy array, and the fitted LinearRegression.
    """
    import sklearn.linear_model as sklm

    col_indices = col_tree_node.elements
    node_data = train_data[:, col_indices].astype(np.float64)

    if col_emd is None:
        col_emd = dual_affinity.calc_emd(node_data, row_tree, alpha, beta)
        col_aff = dual_affinity.emd_dual_aff(col_emd)
    else:
        # Restrict the precomputed EMD to this node's columns.
        node_emd = col_emd[:, col_indices][col_indices, :]
        col_aff = dual_affinity.emd_dual_aff(node_emd)

    vecs, _ = markov.markov_eigs(col_aff, 2)
    eig = vecs[:, 1]

    if regressors is None:
        regressors = range(row_tree.size)

    _, active, _ = sklm.lars_path(node_data[regressors, :].T, eig,
                                  max_iter=50)

    # `active` indexes the columns passed to lars_path, so each entry is a
    # position within `regressors`, not a row of node_data. Map them first:
    # fitting on node_data[active[0:k]] picked the wrong rows whenever
    # `regressors` was not simply 0..n-1, and did not match the row numbers
    # this function returns.
    chosen_rows = [regressors[pos] for pos in active[0:k]]
    features = node_data[chosen_rows, :].T

    lm = sklm.LinearRegression()
    lm.fit(features, eig)
    pred_eigs = lm.predict(features)

    labels = pred_eigs > 0.0
    partition = labels * np.ones(labels.shape[0])
    col_tree_node.create_subclusters(partition)

    return np.array(chosen_rows), lm
def pyquest_bintree(data, row_alpha=0.5, col_alpha=0.5, beta=1.0,
                    bal_constant=1.0, n_iters=3):
    """
    runs what is momentarily the standard questionnaire algorithm:
    initial affinity = mutual cosine similarity
    initial tree based on median of successive eigenvectors
    dual affinities based on earth mover distance.
    dual trees based on eigen_cut method

    row_alpha / col_alpha / beta are passed to the tree construction and to
    the final EMD computations. The final EMDs used to be hard-coded to
    alpha=0.5, beta=1.0, which silently ignored non-default arguments; the
    defaults give the same result as before.

    bal_constant is accepted for interface compatibility but is not used:
    trees are built with bintree_construct.old_eigen_tree, which is not
    given a balance constant here.

    n_iters must be at least 1; with 0 iterations no column tree exists and
    the lookup of the last column tree raises IndexError.

    Returns row_tree, col_tree, row_vecs, col_vecs, row_vals, col_vals where
    the trees are the last ones built and vecs/vals come from
    markov.markov_eigs(..., 12) on the final row/column EMD affinities.
    """
    #Generate initial affinity
    init_row_aff = affinity.mutual_cosine_similarity(
        data.T, False, 0, threshold=0.1)

    #Compute diffusion embedding of initial affinities
    init_row_vecs, init_row_vals = markov.markov_eigs(init_row_aff, 12)

    #Generate median trees
    init_row_tree = bintree_construct.median_tree(
        init_row_vecs, init_row_vals, max_levels=12)

    dual_col_trees = []
    dual_row_trees = [init_row_tree]

    for _ in range(n_iters):
        dual_col_trees.append(bintree_construct.old_eigen_tree(
            data, dual_row_trees[-1], alpha=col_alpha, beta=beta, noise=0.0))
        dual_row_trees.append(bintree_construct.old_eigen_tree(
            data.T, dual_col_trees[-1], alpha=row_alpha, beta=beta,
            noise=0.0))

    col_tree = dual_col_trees[-1]
    row_tree = dual_row_trees[-1]

    # Use the caller's parameters, matching the tree construction above.
    col_emd = dual_affinity.calc_emd(
        data, row_tree, alpha=col_alpha, beta=beta)
    row_emd = dual_affinity.calc_emd(
        data.T, col_tree, alpha=row_alpha, beta=beta)

    row_aff = dual_affinity.emd_dual_aff(row_emd)
    col_aff = dual_affinity.emd_dual_aff(col_emd)

    row_vecs, row_vals = markov.markov_eigs(row_aff, 12)
    col_vecs, col_vals = markov.markov_eigs(col_aff, 12)

    return row_tree, col_tree, row_vecs, col_vecs, row_vals, col_vals
def eigen_cut_zero(node, emd, eps=1.0):
    """
    Labels the elements of node by the sign of the second Markov
    eigenvector.

    The EMD is restricted to node.elements (rows and columns), converted to
    an affinity with dual_affinity.emd_dual_aff(..., eps), and the second
    vector returned by markov.markov_eigs(..., 2) is thresholded at zero.

    Returns a float array with 1.0 where the eigenvector entry is > 0.0 and
    0.0 elsewhere.

    If markov_eigs raises, the affinity, the full emd and node.elements are
    printed and the exception is re-raised.
    """
    members = node.elements
    node_aff = dual_affinity.emd_dual_aff(emd[members, :][:, members], eps)

    try:
        vecs, _ = markov.markov_eigs(node_aff, 2)
    except:
        # Dump the inputs for debugging, then let the error propagate.
        print(node_aff)
        print(emd)
        print(node.elements)
        raise

    second_vec = vecs[:, 1]
    labels = np.ones(len(second_vec))
    labels *= (second_vec > 0.0)
    return labels
def bal_eigen_cut(node, emd, bal_constant=1.0, eps=1.0):
    """
    Splits the elements of node along the second Markov eigenvector at a
    randomly chosen, balance-constrained position.

    The EMD is restricted to node.elements, converted to an affinity with
    dual_affinity.emd_dual_aff(..., eps), and the elements are ordered by
    the second vector from markov.markov_eigs(..., 2). bal_cut(n,
    bal_constant) supplies bounds (l, r); the cut position is drawn
    uniformly from the integers l..r inclusive.
    (bal_cut is defined elsewhere -- presumably it returns the admissible
    range of cut positions; confirm against its definition.)

    Returns an integer array with 1 for the cut_loc elements with the
    smallest eigenvector entries and 0 for the rest.

    If markov_eigs raises, the affinity, the full emd and node.elements are
    printed and the exception is re-raised.
    """
    node_aff = dual_affinity.emd_dual_aff(
        emd[node.elements, :][:, node.elements], eps)
    try:
        vecs, _ = markov.markov_eigs(node_aff, 2)
    except Exception:
        # Dump the inputs for debugging, then let the error propagate.
        print(node_aff)
        print(emd)
        print(node.elements)
        raise

    eig = vecs[:, 1]
    eig_sorted = np.argsort(eig)
    n = len(eig)

    l, r = bal_cut(n, bal_constant)
    # randint's upper bound is exclusive, hence r + 1.
    cut_loc = np.random.randint(l, r + 1)

    # np.int was only an alias of the builtin int and has been removed from
    # NumPy; the builtin yields the same dtype.
    labels = np.zeros(n, int)
    labels[eig_sorted[0:cut_loc]] = 1
    return labels
def eigen_cut(node, emd, noise, eps=1.0):
    """
    Splits the elements of node along the second Markov eigenvector,
    optionally jittering the cut position.

    The EMD is restricted to node.elements, converted to an affinity with
    dual_affinity.emd_dual_aff(..., eps), and the second vector from
    markov.markov_eigs(..., 2) is used as the splitting coordinate.

    noise < 1e-8: the n // 2 elements with the smallest eigenvector entries
        get label 1, the rest 0.
    otherwise:    a shift is drawn uniformly from [-noise, noise]; the cut
        value is the sorted eigenvector entry at index
        n // 2 + shift * n, and elements strictly above it get label 1.
        The index is clamped to [0, n - 1]: without the clamp a large noise
        either raised IndexError or wrapped around to the other end of the
        sorted vector through a negative index.

    Returns a float array of 0./1. labels, one per element of node.

    If markov_eigs raises, the affinity, the full emd and node.elements are
    printed and the exception is re-raised.
    """
    node_aff = dual_affinity.emd_dual_aff(
        emd[node.elements, :][:, node.elements], eps)
    try:
        vecs, _ = markov.markov_eigs(node_aff, 2)
    except Exception:
        # Dump the inputs for debugging, then let the error propagate.
        print(node_aff)
        print(emd)
        print(node.elements)
        raise

    eig = vecs[:, 1]
    eig_sorted = np.sort(eig)
    n = len(eig_sorted)
    # Explicit floor division: same value as Python 2's n/2 on ints, and
    # still an integer under true division.
    half = n // 2

    # Drawn unconditionally so the random stream is consumed the same way
    # whichever branch runs.
    rnoise = np.random.uniform(-noise, noise)

    if noise < 1e-8:
        labels = np.zeros(n)
        labels[np.argsort(eig)[0:half]] = 1
    else:
        cut_index = int(half + (rnoise * n))
        cut_index = min(max(cut_index, 0), n - 1)
        cut_loc = eig_sorted[cut_index]
        labels = np.ones(n) * (eig > cut_loc)
    return labels
def pyquest(data, params):
    """
    Runs the questionnaire on data with params.
    params is a PyQuestParams object.

    An initial row affinity (mutual cosine similarity or Gaussian, per
    params.init_aff_type) gives the initial row tree. Each of the
    params.n_iters iterations then builds a column tree from the column
    affinity under the newest row tree, and a row tree from the row affinity
    under the newest column tree. Trees are binary or flexible according to
    params.tree_type.

    Returns a PyQuestRun holding a timestamp description, every row and
    column tree built, their descriptions and params. Returns None (after
    printing a notice) if a Gaussian dual affinity is requested.
    """
    def grow_tree(aff):
        # Rows and columns share the same tree type and constants.
        if params.tree_type == TREE_TYPE_BINARY:
            tree = bin_tree_build.bin_tree_build(
                aff, 'r_dyadic', params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            tree = flex_tree_build.flex_tree_diffusion(
                aff, params.tree_constant)
        return tree

    # Initial affinity on the rows
    if params.init_aff_type == INIT_AFF_COS_SIM:
        init_row_aff = affinity.mutual_cosine_similarity(
            data.T, False, 0, threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        init_row_aff = affinity.gaussian_euclidean(
            data.T, params.init_aff_knn, params.init_aff_epsilon)

    # Initial row tree
    row_trees = [grow_tree(init_row_aff)]
    col_trees = []
    row_descs = ["Initial tree"]
    col_descs = []

    for it in range(params.n_iters):
        # Column affinity from the newest row tree
        if params.col_affinity_type == DUAL_EMD:
            col_emd = dual_affinity.calc_emd(
                data, row_trees[-1], params.col_alpha, params.col_beta)
            col_aff = dual_affinity.emd_dual_aff(col_emd)
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # Column tree
        col_trees.append(grow_tree(col_aff))
        col_descs.append("Iteration {}".format(it))

        # Row affinity from the column tree just built
        if params.row_affinity_type == DUAL_EMD:
            row_emd = dual_affinity.calc_emd(
                data.T, col_trees[-1], params.row_alpha, params.row_beta)
            row_aff = dual_affinity.emd_dual_aff(row_emd)
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # Row tree
        row_trees.append(grow_tree(row_aff))
        row_descs.append("Iteration {}".format(it))

    run_desc = "{}".format(datetime.datetime.now())
    return PyQuestRun(run_desc, row_trees, col_trees,
                      row_descs, col_descs, params)
def pyquest3d(data3d, params):
    """
    Runs the 3d questionnaire on data with params.
    params is a PyQuest3DParams object.

    Order of analysis is initialization for rows and columns and then
    iterating over channels (3rd dimension), rows and columns.

    data3d is unpacked as (nrows, ncols, nchans). Initial row and column
    affinities are computed on 2d unfoldings of it and turned into initial
    row and column trees. Each of the params.n_iters iterations then builds
      * a channel tree from the 2d EMD under the newest row and column trees,
      * a row tree from the 2d EMD under the newest column and channel trees,
      * a column tree from the 2d EMD under the newest row and channel trees.

    Returns a PyQuest3DRun holding a timestamp description, all row / column
    / channel trees with their descriptions, params, the two initial
    affinities and the last row / column / channel affinities (None if
    params.n_iters is 0). Returns None (after printing a notice) if a
    Gaussian dual affinity is requested.

    Changes from the previous revision:
      * the channel affinity is computed from the newest row/column trees;
        it used the initial trees on every iteration, so it never changed
        after the first pass (the first iteration is unaffected);
      * with n_iters == 0 the function no longer fails on unbound locals.
    """
    nrows, ncols, nchans = data3d.shape
    # 2d unfoldings: data_Y is (nrows, ncols*nchans),
    # data_X is (nrows*nchans, ncols).
    data_Y = np.reshape(data3d, (nrows, ncols * nchans), order='F')
    data_X = np.reshape(np.transpose(data3d, (0, 2, 1)),
                        (nrows * nchans, ncols), order='F')

    if params.init_aff_type == INIT_AFF_COS_SIM:
        init_row_aff = affinity.mutual_cosine_similarity(
            data_Y.T, False, 0, threshold=params.init_aff_threshold)
        init_col_aff = affinity.mutual_cosine_similarity(
            data_X, False, 0, threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        init_row_aff = affinity.gaussian_euclidean(
            data_Y.T, params.init_aff_knn, params.init_aff_epsilon)
        init_col_aff = affinity.gaussian_euclidean(
            data_X, params.init_aff_knn, params.init_aff_epsilon)

    # Initial row tree
    if params.row_tree_type == TREE_TYPE_BINARY:
        init_row_tree = bin_tree_build.bin_tree_build(
            init_row_aff, 'r_dyadic', params.row_tree_bal_constant)
    elif params.row_tree_type == TREE_TYPE_FLEXIBLE:
        init_row_tree = flex_tree_build.flex_tree_diffusion(
            init_row_aff, params.row_tree_constant)

    # Initial column tree
    if params.col_tree_type == TREE_TYPE_BINARY:
        init_col_tree = bin_tree_build.bin_tree_build(
            init_col_aff, 'r_dyadic', params.col_tree_bal_constant)
    elif params.col_tree_type == TREE_TYPE_FLEXIBLE:
        init_col_tree = flex_tree_build.flex_tree_diffusion(
            init_col_aff, params.col_tree_constant)

    # Data structure for trees. All trees calculated in the process are
    # exported.
    dual_row_trees = [init_row_tree]
    dual_col_trees = [init_col_tree]
    dual_chan_trees = []
    row_tree_descs = ["Initial tree"]
    col_tree_descs = ["Initial tree"]
    chan_tree_descs = []

    # Defined up front so the return below works when n_iters is 0.
    row_aff = col_aff = chan_aff = None

    # Iterate over the questionnaire, starting with channels and then rows
    # and columns in each iteration.
    for i in range(params.n_iters):
        # Channel affinity based on the newest row and column trees.
        if params.chan_affinity_type == DUAL_EMD:
            chan_emd2d = dual_affinity.calc_2demd(
                data3d, dual_row_trees[-1], dual_col_trees[-1],
                row_alpha=params.row_alpha, row_beta=params.row_beta,
                col_alpha=params.col_alpha, col_beta=params.col_beta)
            chan_aff = dual_affinity.emd_dual_aff(chan_emd2d)
            # NOTE(review): this flexible tree is overwritten just below
            # whenever chan_tree_type is binary or flexible; it only
            # survives for an unrecognised chan_tree_type. Kept because
            # dropping the call could alter results if the builder is
            # randomised -- confirm and simplify.
            chan_tree = flex_tree_build.flex_tree_diffusion(
                chan_aff, params.chan_tree_constant)
        elif params.chan_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # Channel tree
        if params.chan_tree_type == TREE_TYPE_BINARY:
            chan_tree = bin_tree_build.bin_tree_build(
                chan_aff, 'r_dyadic', params.chan_tree_bal_constant)
        elif params.chan_tree_type == TREE_TYPE_FLEXIBLE:
            chan_tree = flex_tree_build.flex_tree_diffusion(
                chan_aff, params.chan_tree_constant)
        dual_chan_trees.append(chan_tree)
        chan_tree_descs.append("Iteration {}".format(i))

        # Row affinity based on the column and channel trees. The data is
        # transposed to (cols, chans, rows); the row_*/col_* keywords of
        # calc_2demd therefore receive the column and channel parameters
        # (presumably they apply to the first and second tree passed --
        # confirm against calc_2demd).
        if params.row_affinity_type == DUAL_EMD:
            row_emd2d = dual_affinity.calc_2demd(
                np.transpose(data3d, (1, 2, 0)),
                dual_col_trees[-1], dual_chan_trees[-1],
                row_alpha=params.col_alpha, row_beta=params.col_beta,
                col_alpha=params.chan_alpha, col_beta=params.chan_beta)
            row_aff = dual_affinity.emd_dual_aff(row_emd2d)
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # Row tree
        if params.row_tree_type == TREE_TYPE_BINARY:
            row_tree = bin_tree_build.bin_tree_build(
                row_aff, 'r_dyadic', params.row_tree_bal_constant)
        elif params.row_tree_type == TREE_TYPE_FLEXIBLE:
            row_tree = flex_tree_build.flex_tree_diffusion(
                row_aff, params.row_tree_constant)
        dual_row_trees.append(row_tree)
        row_tree_descs.append("Iteration {}".format(i))

        # Column affinity based on the row and channel trees; the data is
        # transposed to (rows, chans, cols).
        if params.col_affinity_type == DUAL_EMD:
            col_emd2d = dual_affinity.calc_2demd(
                np.transpose(data3d, (0, 2, 1)),
                dual_row_trees[-1], dual_chan_trees[-1],
                row_alpha=params.row_alpha, row_beta=params.row_beta,
                col_alpha=params.chan_alpha, col_beta=params.chan_beta)
            col_aff = dual_affinity.emd_dual_aff(col_emd2d)
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        # Column tree
        if params.col_tree_type == TREE_TYPE_BINARY:
            col_tree = bin_tree_build.bin_tree_build(
                col_aff, 'r_dyadic', params.col_tree_bal_constant)
        elif params.col_tree_type == TREE_TYPE_FLEXIBLE:
            col_tree = flex_tree_build.flex_tree_diffusion(
                col_aff, params.col_tree_constant)
        dual_col_trees.append(col_tree)
        col_tree_descs.append("Iteration {}".format(i))

    # Iterations have finished; output the tree structures and parameters.
    quest_run_desc = "{}".format(datetime.datetime.now())
    return PyQuest3DRun(quest_run_desc, dual_row_trees, dual_col_trees,
                        dual_chan_trees, row_tree_descs, col_tree_descs,
                        chan_tree_descs, params, init_col_aff, init_row_aff,
                        row_aff, col_aff, chan_aff)
def pyquest(data, params):
    """
    Runs the questionnaire on data with params.
    params is a PyQuestParams object.

    Builds the initial affinity on the rows of the matrix and the initial
    row tree. Each of the params.n_iters iterations then builds a column
    tree from the newest row tree and a row tree from the newest column
    tree, printing a progress line before each step. Row and column trees
    have their own tree type and constants.

    For a DUAL_EMD affinity with params.col_weighted / params.row_weighted
    equal to True, the EMD is called with alpha=0, beta=0 and per-node
    weights taken as the row-wise L2 norm of
    tree_util.tree_transform_mat(tree).dot(matrix); otherwise it is called
    with the configured alpha and beta.

    Returns a PyQuestRun holding a timestamp description, every row and
    column tree built, their descriptions and params. Returns None (after
    printing a notice) if a Gaussian dual affinity is requested.
    """
    def row_tree_from(aff):
        if params.row_tree_type == TREE_TYPE_BINARY:
            built = bin_tree_build.bin_tree_build(
                aff, 'r_dyadic', params.row_tree_bal_constant)
        elif params.row_tree_type == TREE_TYPE_FLEXIBLE:
            built = flex_tree_build.flex_tree_diffusion(
                aff, params.row_tree_constant)
        return built

    def col_tree_from(aff):
        if params.col_tree_type == TREE_TYPE_BINARY:
            built = bin_tree_build.bin_tree_build(
                aff, 'r_dyadic', params.col_tree_bal_constant)
        elif params.col_tree_type == TREE_TYPE_FLEXIBLE:
            built = flex_tree_build.flex_tree_diffusion(
                aff, params.col_tree_constant)
        return built

    def node_weights(tree, matrix):
        # L2 norm, per tree node, of the tree-transform coefficients.
        coefs = tree_util.tree_transform_mat(tree).dot(matrix)
        return np.sqrt(np.sum(coefs**2, axis=1))

    # Initial row affinity
    if params.init_aff_type == INIT_AFF_COS_SIM:
        init_row_aff = affinity.mutual_cosine_similarity(
            data.T, False, 0, threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        init_row_aff = affinity.gaussian_euclidean(
            data.T, params.init_aff_knn, params.init_aff_epsilon)

    # All trees calculated in the process are exported.
    row_trees = [row_tree_from(init_row_aff)]
    col_trees = []
    row_descs = ["Initial tree"]
    col_descs = []

    # Columns first, then rows, in each iteration.
    for it in range(params.n_iters):
        print("Iteration {}: calculating column affinity...".format(it))
        if params.col_affinity_type == DUAL_EMD:
            if params.col_weighted == True:
                print("weighted emd")
                weights = node_weights(row_trees[-1], data)
                col_emd = dual_affinity.calc_emd(
                    data, row_trees[-1], alpha=0, beta=0, weights=weights)
            else:
                col_emd = dual_affinity.calc_emd(
                    data, row_trees[-1], params.col_alpha, params.col_beta)
            col_aff = dual_affinity.emd_dual_aff(col_emd)
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        print("Iteration {}: calculating column tree...".format(it))
        col_trees.append(col_tree_from(col_aff))
        col_descs.append("Iteration {}".format(it))

        # Column tree finished, now the rows.
        print("Iteration {}: calculating row affinity...".format(it))
        if params.row_affinity_type == DUAL_EMD:
            if params.row_weighted == True:
                print("weighted emd")
                weights = node_weights(col_trees[-1], data.T)
                row_emd = dual_affinity.calc_emd(
                    data.T, col_trees[-1], alpha=0, beta=0, weights=weights)
            else:
                row_emd = dual_affinity.calc_emd(
                    data.T, col_trees[-1], params.row_alpha, params.row_beta)
            row_aff = dual_affinity.emd_dual_aff(row_emd)
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        print("Iteration {}: calculating row tree...".format(it))
        row_trees.append(row_tree_from(row_aff))
        row_descs.append("Iteration {}".format(it))

    # Iterations have finished; output the tree structures and parameters.
    run_desc = "{}".format(datetime.datetime.now())
    return PyQuestRun(run_desc, row_trees, col_trees,
                      row_descs, col_descs, params)
def pyquest(data, params):
    """
    Runs the questionnaire on data, publishing progress messages on the
    "status.bar" topic through Publisher.sendMessage.
    params should be a PyQuestParams object.

    The initial row affinity (mutual cosine similarity or Gaussian, per
    params.init_aff_type) is decomposed with markov.markov_eigs(..., 12);
    NaN eigenvalues are replaced by 0. The initial row tree is
      * binary:   bintree_construct.median_tree on those eigenvectors/values
      * flexible: tree_building.make_tree_embedding on
                  (max pairwise distance - pairwise distance) in the
                  eigenvalue-scaled embedding.
    The pairwise-distance embedding is now computed only for flexible trees;
    it was previously computed (O(n^2) in the number of rows) and then
    discarded on the binary path.

    Each of the params.n_iters iterations then builds a column tree and a
    row tree. Binary trees come from bintree_construct.eigen_tree on the
    data and the newest dual tree; flexible trees from
    tree_building.make_tree_embedding on the EMD affinity.

    Returns a PyQuestRun holding a timestamp description, every row and
    column tree built, their descriptions and params. Returns None (after
    printing a notice) if a Gaussian dual affinity is requested.
    """
    Publisher.sendMessage("status.bar", "Calculating initial affinity...")
    if params.init_aff_type == INIT_AFF_COS_SIM:
        init_row_aff = affinity.mutual_cosine_similarity(
            data.T, False, 0, threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        # TODO: add KNN to the page. Until then the second argument is
        # fixed at 5 (presumably the nearest-neighbour count -- confirm).
        init_row_aff = affinity.gaussian_euclidean(
            data.T, 5, params.init_aff_epsilon)

    # Diffusion eigen-decomposition of the initial affinity
    init_row_vecs, init_row_vals = markov.markov_eigs(init_row_aff, 12)
    init_row_vals[np.isnan(init_row_vals)] = 0.0

    # Generate initial tree
    Publisher.sendMessage("status.bar", "Calculating initial row tree...")
    if params.tree_type == TREE_TYPE_BINARY:
        init_row_tree = bintree_construct.median_tree(
            init_row_vecs, init_row_vals, max_levels=12)
    elif params.tree_type == TREE_TYPE_FLEXIBLE:
        # Only the flexible tree needs the distance-based affinity.
        row_embedding = init_row_vecs.dot(np.diag(init_row_vals))
        row_distances = spsp.distance.squareform(
            spsp.distance.pdist(row_embedding))
        row_affinity = np.max(row_distances) - row_distances
        init_row_tree = tree_building.make_tree_embedding(
            row_affinity, params.tree_constant)

    dual_col_trees = []
    dual_row_trees = [init_row_tree]
    row_tree_descs = ["Initial tree"]
    col_tree_descs = []

    for i in range(params.n_iters):
        message = "Iteration {}: calculating column affinity...".format(i)
        Publisher.sendMessage("status.bar", message)
        if params.col_affinity_type == DUAL_EMD:
            col_emd = dual_affinity.calc_emd(
                data, dual_row_trees[-1], params.col_alpha, params.col_beta)
            col_aff = dual_affinity.emd_dual_aff(col_emd)
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        message = "Iteration {}: calculating column tree...".format(i)
        Publisher.sendMessage("status.bar", message)
        if params.tree_type == TREE_TYPE_BINARY:
            # The binary builder works from the data and the dual tree; it
            # is not passed col_aff.
            col_tree = bintree_construct.eigen_tree(
                data, dual_row_trees[-1], params.col_alpha, params.col_beta,
                params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            col_tree = tree_building.make_tree_embedding(
                col_aff, params.tree_constant)
        dual_col_trees.append(col_tree)
        col_tree_descs.append("Iteration {}".format(i))

        message = "Iteration {}: calculating row affinity...".format(i)
        Publisher.sendMessage("status.bar", message)
        if params.row_affinity_type == DUAL_EMD:
            row_emd = dual_affinity.calc_emd(
                data.T, dual_col_trees[-1], params.row_alpha,
                params.row_beta)
            row_aff = dual_affinity.emd_dual_aff(row_emd)
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        message = "Iteration {}: calculating row tree...".format(i)
        Publisher.sendMessage("status.bar", message)
        if params.tree_type == TREE_TYPE_BINARY:
            row_tree = bintree_construct.eigen_tree(
                data.T, dual_col_trees[-1], params.row_alpha,
                params.row_beta, params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            row_tree = tree_building.make_tree_embedding(
                row_aff, params.tree_constant)
        dual_row_trees.append(row_tree)
        row_tree_descs.append("Iteration {}".format(i))

    quest_run_desc = "{}".format(datetime.datetime.now())
    return PyQuestRun(quest_run_desc, dual_row_trees, dual_col_trees,
                      row_tree_descs, col_tree_descs, params)
def pyquest(data, params):
    """
    Runs the questionnaire on data with params.
    params is a PyQuestParams object.

    Starts from an initial row affinity (mutual cosine similarity or
    Gaussian, per params.init_aff_type) and its row tree, then alternates
    params.n_iters times between a column tree built from the EMD affinity
    under the newest row tree and a row tree built from the EMD affinity
    under the newest column tree. params.tree_type selects binary
    (bin_tree_build) or flexible (flex_tree_build) trees throughout.

    Returns a PyQuestRun holding a timestamp description, every row and
    column tree built, their descriptions and params. Returns None (after
    printing a notice) if a Gaussian dual affinity is requested.
    """
    # Initial affinity on the rows
    if params.init_aff_type == INIT_AFF_COS_SIM:
        seed_aff = affinity.mutual_cosine_similarity(
            data.T, False, 0, threshold=params.init_aff_threshold)
    elif params.init_aff_type == INIT_AFF_GAUSSIAN:
        seed_aff = affinity.gaussian_euclidean(
            data.T, params.init_aff_knn, params.init_aff_epsilon)

    # Initial row tree
    if params.tree_type == TREE_TYPE_BINARY:
        seed_tree = bin_tree_build.bin_tree_build(
            seed_aff, 'r_dyadic', params.tree_bal_constant)
    elif params.tree_type == TREE_TYPE_FLEXIBLE:
        seed_tree = flex_tree_build.flex_tree_diffusion(
            seed_aff, params.tree_constant)

    row_history = [seed_tree]
    col_history = []
    row_labels = ["Initial tree"]
    col_labels = []

    for step in range(params.n_iters):
        # ---- columns: affinity under the newest row tree ----
        if params.col_affinity_type == DUAL_EMD:
            col_aff = dual_affinity.emd_dual_aff(
                dual_affinity.calc_emd(data, row_history[-1],
                                       params.col_alpha, params.col_beta))
        elif params.col_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        if params.tree_type == TREE_TYPE_BINARY:
            col_tree = bin_tree_build.bin_tree_build(
                col_aff, 'r_dyadic', params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            col_tree = flex_tree_build.flex_tree_diffusion(
                col_aff, params.tree_constant)
        col_history.append(col_tree)
        col_labels.append("Iteration {}".format(step))

        # ---- rows: affinity under the column tree just built ----
        if params.row_affinity_type == DUAL_EMD:
            row_aff = dual_affinity.emd_dual_aff(
                dual_affinity.calc_emd(data.T, col_history[-1],
                                       params.row_alpha, params.row_beta))
        elif params.row_affinity_type == DUAL_GAUSSIAN:
            print("Gaussian dual affinity not supported at the moment.")
            return None

        if params.tree_type == TREE_TYPE_BINARY:
            row_tree = bin_tree_build.bin_tree_build(
                row_aff, 'r_dyadic', params.tree_bal_constant)
        elif params.tree_type == TREE_TYPE_FLEXIBLE:
            row_tree = flex_tree_build.flex_tree_diffusion(
                row_aff, params.tree_constant)
        row_history.append(row_tree)
        row_labels.append("Iteration {}".format(step))

    stamp = "{}".format(datetime.datetime.now())
    return PyQuestRun(stamp, row_history, col_history,
                      row_labels, col_labels, params)