from data_reader import DataReader
from co_graph import CoGraph
import numpy as np
import cluster_merge as cm
from log import output
import matplotlib.pyplot as plt
import scipy
from scipy.linalg import logm, expm

# Run the reader's cell and gene pre-processing with the chosen thresholds.
rd = DataReader()
rd.pre_process_cell(threshold=1850, original=True)
rd.pre_process_gene(threshold=2600, original=False)

genes = rd.get_gene_data()
collect = {}

# Side length of the square count matrix built for each pair of rows.
N = 15

# Walk every unordered pair of rows (x < y) of `genes`.
# NOTE(review): presumably one row per gene -- confirm against DataReader.
for x in range(len(genes)):
    for y in range(x + 1, len(genes)):
        X = genes[x]
        Y = genes[y]

        # matrix[int(X[k])][int(Y[k])] is incremented for every position k
        # at which both values are below N; all other positions are skipped.
        matrix = [[0] * N for _ in range(N)]
        for k, xcord in enumerate(X):
            ycord = Y[k]
            if not (xcord >= N or ycord >= N):
                matrix[int(xcord)][int(ycord)] += 1

        # for i in range(len(matrix[0])):
from data_reader import DataReader
from co_graph import CoGraph
import numpy as np
import cluster_merge as cm
from log import output
from scipy.stats import entropy

# Preprocess the data; tune the threshold parameters to adjust the data size.
rd = DataReader()
rd.pre_process_cell(threshold=1850, original=True)
rd.pre_process_gene(threshold=2600, original=False)

# Build the graph using a hypergeometric test, then use Jaccard filtering to
# prune it. The keyword arguments below adjust the resulting graph.
g = CoGraph(rd.get_gene_data())
output('data shape after preprocessing ', g.data.shape)
g.build_graph(threshold=0.001, jaccard=True, jaccard_threshold=0.5)

# Find communities with the Louvain algorithm.
g.find_partition()

# Get the gene clusters and merge the genes that fall in the same cluster.
# NOTE: from here on `cm` refers to the Cluster instance, no longer to the
# `cluster_merge` module imported above.
cm = cm.Cluster(g.data, g.parts)
cm.merge()

# Cell type clustering: build a second graph from the merged clusters
# (Euclidean distance measurement, per the original author's note).
g2 = CoGraph(cm.parts)
output('data shape after gene clustering ', g2.data.shape)
g2.build_graph(threshold=1.0, jaccard=True, jaccard_threshold=0.8,