def run(bidirfile, fimodir):
    """Collect summary statistics of the distances for each FIMO output directory.

    Every entry of ``fimodir`` whose name contains ``'fimo_out'`` is treated
    as one result directory. Its ``fimo.cut.txt`` file is handed to
    ``Functions.get_distances_pad_v3`` together with ``bidirfile``, and the
    returned values are summarised.

    Args:
        bidirfile: Forwarded unchanged to ``Functions.get_distances_pad_v3``.
        fimodir: Directory that is scanned for ``*fimo_out*`` entries.

    Returns:
        dict mapping the TF label (the part of the entry name before the
        first ``'_'``) to ``[ks_pvalue, p, m, x]`` where

        * ``ks_pvalue`` is element 1 of ``scipy.stats.ks_2samp(x, y)`` with
          ``y`` drawn uniformly between ``min(x)`` and ``max(x)``,
        * ``p`` is ``1 - ndtr(z)`` for ``z = mean / (std / sqrt(N))``,
        * ``m`` is 0 when the mode of ``x`` lies strictly inside
          (-0.25, 0.25) and 1 otherwise,
        * ``x`` is the value returned by the helper.

        Directories for which the helper returns nothing are left out.
    """
    distances = dict()
    for entry in os.listdir(fimodir):
        if 'fimo_out' not in entry:
            continue
        item = fimodir + '/' + entry
        print(item)
        # Take the label from the entry name itself instead of a fixed
        # position in the '/'-split path, which silently depended on how
        # many components fimodir happened to have.
        TF = entry.split('_')[0]
        # NOTE(review): meaning of the trailing (True, 1500) arguments is
        # defined by the helper and not visible here.
        x = Functions.get_distances_pad_v3(bidirfile, item + "/fimo.cut.txt", True, 1500)
        if len(x) == 0:
            continue
        lo = min(x)
        hi = max(x)
        sigma = np.std(x)
        mu = np.mean(x)
        N = len(x)
        # Random uniform sample over the observed range, used as the
        # reference distribution for the two-sample KS test.
        y = np.random.uniform(lo, hi, N)
        z = mu / (sigma / math.sqrt(N))
        p = 1 - scipy.special.ndtr(z)
        k = scipy.stats.ks_2samp(x, y)
        # np.ravel keeps this working whether scipy.stats.mode hands back a
        # length-1 array (older SciPy) or a scalar (newer SciPy).
        mode_value = np.ravel(scipy.stats.mode(x)[0])[0]
        m = 0 if -0.25 < mode_value < 0.25 else 1
        distances[TF] = [k[1], p, m, x]
    return distances
def run(bidirfile, fimodir):
    """Return the rescaled distances for each FIMO output directory.

    Every entry of ``fimodir`` whose name contains ``'fimo_out'`` is treated
    as one result directory. Its ``fimo.cut.txt`` file is handed to
    ``Functions.get_distances_pad_v3`` together with ``bidirfile`` and each
    returned value is multiplied by 1500.

    This revision only returns the rescaled values; the statistical summary
    (histogram/EM fit, KS test, z-test, mode flag) that used to sit here as
    commented-out code has been removed.

    Args:
        bidirfile: Forwarded unchanged to ``Functions.get_distances_pad_v3``.
        fimodir: Directory that is scanned for ``*fimo_out*`` entries.

    Returns:
        dict mapping the TF label (the part of the entry name before the
        first ``'_'``) to the rescaled sequence returned by the helper.
        Empty results are stored as well.
    """
    distances = dict()
    for entry in os.listdir(fimodir):
        if 'fimo_out' not in entry:
            continue
        item = fimodir + '/' + entry
        print(item)
        # Take the label from the entry name itself instead of a fixed
        # position in the '/'-split path, which silently depended on how
        # many components fimodir happened to have.
        TF = entry.split('_')[0]
        # NOTE(review): meaning of the trailing (True, 1500) arguments is
        # defined by the helper and not visible here.
        x = Functions.get_distances_pad_v3(bidirfile, item + "/fimo.cut.txt", True, 1500)
        # Rescale in place so the container type returned by the helper is
        # preserved.
        for i, value in enumerate(x):
            x[i] = value * 1500
        distances[TF] = x
    return distances
def run(bidirfile, fimodir):
    """Fit and summarise the rescaled distances for each FIMO output directory.

    Every entry of ``fimodir`` whose name contains ``'fimo_out'`` is treated
    as one result directory. Its ``fimo.cut.txt`` file is handed to
    ``Functions.get_distances_pad_v3`` together with ``bidirfile``, each
    returned value is multiplied by 1500, and the result is summarised.

    Args:
        bidirfile: Forwarded unchanged to ``Functions.get_distances_pad_v3``.
        fimodir: Directory that is scanned for ``*fimo_out*`` entries.

    Returns:
        dict mapping the TF label (the part of the entry name before the
        first ``'_'``) to ``[w, w2, ks_pvalue, d, p, m, x]`` where

        * ``w`` is ``em.fit(X)`` and ``w2`` is ``ds2.get_w(X)`` for the
          200-bin histogram table ``X`` (column 0: right bin edges,
          column 1: counts),
        * ``ks_pvalue`` is element 1 of ``scipy.stats.ks_2samp(x, y)`` with
          ``y`` evenly spaced between ``min(x)`` and ``max(x)``,
        * ``d`` is ``np.mean`` over 1000 ``ks_2samp(x, ds.simulate)`` results,
        * ``p`` is ``1 - ndtr(z)`` for ``z = mean / (std / sqrt(N))``,
        * ``m`` is 0 when the mode of the rescaled ``x`` lies strictly
          inside (-0.25, 0.25) and 1 otherwise,
        * ``x`` is the rescaled sequence.

        Directories for which the helper returns nothing are left out.
    """
    distances = dict()
    for entry in os.listdir(fimodir):
        if 'fimo_out' not in entry:
            continue
        item = fimodir + '/' + entry
        print(item)
        # Take the label from the entry name itself instead of a fixed
        # position in the '/'-split path, which silently depended on how
        # many components fimodir happened to have.
        TF = entry.split('_')[0]
        # NOTE(review): meaning of the trailing (True, 1500) arguments is
        # defined by the helper and not visible here.
        x = Functions.get_distances_pad_v3(bidirfile, item + "/fimo.cut.txt", True, 1500)
        # Rescale in place so the container type returned by the helper is
        # preserved.
        for i, value in enumerate(x):
            x[i] = value * 1500
        if len(x) == 0:
            continue

        # Two-column (right bin edge, count) table fed to the fitting helpers.
        counts, edges = np.histogram(x, bins=200)
        X = np.zeros((len(counts), 2))
        X[:, 0] = edges[1:]
        X[:, 1] = counts
        w = em.fit(X)
        w2 = ds2.get_w(X)

        ks = list()
        for _ in range(1000):
            # NOTE(review): ds.simulate is referenced without being called;
            # if it is a plain function, ks_2samp receives the function
            # object rather than a sample - confirm the intended call.
            d = ds.simulate
            ks.append(scipy.stats.ks_2samp(x, d))
        # NOTE(review): this averages over every element of the collected
        # ks_2samp results (statistics and p-values together) - confirm.
        d = np.mean(ks)

        lo = min(x)
        hi = max(x)
        sigma = np.std(x)
        mu = np.mean(x)
        N = len(x)
        # Evenly spaced reference sample over the observed range. A random
        # uniform draw used to be assigned to y first but was overwritten
        # immediately, so it has been removed.
        y = np.linspace(lo, hi, N)
        z = mu / (sigma / math.sqrt(N))
        p = 1 - scipy.special.ndtr(z)
        k = scipy.stats.ks_2samp(x, y)
        # np.ravel keeps this working whether scipy.stats.mode hands back a
        # length-1 array (older SciPy) or a scalar (newer SciPy).
        mode_value = np.ravel(scipy.stats.mode(x)[0])[0]
        m = 0 if -0.25 < mode_value < 0.25 else 1
        distances[TF] = [w, w2, k[1], d, p, m, x]
    return distances
def run(bidirfile, fimodir):
    """Annotate bidirectional sites with FIMO hits and summarise distances.

    Every entry of ``fimodir`` whose name contains ``'fimo_out'`` is treated
    as one result directory. For each of them:

    1. Every site key from ``Functions.create_site_bidir(bidirfile)`` is
       searched in an interval tree built from that directory's
       ``fimo.cut.txt`` hits on the same chromosome; the offsets between the
       site midpoint and each hit midpoint are appended to the site's list.
    2. The values from ``Functions.get_distances_pad_v3`` are summarised.

    Args:
        bidirfile: Forwarded unchanged to ``Functions.create_site_bidir`` and
            ``Functions.get_distances_pad_v3``.
        fimodir: Directory that is scanned for ``*fimo_out*`` entries.

    Returns:
        Tuple ``(distances, bidirsites)``.

        ``distances`` maps the TF label (the part of the entry name before
        the first ``'_'``) to ``[ks_pvalue, p, m]`` where ``ks_pvalue`` is
        element 1 of ``scipy.stats.ks_2samp(x, y)`` with ``y`` evenly spaced
        between ``min(x)`` and ``max(x)``, ``p`` is ``1 - ndtr(z)`` for
        ``z = mean / (std / sqrt(N))``, and ``m`` is 0 when the mode of
        ``x`` lies strictly inside (-0.25, 0.25) and 1 otherwise.
        Directories for which the helper returns nothing are left out.

        ``bidirsites`` is the mapping returned by
        ``Functions.create_site_bidir``; each ``(start, stop, chrom)`` key
        gains one ``(TF, [(offset, pval), ...])`` tuple per directory.
    """
    distances = dict()
    bidirsites = Functions.create_site_bidir(bidirfile)
    for entry in os.listdir(fimodir):
        if 'fimo_out' not in entry:
            continue
        item = fimodir + '/' + entry
        print(item)
        # Take the label from the entry name itself instead of a fixed
        # position in the '/'-split path, which silently depended on how
        # many components fimodir happened to have.
        TF = entry.split('_')[0]
        fimodict = Functions.create_tup_fimo(item + "/fimo.cut.txt", True)

        for key in bidirsites:
            site_start, site_stop, chrom = key
            # NOTE(review): the tree is rebuilt for every site even though it
            # only depends on the chromosome; caching per chromosome looks
            # possible if searchInterval does not mutate the tree - confirm.
            fimotree = node.tree(fimodict[chrom])
            site_mid = (site_start + site_stop) / 2
            intervalsearch = []
            # The hit must not be bound to `item`: that name holds the
            # directory path still needed below for get_distances_pad_v3.
            for hit_start, hit_stop, pval in fimotree.searchInterval(key):
                hit_mid = (hit_start + hit_stop) / 2
                intervalsearch.append((site_mid - hit_mid, pval))
            bidirsites[key].append((TF, intervalsearch))

        # NOTE(review): meaning of the trailing (True, 1500) arguments is
        # defined by the helper and not visible here.
        x = Functions.get_distances_pad_v3(bidirfile, item + "/fimo.cut.txt", True, 1500)
        if len(x) == 0:
            continue
        lo = min(x)
        hi = max(x)
        sigma = np.std(x)
        mu = np.mean(x)
        N = len(x)
        # Evenly spaced reference sample over the observed range for the
        # two-sample KS test.
        y = np.linspace(lo, hi, N)
        z = mu / (sigma / math.sqrt(N))
        p = 1 - scipy.special.ndtr(z)
        k = scipy.stats.ks_2samp(x, y)
        # np.ravel keeps this working whether scipy.stats.mode hands back a
        # length-1 array (older SciPy) or a scalar (newer SciPy).
        mode_value = np.ravel(scipy.stats.mode(x)[0])[0]
        m = 0 if -0.25 < mode_value < 0.25 else 1
        distances[TF] = [k[1], p, m]
    return distances, bidirsites