def wangzhangplot(fname):
    """Print the combined magnitude of three ``wangzhangcalc`` results.

    The file is scanned character by character; every character whose
    upper-case form is in ``baseset`` becomes a ``Node``.  ``wangzhangcalc``
    is then called with 'A', 'G' and 'C' in turn, and the square root of the
    sum of the squared results is printed as ``mod``.

    Args:
        fname: Path of the text file holding the sequence.

    Output (stdout): the sequence length reported by ``getseqlen``, the
    ``mod`` value, then the ``microseconds`` and ``seconds`` fields of the
    elapsed ``timedelta`` (these are the individual fields, not totals).
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    seqarr = []
    # The context manager closes the file even if building a Node raises.
    with open(fname, 'r') as handle:
        for line in handle:
            for ch in line:
                base = ch.upper()
                if base in baseset:
                    seqarr.append(Node(base))

    start_time = datetime.datetime.now()

    # Calls are made in the same order as before: 'A', then 'G', then 'C'.
    comp_a = wangzhangcalc(seqarr, seqlen, 'A')
    comp_g = wangzhangcalc(seqarr, seqlen, 'G')
    comp_c = wangzhangcalc(seqarr, seqlen, 'C')
    result = sqrt(pow(comp_a, 2) + pow(comp_g, 2) + pow(comp_c, 2))
    print("mod: " + str(result))

    end_time = datetime.datetime.now()
    time_taken = end_time - start_time
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
def randic2dplot(fname, window_size=36, step=3):
    """Run ``sliding_window`` over the sequence in *fname* and print one CSV row.

    The sequence length is printed first (followed by a comma, no newline),
    then the comma-joined contents of the list handed to ``sliding_window``.

    Args:
        fname: Path of the text file holding the sequence.
        window_size: Number of consecutive nodes in each window.  Defaults
            to 36, the value that was previously hard-coded.
        step: Distance between successive window starts.  Defaults to 3,
            the value that was previously hard-coded.

    NOTE(review): the window start runs over
    ``range(0, seqlen - window_size, step)``, so a window starting exactly at
    ``seqlen - window_size`` is never processed -- confirm this is intended.
    """
    seqlen = getseqlen(fname)
    print(seqlen, end=",")

    seqarr = []
    # The context manager closes the file even if building a Node raises.
    with open(fname, 'r') as handle:
        for line in handle:
            for ch in line:
                base = ch.upper()
                if base in baseset:
                    seqarr.append(Node(base))

    window_list = []
    for start in range(0, seqlen - window_size, step):
        # Reset every node of the window before it is processed.
        for j in range(start, start + window_size):
            seqarr[j].reset()
        # sliding_window is defined elsewhere; presumably it appends its
        # result for this window to window_list -- verify against the helper.
        sliding_window(start, window_size, seqarr, window_list)

    print(','.join(map(str, window_list)))
def taonanwangplot(fname):
    """Print the results of ``wsrocalc``, ``mkrocalc`` and ``ryrocalc`` for *fname*.

    The sequence length comes from ``getseqlen``.  Each calculator is called
    with ``(fname, seqlen)`` and its result is printed straight away, in the
    order ws, mk, ry.  The ``microseconds`` and ``seconds`` fields of the
    elapsed ``timedelta`` are printed last.

    Args:
        fname: Path of the text file holding the sequence.
    """
    n_bases = getseqlen(fname)
    began = datetime.datetime.now()

    ws_value = wsrocalc(fname, n_bases)
    print("wsrocalc: ", ws_value)

    mk_value = mkrocalc(fname, n_bases)
    print("mkrocalc: ", mk_value)

    ry_value = ryrocalc(fname, n_bases)
    print("ryrocalc: ", ry_value)

    elapsed = datetime.datetime.now() - began
    print('time taken = ' + str(elapsed.microseconds))
    print('time taken = ' + str(elapsed.seconds))
def lifeizhaoyuplot(fname):
    """Accumulate a point over every adjacent base pair of *fname* and print it.

    Starting from ``basepos.O``, each overlapping two-letter word
    ``base[k] + base[k + 1]`` is looked up in ``doubledict`` and the running
    point is replaced by ``doubledict[word].add(point)``.  The final point is
    printed, followed by the ``microseconds`` and ``seconds`` fields of the
    elapsed ``timedelta``.

    Args:
        fname: Path of the text file holding the sequence.

    Raises:
        KeyError: If a two-letter word is missing from ``doubledict``
            (assuming ``doubledict`` is a plain mapping).
    """
    seqlen = getseqlen(fname)

    seqarr = []
    # The context manager closes the file even if building a Node raises.
    with open(fname, 'r') as handle:
        for line in handle:
            for ch in line:
                base = ch.upper()
                if base in baseset:
                    seqarr.append(Node(base))

    start_time = datetime.datetime.now()

    point = basepos.O
    # seqlen - 1 overlapping pairs; no iterations when seqlen <= 1.
    for idx in range(seqlen - 1):
        word = seqarr[idx].base + seqarr[idx + 1].base
        point = doubledict[word].add(point)
    print(point)

    end_time = datetime.datetime.now()
    time_taken = end_time - start_time
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
def nandyplot(fname):
    """Walk the sequence on a 2-D grid and print the distance of the mean point.

    Each base moves the current position by one unit: 'G' -> x - 1,
    'C' -> y + 1, 'A' -> x + 1, 'T' -> y - 1.  After every such move the
    position is added to running sums.  ``gr_value`` is the Euclidean norm of
    the mean position ``(sum_x / count, sum_y / count)``.

    The previous version also allocated a ``seqlen x seqlen`` zero matrix and
    kept per-base counters that were never read; both are removed.

    Args:
        fname: Path of the text file holding the sequence.

    Raises:
        ZeroDivisionError: If no 'A', 'C', 'G' or 'T' was found in the
            sequence (same as before).

    Output (stdout): the sequence length, ``gr_value``, then the
    ``microseconds`` and ``seconds`` fields of the elapsed ``timedelta``.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    seqarr = []
    # The context manager closes the file even if building a Node raises.
    with open(fname, 'r') as handle:
        for line in handle:
            for ch in line:
                base = ch.upper()
                if base in baseset:
                    seqarr.append(Node(base))

    # Unit step (dx, dy) for each recognised base.
    steps = {
        'G': (-1, 0),
        'C': (0, 1),
        'A': (1, 0),
        'T': (0, -1),
    }

    curr_x = 0
    curr_y = 0
    sum_x = 0
    sum_y = 0
    total_count = 0

    start_time = datetime.datetime.now()
    for i in range(seqlen):
        node = seqarr[i]
        step = steps.get(node.base)
        if step is not None:
            total_count += 1
            curr_x += step[0]
            curr_y += step[1]
            sum_x += curr_x
            sum_y += curr_y
        # Every node records the current position, even when its base did
        # not move the walk.
        node.x = curr_x
        node.y = curr_y

    mew_x = sum_x / total_count
    mew_y = sum_y / total_count
    gr_value = pow((pow(mew_x, 2.0) + pow(mew_y, 2.0)), 0.5)

    end_time = datetime.datetime.now()
    time_taken = end_time - start_time
    print('gr_value = ' + str(gr_value))
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
def songtangplot(fname):
    """Build distance matrices for the sequence in *fname* and print an eigenvalue.

    Node ``k`` (0-based) gets ``x = k + 1`` and
    ``y = base_to_y_val_map[base]``.  From those coordinates four symmetric
    ``seqlen x seqlen`` matrices are built:

    * ``edmat``  -- Euclidean distance between every pair of nodes;
    * ``pdmat``  -- cumulative distance along consecutive nodes between a pair;
    * ``mbym_mat`` -- ``edmat[i][j] / |i - j|``, written to ``songtang-mm.csv``;
    * ``lbyl_mat`` -- ``edmat[i][j] / pdmat[i][j]``, written to ``songtang-ll.csv``.

    Finally the largest eigenvalue of ``mbym_mat`` is printed.

    Coordinates were previously assigned inside the pair loop behind a
    ``visited`` check.  Because the first outer iteration already visits every
    node in index order, one forward pass assigns the same values.

    Args:
        fname: Path of the text file holding the sequence.

    Side effects: writes ``songtang-mm.csv`` and ``songtang-ll.csv`` to the
    current working directory; prints the sequence length, the eigenvalue and
    the ``microseconds`` / ``seconds`` fields of the elapsed ``timedelta``.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    seqarr = []
    # The context manager closes the file even if building a Node raises.
    with open(fname, 'r') as handle:
        for line in handle:
            for ch in line:
                base = ch.upper()
                if base in baseset:
                    seqarr.append(Node(base))

    start_time = datetime.datetime.now()

    # Assign coordinates once, in index order.
    curr_x = 0
    for idx in range(seqlen):
        node = seqarr[idx]
        if not node.visited:
            curr_x += 1
            node.x = curr_x
            node.y = base_to_y_val_map[node.base]
            node.visited = True

    # Euclidean-distance matrix.
    edmat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        node_i = seqarr[i]
        for j in range(i, seqlen):
            node_j = seqarr[j]
            edmat[i][j] = sqrt(
                pow((node_i.x - node_j.x), 2) +
                pow((node_i.y - node_j.y), 2))
            edmat[j][i] = edmat[i][j]

    # Path-distance matrix: the diagonal stays 0, each step adds one edge.
    pdmat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            pdmat[i][j] = pdmat[i][j - 1] + edmat[j - 1][j]
            pdmat[j][i] = pdmat[i][j]

    # M/M matrix: Euclidean distance over index separation.
    mbym_mat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            mbym_mat[i][j] = edmat[i][j] / abs(i - j)
            mbym_mat[j][i] = mbym_mat[i][j]
    np.savetxt("songtang-mm.csv", mbym_mat, delimiter=",")

    # L/L matrix: Euclidean distance over path distance.
    lbyl_mat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            lbyl_mat[i][j] = edmat[i][j] / pdmat[i][j]
            lbyl_mat[j][i] = lbyl_mat[i][j]
    np.savetxt("songtang-ll.csv", lbyl_mat, delimiter=",")

    print(np.linalg.eigvals(mbym_mat).max())

    end_time = datetime.datetime.now()
    time_taken = end_time - start_time
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
def randic3dplot(fname):
    """Print the largest eigenvalue of the M/M matrix of a 3-D walk over *fname*.

    Each base moves the current (x, y, z) position by one unit on every axis:
    'G' -> (-1, +1, -1), 'C' -> (-1, -1, +1), 'A' -> (+1, -1, -1),
    'T' -> (+1, +1, +1).  Pairwise Euclidean distances, divided by sqrt(3),
    fill ``edmat``; ``pdmat``, ``mbym_mat`` and ``lbyl_mat`` are derived from
    it, and the maximum eigenvalue of ``mbym_mat`` is printed.

    NOTE(review): another ``def randic3dplot`` appears later in this file.  If
    both are in the same module, the later one rebinds the name and this
    version is never called -- confirm, then delete or rename.

    NOTE(review): ``lbyl_mat`` (and the ``pdmat`` it needs) is computed but
    never read afterwards.
    """
    seqlen = getseqlen(fname)
    print(seqlen)
    seqarr = []
    # NOTE(review): the file is not closed if Node(...) raises; a ``with``
    # block would be safer.
    f = open(fname, 'r')
    c = f.read(1)
    while c != "":
        c = c.upper()
        if (c in baseset):
            #x = Node(c)
            seqarr.append(Node(c))
        c = f.read(1)
    f.close()
    # Running position of the walk.
    curr_x = 0
    curr_y = 0
    curr_z = 0
    start_time = datetime.datetime.now()
    edmat = np.zeros((seqlen, seqlen))
    for i in range(0, seqlen):
        # Advance the walk for node i on first visit only.
        if (seqarr[i].visited == False):
            curr_base = seqarr[i].base
            if (curr_base == 'G'):
                curr_x = curr_x - 1
                curr_y = curr_y + 1
                curr_z = curr_z - 1
            elif (curr_base == 'C'):
                curr_x = curr_x - 1
                curr_y = curr_y - 1
                curr_z = curr_z + 1
            elif (curr_base == 'A'):
                curr_x = curr_x + 1
                curr_y = curr_y - 1
                curr_z = curr_z - 1
            elif (curr_base == 'T'):
                curr_x = curr_x + 1
                curr_y = curr_y + 1
                curr_z = curr_z + 1
            seqarr[i].x = curr_x
            seqarr[i].y = curr_y
            seqarr[i].z = curr_z
            seqarr[i].visited = True
        for j in range(i, seqlen):
            # Same step rule as above, applied to node j on its first visit.
            # During the i == 0 pass this visits every remaining node in order.
            if (seqarr[j].visited == False):
                curr_base = seqarr[j].base
                if (curr_base == 'G'):
                    curr_x = curr_x - 1
                    curr_y = curr_y + 1
                    curr_z = curr_z - 1
                elif (curr_base == 'C'):
                    curr_x = curr_x - 1
                    curr_y = curr_y - 1
                    curr_z = curr_z + 1
                elif (curr_base == 'A'):
                    curr_x = curr_x + 1
                    curr_y = curr_y - 1
                    curr_z = curr_z - 1
                elif (curr_base == 'T'):
                    curr_x = curr_x + 1
                    curr_y = curr_y + 1
                    curr_z = curr_z + 1
                seqarr[j].x = curr_x
                seqarr[j].y = curr_y
                seqarr[j].z = curr_z
                seqarr[j].visited = True
            # Euclidean distance between nodes i and j, scaled by 1/sqrt(3).
            edmat[i][j] = sqrt(
                pow((seqarr[i].x - seqarr[j].x), 2) +
                pow((seqarr[i].y - seqarr[j].y), 2) +
                pow((seqarr[i].z - seqarr[j].z), 2)) / pow(3, 0.5)
            edmat[j][i] = edmat[i][j]
    '''Computing the path-distance matrix'''
    pdmat = np.zeros((seqlen, seqlen))
    for i in range(0, seqlen):
        for j in range(i, seqlen):
            if (j == i):
                pdmat[i][j] = 0
            elif (i < j):
                # Extend the path from i by the edge (j - 1, j).
                pdmat[i][j] = pdmat[i][j - 1] + edmat[j - 1][j]
            pdmat[j][i] = pdmat[i][j]
    '''Computing the M/M matrix'''
    mbym_mat = np.zeros((seqlen, seqlen))
    for i in range(0, seqlen):
        for j in range(i, seqlen):
            if (i == j):
                mbym_mat[i][j] = 0
            else:
                # Euclidean distance divided by the index separation.
                mbym_mat[i][j] = edmat[i][j] / abs(i - j)
            mbym_mat[j][i] = mbym_mat[i][j]
    '''Computing the L/L matrix'''
    lbyl_mat = np.zeros((seqlen, seqlen))
    for i in range(0, seqlen):
        for j in range(i, seqlen):
            if (i == j):
                lbyl_mat[i][j] = 0
            else:
                # Euclidean distance divided by the path distance.
                lbyl_mat[i][j] = edmat[i][j] / pdmat[i][j]
            lbyl_mat[j][i] = lbyl_mat[i][j]
    print(np.linalg.eigvals(mbym_mat).max())
    end_time = datetime.datetime.now()
    time_taken = end_time - start_time
    # These are the individual timedelta fields, not total elapsed time.
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
def yauplot(fname):
    """Walk the sequence with fixed 2-D unit vectors and print the mean point's norm.

    With ``h = 1/2`` and ``s = sqrt(3) / 2`` each base moves the current
    position by: 'G' -> (+s, -h), 'C' -> (+s, +h), 'A' -> (+h, -s),
    'T' -> (+h, +s).  After every such move the position is added to running
    sums.  ``mew_x`` / ``mew_y`` are the means of the visited positions, and
    ``gr_value`` is the Euclidean norm of that mean point.

    The previous per-base counters were never read and are removed.

    Args:
        fname: Path of the text file holding the sequence.

    Raises:
        ZeroDivisionError: If no 'A', 'C', 'G' or 'T' was found in the
            sequence (same as before).

    Output (stdout): the sequence length, ``mew_x``, ``mew_y``,
    ``sqr_gr_value``, ``gr_value``, then the ``microseconds`` and ``seconds``
    fields of the elapsed ``timedelta``.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    seqarr = []
    # The context manager closes the file even if building a Node raises.
    with open(fname, 'r') as handle:
        for line in handle:
            for ch in line:
                base = ch.upper()
                if base in baseset:
                    seqarr.append(Node(base))

    curr_x = 0.0
    curr_y = 0.0
    sum_x = 0.0
    sum_y = 0.0
    total_count = 0

    start_time = datetime.datetime.now()

    sqrt_three = pow(3, 0.5)
    half = 1.0 / 2.0
    long_step = sqrt_three * half
    # (dx, dy) per base.  Adding a negated value is bit-identical to the
    # subtraction used previously.
    moves = {
        'G': (long_step, -half),
        'C': (long_step, half),
        'A': (half, -long_step),
        'T': (half, long_step),
    }

    for i in range(seqlen):
        node = seqarr[i]
        move = moves.get(node.base)
        if move is not None:
            total_count += 1
            curr_x = curr_x + move[0]
            sum_x = sum_x + curr_x
            curr_y = curr_y + move[1]
            sum_y = sum_y + curr_y
        # Every node records the current position, even when its base did
        # not move the walk.
        node.x = curr_x
        node.y = curr_y

    mew_x = sum_x / total_count
    mew_y = sum_y / total_count
    sqr_gr_value = pow(mew_x, 2.0) + pow(mew_y, 2.0)
    gr_value = pow(sqr_gr_value, 0.5)

    end_time = datetime.datetime.now()
    time_taken = end_time - start_time
    print('mew_x = ' + str(mew_x))
    print('mew_y = ' + str(mew_y))
    print('sqr_gr_value = ' + str(sqr_gr_value))
    print('gr_value = ' + str(gr_value))
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
def randic3dplot(fname):
    """Print the largest eigenvalue of the M/M matrix for the sequence in *fname*.

    Steps, each logged with the current thread identifier:

    1. Read the sequence into ``Node`` objects.
    2. Fill ``edmat`` with pairwise 3-D Euclidean distances divided by
       sqrt(3), calling ``visit_node`` on each node before it is used.
    3. Build ``mbym_mat`` as ``edmat[i][j] / |i - j|`` (zero diagonal).
    4. Print the maximum of ``np.linalg.eigvalsh(mbym_mat)`` and how long
       that call took, then the total elapsed time.
    5. Run ``gc.collect()``.

    Args:
        fname: Path of the text file holding the sequence.
    """
    thread_tag = "Current thread: " + str(threading.get_ident())

    seqlen = getseqlen(fname)
    print("Current thread: " + str(threading.get_ident()) +
          "; sequence length: " + str(seqlen))

    seqarr = []
    # The context manager closes the file even if building a Node raises.
    with open(fname, 'r') as handle:
        for line in handle:
            for ch in line:
                base = ch.upper()
                if base in baseset:
                    seqarr.append(Node(base))

    start_time = datetime.datetime.now()

    root_three = pow(3, 0.5)  # loop-invariant scale factor
    edmat = np.zeros((seqlen, seqlen), dtype=float)
    for i in range(0, seqlen):
        # visit_node is defined elsewhere; presumably it assigns x/y/z on a
        # node's first visit.  Call order is kept exactly as before because
        # its side effects are not visible from here.
        visit_node(seqarr[i])
        for j in range(i, seqlen):
            visit_node(seqarr[j])
            edmat[i][j] = sqrt(
                pow((seqarr[i].x - seqarr[j].x), 2) +
                pow((seqarr[i].y - seqarr[j].y), 2) +
                pow((seqarr[i].z - seqarr[j].z), 2)) / root_three
            edmat[j][i] = edmat[i][j]

    print(thread_tag + "; Size of edmat(MB):" +
          str(edmat.nbytes / (1024 * 1024)))

    # M/M matrix: Euclidean distance over index separation; the diagonal
    # stays 0 from np.zeros.
    mbym_mat = np.zeros((seqlen, seqlen), dtype=float)
    for i in range(0, seqlen):
        for j in range(i + 1, seqlen):
            mbym_mat[i][j] = edmat[i][j] / abs(i - j)
            mbym_mat[j][i] = mbym_mat[i][j]

    print(thread_tag + "; Size of mbym_mat(MB):" +
          str(mbym_mat.nbytes / (1024 * 1024)))

    # mbym_mat is symmetric by construction, so eigvalsh applies.
    before_eigvalsh = datetime.datetime.now()
    print(thread_tag + "; Max eigvalue(eigh): " +
          str(np.linalg.eigvalsh(mbym_mat).max()) +
          ": time_taken: " + str(datetime.datetime.now() - before_eigvalsh))

    end_time = datetime.datetime.now()
    time_taken = end_time - start_time
    print(thread_tag + "; time taken = " + str(time_taken))

    gc.collect()
def jiliplot(fname):
    """Print the largest eigenvalue of the L/L matrix for the sequence in *fname*.

    Coordinates: a node whose base is in ``rset`` gets ``x = 1`` and ``y`` set
    to the running count of such nodes; any other node gets ``x = 0`` and
    ``y`` set to the running count of those.  From these coordinates:

    * ``edmat``    -- Euclidean distance between every pair of nodes;
    * ``pdmat``    -- cumulative distance along consecutive nodes;
    * ``lbyl_mat`` -- ``edmat[i][j] / pdmat[i][j]`` (zero diagonal).

    The maximum of ``np.linalg.eigvals(lbyl_mat)`` is printed.

    Changes from the previous version:

    * the file is read inside a ``with`` block;
    * coordinates are assigned in one forward pass.  The old pair loop visited
      every node in index order during its first outer iteration, so the
      values are the same;
    * the M/M matrix, which was built but never read, is no longer computed;
    * blocks of disabled code kept as string literals are removed.

    NOTE(review): the old inner loop had an ``else`` that copied node i's
    coordinates onto node j.  It was read here as the ``else`` of
    ``if j != i`` (a self-assignment, so a no-op) and dropped -- confirm
    against the original indentation.

    Args:
        fname: Path of the text file holding the sequence.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    seqarr = []
    # The context manager closes the file even if building a Node raises.
    with open(fname, 'r') as handle:
        for line in handle:
            for ch in line:
                base = ch.upper()
                if base in baseset:
                    seqarr.append(Node(base))

    start_time = datetime.datetime.now()

    # Assign coordinates once, in index order.
    rcount = 0
    ycount = 0
    for idx in range(seqlen):
        node = seqarr[idx]
        if node.base in baseset and not node.visited:
            if node.base in rset:
                node.x = 1
                rcount += 1
                node.y = rcount
            else:
                node.x = 0
                ycount += 1
                node.y = ycount
            node.visited = True

    # Euclidean-distance matrix.
    edmat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        node_i = seqarr[i]
        for j in range(i, seqlen):
            node_j = seqarr[j]
            edmat[i][j] = sqrt(
                pow((node_i.x - node_j.x), 2) +
                pow((node_i.y - node_j.y), 2))
            edmat[j][i] = edmat[i][j]

    # Path-distance matrix: the diagonal stays 0, each step adds one edge.
    pdmat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            pdmat[i][j] = pdmat[i][j - 1] + edmat[j - 1][j]
            pdmat[j][i] = pdmat[i][j]

    # L/L matrix: Euclidean distance over path distance.
    lbyl_mat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            lbyl_mat[i][j] = edmat[i][j] / pdmat[i][j]
            lbyl_mat[j][i] = lbyl_mat[i][j]

    print(np.linalg.eigvals(lbyl_mat).max())

    end_time = datetime.datetime.now()
    time_taken = end_time - start_time
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))