def mouse_prepare():
    """Export the nested RNA-structure model for the mouse data set.

    All locations are hard-coded paths on the author's machine.  As
    currently written the function:

    1. lists the RNAsnp result directory with ``RNAsnp.snp_dir_list``;
    2. calls ``RNAsnp.group_divide`` with the ``codon_codon_lst`` and
       ``codon_all.lst`` paths;
    3. calls ``matrixh.nest_export`` with ``rebuild_model.mdl``,
       ``rna_structure_full_mouse.mdl`` and ``codon_all.lst``.

    The .cpd generation, the per-codon listing and the
    ``matrixh.degenerate`` call are disabled (commented out), so the
    function presumably relies on files left by an earlier run
    -- TODO confirm.
    """
    work_path = "/home/zerodel/Workspace/mouse"
    source_path = "/home/zerodel/Workspace/mouse/mouse_rnasnp"
    # Only referenced by the disabled steps below.
    output_path = "/home/zerodel/Workspace/mouse/cpd"
    # NOTE(review): the other entry points in this file use "codon_lst";
    # confirm that "codon_codon_lst" is the intended file name.
    single_codon_file_path = os.path.join(work_path, "codon_codon_lst")
    cc_significant_file_path = os.path.join(work_path, "codon_all.lst")

    # The results are only consumed by the disabled step 1; the call is kept
    # because it is unknown here whether snp_dir_list has side effects.
    rnasnp_files, snppathname = RNAsnp.snp_dir_list(source_path)

    # Step 1 (disabled): set up .cpd files
    # RNAsnp.snp_dir_traversal(rnasnp_files, snppathname, output_path, True)
    #
    # Step 2 (disabled): print each codon lst file
    # for codon in gc.codon_list_hypothesis:
    #     codon_with_direction = codon + "_"
    #     print "--->", codon_with_direction
    #     RNAsnp.cpd_dir_traversal(codon_with_direction, output_path,
    #                              single_codon_file_path)

    RNAsnp.group_divide(single_codon_file_path, cc_significant_file_path)

    # Only referenced by the disabled degenerate() call below.
    gu_model_file = "/home/zerodel/GitProjects/python-rna-structure/gu_model.mdl"
    rebuild_model_file = "/home/zerodel/GitProjects/python-rna-structure/rebuild_model.mdl"
    nested_model_file = os.path.join(work_path, "rna_structure_full_mouse.mdl")

    # matrixh.degenerate(gu_model_file, rebuild_model_file)
    matrixh.nest_export(rebuild_model_file, nested_model_file,
                        cc_significant_file_path)
def make_model():
    """Run the full model-building pipeline for the E. coli data set.

    All locations are hard-coded Windows paths under ``d:/``.  The calls
    are made in this order:

    1. ``RNAsnp.snp_dir_list`` / ``RNAsnp.snp_dir_traversal`` on the
       ``ecoli_snp`` directory, writing into ``cpd_full``;
    2. ``RNAsnp.cpd_dir_traversal`` once per entry of
       ``gc.codon_list_hypothesis`` (with ``"_"`` appended), targeting the
       ``codon_lst`` file;
    3. ``RNAsnp.group_divide`` from ``codon_lst`` to ``codon_all.lst``;
    4. ``matrixh.degenerate`` from ``gu_model.mdl`` to
       ``rebuild_model_full_length.mdl``;
    5. ``matrixh.nest_export`` to ``rna_full_length_structure.mdl``.
    """
    work_path = "d:/Workspace/Ecoli"
    source_path = "d:/Workspace/Ecoli/ecoli_snp"
    output_path = "d:/Workspace/Ecoli/cpd_full"
    single_codon_file_path = os.path.join(work_path, "codon_lst")
    cc_significant_file_path = os.path.join(work_path, "codon_all.lst")

    gu_model_file = "d:/Home/GitProject/python-rna-structure/gu_model.mdl"
    rebuild_model_file = "d:/Workspace/Ecoli/rebuild_model_full_length.mdl"
    nested_model_file = "d:/Workspace/Ecoli/rna_full_length_structure.mdl"

    # Step 1: set up .cpd files
    rnasnp_files, snppathname = RNAsnp.snp_dir_list(source_path)
    RNAsnp.snp_dir_traversal(rnasnp_files, snppathname, output_path, True, True)

    # Step 2: print each codon lst file
    for codon in gc.codon_list_hypothesis:
        codon_with_direction = codon + "_"
        # Single-argument form: same output as the Python 2 statement
        # `print "--->", codon_with_direction`, and also valid on Python 3.
        print("%s %s" % ("--->", codon_with_direction))
        RNAsnp.cpd_dir_traversal(codon_with_direction, output_path,
                                 single_codon_file_path)

    RNAsnp.group_divide(single_codon_file_path, cc_significant_file_path)

    matrixh.degenerate(gu_model_file, rebuild_model_file)
    matrixh.nest_export(rebuild_model_file, nested_model_file,
                        cc_significant_file_path)
def built_model():
    """Rebuild and export the nested RNA-structure model (Linux paths).

    All locations are hard-coded paths on the author's machine.  As
    currently written the function:

    1. lists the ``ecoli_snp`` directory with ``RNAsnp.snp_dir_list``;
    2. reads ``codon_all.lst`` through ``RNAsnp.get_large_codon_group`` and
       prints how many items it yields;
    3. calls ``matrixh.degenerate`` from ``gu_model.mdl`` to
       ``rebuild_model.mdl``;
    4. calls ``matrixh.nest_export`` to ``rna_structure.mdl``.

    The .cpd generation, the per-codon listing and the R k-means clustering
    are disabled (commented out); they are kept below as a record of the
    earlier pipeline.
    """
    # Only referenced by the disabled k-means step below.
    # NOTE(review): this says "GitProject" while the model paths further
    # down use "GitProjects" -- confirm which directory is intended.
    parent_path = "/home/zerodel/GitProject"
    work_path = "/home/zerodel/Workspace"
    source_path = "/home/zerodel/Workspace/ecoli_snp"
    # Only referenced by the disabled steps below.
    output_path = "/home/zerodel/Workspace/cpd_store_site"
    single_codon_file_path = os.path.join(work_path, "codon_lst")
    cc_significant_file_path = os.path.join(work_path, "codon_all.lst")

    # The results are only consumed by the disabled step 1; the call is kept
    # because it is unknown here whether snp_dir_list has side effects.
    rnasnp_files, snppathname = RNAsnp.snp_dir_list(source_path)

    # Step 1 (disabled): set up .cpd files
    # RNAsnp.snp_dir_traversal(rnasnp_files, snppathname, output_path, True)
    #
    # Step 2 (disabled): print each codon lst file
    # for codon in gc.codon_list_hypothesis:
    #     codon_with_direction = codon + "_"
    #     print "--->", codon_with_direction
    #     RNAsnp.cpd_dir_traversal(codon_with_direction, output_path,
    #                              single_codon_file_path, 60)
    #
    # kmeans met some problem here (2014-09-12)
    #
    # R kmeans part (disabled):
    # print "kmeans of cluster"
    # kmeans_script_path = os.path.join(parent_path, "kmeans.R")
    # cc_significant_file_path = os.path.join(work_path, "codon20.lst")
    # cmd_line = "Rscript " + kmeans_script_path + " " + single_codon_file_path + " > " + cc_significant_file_path
    # os.system(cmd_line)

    # A new way to infer the group was chosen on 2014-09-12: the group file
    # is read directly instead of being produced by the R script above.
    cc_significant_raw = RNAsnp.get_large_codon_group(cc_significant_file_path)
    cc_significant = list(cc_significant_raw)
    print(len(cc_significant))

    gu_model_file = "/home/zerodel/GitProjects/python-rna-structure/gu_model.mdl"
    rebuild_model_file = "/home/zerodel/GitProjects/python-rna-structure/rebuild_model.mdl"
    nested_model_file = "/home/zerodel/GitProjects/python-rna-structure/rna_structure.mdl"

    matrixh.degenerate(gu_model_file, rebuild_model_file)
    matrixh.nest_export(rebuild_model_file, nested_model_file,
                        cc_significant_file_path)
def make_model():
    """Run the model-building pipeline using module-level path settings.

    Unlike the self-contained variants, this function defines no paths
    itself.  It reads ``work_path``, ``source_path``, ``output_path``,
    ``gu_model_file``, ``rebuild_model_file`` and ``nested_model_file`` as
    free names, so they must exist at module level before it is called
    (otherwise a ``NameError`` is raised).

    The calls are made in this order:

    1. ``RNAsnp.snp_dir_list`` / ``RNAsnp.snp_dir_traversal`` on
       ``source_path``, writing into ``output_path``;
    2. ``RNAsnp.cpd_dir_traversal`` once per entry of
       ``gc.codon_list_hypothesis`` (with ``"_"`` appended), targeting the
       ``codon_lst`` file under ``work_path``;
    3. ``matrixh.degenerate`` followed by ``matrixh.nest_export``, the
       latter receiving the ``codon_all.lst`` path under ``work_path``.
    """
    single_codon_file_path = os.path.join(work_path, "codon_lst")
    cc_significant_file_path = os.path.join(work_path, "codon_all.lst")

    # Step 1: set up .cpd files
    rnasnp_files, snppathname = RNAsnp.snp_dir_list(source_path)
    RNAsnp.snp_dir_traversal(rnasnp_files, snppathname, output_path, True, True)

    # Step 2: print each codon lst file
    for codon in gc.codon_list_hypothesis:
        codon_with_direction = codon + "_"
        # Single-argument form: same output as the Python 2 statement
        # `print "--->", codon_with_direction`, and also valid on Python 3.
        print("%s %s" % ("--->", codon_with_direction))
        RNAsnp.cpd_dir_traversal(codon_with_direction, output_path,
                                 single_codon_file_path)

    # NOTE(review): the grouping step is disabled here, so codon_all.lst is
    # presumably expected to exist already -- confirm before relying on it.
    # RNAsnp.group_divide(single_codon_file_path, cc_significant_file_path)

    matrixh.degenerate(gu_model_file, rebuild_model_file)
    matrixh.nest_export(rebuild_model_file, nested_model_file,
                        cc_significant_file_path)
writer.write("\t".join(line_single) + "\n") with open( "/home/zerodel/GitProjects/python-rna-structure/data/codon_vector.txt", "w") as writer: writer.write("\n".join(gc.codon_list_hypothesis)) if __name__ == "__main__": parent_path = "/home/zerodel/GitProject" work_path = "/home/zerodel/Workspace" source_path = "/home/zerodel/Workspace/yeast_rnasnp" output_path = "/home/zerodel/Workspace/yeast_cpd" single_codon_file_path = os.path.join(work_path, "codon_lst") cc_significant_file_path = os.path.join(work_path, "codon_all.lst") rnasnp_files, snppathname = RNAsnp.snp_dir_list(source_path) # #step 1 , set up .cpd files # RNAsnp.snp_dir_traversal(rnasnp_files, snppathname, output_path, True) cpd_main = RNAsnp.get_whole_cpd(output_path) codon_vector = gc.codon_list_64 with open( "/home/zerodel/GitProjects/python-rna-structure/data/YeastfullRnasnp.matrix", "w") as writer: available_cc = cpd_main.keys() for codon1 in codon_vector: # build a line for lst file line_single = [] for codon2 in codon_vector: if (codon1, codon2) in available_cc: line_single.append(str(len(cpd_main[(codon1, codon2)])))
line_single = [] for codon2 in codon_vector: line_single.append(str(len(cpd_main[(codon1, codon2)]))) writer.write("\t".join(line_single) + "\n") with open("/home/zerodel/GitProjects/python-rna-structure/data/codon_vector.txt", "w") as writer: writer.write("\n".join(gc.codon_list_hypothesis)) if __name__ == "__main__": parent_path = "/home/zerodel/GitProject" work_path = "/home/zerodel/Workspace" source_path = "/home/zerodel/Workspace/yeast_rnasnp" output_path = "/home/zerodel/Workspace/yeast_cpd" single_codon_file_path = os.path.join(work_path, "codon_lst") cc_significant_file_path = os.path.join(work_path, "codon_all.lst") rnasnp_files, snppathname = RNAsnp.snp_dir_list(source_path) # #step 1 , set up .cpd files # RNAsnp.snp_dir_traversal(rnasnp_files, snppathname, output_path, True) cpd_main = RNAsnp.get_whole_cpd(output_path) codon_vector = gc.codon_list_64 with open("/home/zerodel/GitProjects/python-rna-structure/data/YeastfullRnasnp.matrix", "w") as writer: available_cc = cpd_main.keys() for codon1 in codon_vector: # build a line for lst file line_single = [] for codon2 in codon_vector: if (codon1, codon2) in available_cc: line_single.append(str(len(cpd_main[(codon1, codon2)]))) else: line_single.append(str(0))