def get_para_1004():
    """Extract the parameters of the gtr and nest models from the fly data.

    Original note: the parameters of interest are likelihood, aic, w and psi.
    The folders (``dot_result_folder``, ``para_path``) and the parameter
    lists (``gtr_list_para``, ``nest_list_para``) are names defined outside
    this function.
    """
    print("---- ")
    # One extraction per model: gtr first, nest second.
    for model_name, wanted_params in (("gtr", gtr_list_para),
                                      ("nest", nest_list_para)):
        dsh.extract_parameter(dot_result_folder, para_path,
                              [model_name], wanted_params)
def test_gap_check_traversal(self):
    """gap_check_traversal must raise idSequenceUnKnow without a sequence.

    The call passes only the current directory and the report name; the
    test fails when that call returns normally.
    """
    here = os.path.abspath(os.curdir)
    report_name = "gap_report.txt"

    got_expected_error = False
    try:
        dsh.gap_check_traversal(here, report_name)
    except dsh.idSequenceUnKnow:
        got_expected_error = True
        print("ok , exception raisen")

    if not got_expected_error:
        self.fail("nope , no exception when there should have some ")
def test_available_traversal(self):
    """Exercise report_traversal_available_site with and without job ids.

    Without ``jobids`` the call has to raise ``dsh.idSequenceUnKnow``; with
    ``jobids=["tmp"]`` the report file is expected to exist afterwards.
    """
    common_args = {
        "input_folder": os.path.abspath(os.curdir),
        "output_file": "available_sites.txt",
    }
    known_ids = ["tmp"]

    rejected = False
    try:
        dsh.report_traversal_available_site(**common_args)
    except dsh.idSequenceUnKnow:
        rejected = True
    if not rejected:
        self.fail("id unknow")

    dsh.report_traversal_available_site(jobids=known_ids, **common_args)
    self.assertTrue(os.path.exists(common_args["output_file"]))
def main1():
    """Run transfer_for_r for the gtr, gu and nest models and compare them.

    The gtr result is the baseline; the gu and nest results are each
    compared with it for equality and "not same length" is printed on a
    mismatch.
    """
    working_dir = "/home/zerodel/Workspace/former10"
    wanted = ["likelihood", "aic", "w"]
    run_id = "10"

    baseline = dsh.transfer_for_r("gtr", run_id, wanted, working_dir)
    for other_model in ("gu", "nest"):
        candidate = dsh.transfer_for_r(other_model, run_id, wanted, working_dir)
        if not baseline == candidate:
            print("not same length")
def gap_check_traversal(input_folder, output_file, given_sequence_file=""):
    """
    Check the gap proportion of every job listed in ``given_sequence_file``
    and export the counts as a lst file to ``output_file``.

    For each job id, ``<jobid>.input`` in ``input_folder`` is used when it
    exists, otherwise ``<jobid>.aln``. One "gaps<TAB>full" row is written
    per job, in the order of the job ids.

    :param input_folder: folder holding the ``.input`` / ``.aln`` files
    :param output_file: path of the tab separated report to write
    :param given_sequence_file: lst file that supplies the gene id sequence
    :return: None
    :raises dsh.idSequenceUnKnow: when ``given_sequence_file`` is empty
    :raises dsh.WrongFileTypeForGapCheck: when a job has neither an
        ``.input`` nor an ``.aln`` file in ``input_folder``
    """
    curdir_abs = os.path.abspath(os.curdir)

    # The job ids come only from the sequence file, so without one there is
    # nothing to walk: refuse early instead of reading from an empty path.
    if not given_sequence_file:
        raise dsh.idSequenceUnKnow
    jobids = dsh.get_gene_id_sequnce_from_lst(given_sequence_file)

    with open(output_file, "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            input_name = os.path.join(input_folder, jobid + ".input")
            aln_name = os.path.join(input_folder, jobid + ".aln")
            if os.path.exists(input_name):
                num_gap, full_nt_length = gap_counting_input(input_name)
            elif os.path.exists(aln_name):
                # NOTE(review): .aln files also go through
                # gap_counting_input here -- confirm this is intended.
                num_gap, full_nt_length = gap_counting_input(aln_name)
            else:
                raise dsh.WrongFileTypeForGapCheck
            writer.write("%s\t%s\n" % (num_gap, full_nt_length))

    # Nothing above changes directory itself; going back to the starting
    # directory presumably guards against helpers that do -- TODO confirm.
    os.chdir(curdir_abs)
def snp_count_species(lst_file, folder_rnasnp_file):
    """Write a per-gene SNP count table next to ``lst_file``.

    The gene ids are read from ``lst_file``. For every gene the file
    ``<geneid>.rnasnp`` in ``folder_rnasnp_file`` is counted with
    ``count_snp_per_file`` (criterion "pvalue2", threshold 0.1,
    ``is_greater_than`` False) and the count is divided by 3.0; genes
    without an rnasnp file get "NA". The table is written to
    ``<name>SnpCount.lst`` in the directory of ``lst_file``.

    :param lst_file: lst file supplying the gene id sequence
    :param folder_rnasnp_file: folder holding the ``.rnasnp`` files
    :return: None
    """
    is_greater_than = False
    data_criterion = "pvalue2"
    threshold = 0.1

    jobids = dsh.get_gene_id_sequnce_from_lst(lst_file)

    # All counts are gathered before the output file is opened, so a failure
    # while counting never leaves a truncated table behind.
    gene_snp_count = []
    for jobid in jobids:
        single_file = os.path.join(folder_rnasnp_file, jobid + ".rnasnp")
        if os.path.exists(single_file):
            raw_count = count_snp_per_file(single_file, is_greater_than,
                                           threshold, data_criterion)
            # NOTE(review): the reason for dividing by 3.0 is not visible
            # in this function.
            gene_snp_count.append(str(raw_count / 3.0))
        else:
            gene_snp_count.append("NA")

    # Use only the file name for the output stem: keeping a relative
    # directory part would repeat that directory after joining with path_dir.
    path_dir = os.path.dirname(os.path.abspath(lst_file))
    lst_name_bare = os.path.splitext(os.path.basename(lst_file))[0]
    output_path = os.path.join(path_dir, lst_name_bare + "SnpCount.lst")

    with open(output_path, "w") as exporter:
        exporter.write("geneid\tsnpCountPerGene\n")
        for jobid, snp_count in zip(jobids, gene_snp_count):
            exporter.write("%s\t%s\n" % (jobid, snp_count))
def main1():
    """Write the gap count and full length of every yeast alignment.

    The gene ids are read from ``gtr.lst`` in the ExtractedParameter folder
    and one "gaps<TAB>full" row per gene is written to ``gapyeast.lst`` in
    that same folder.
    """
    aln_path = "/home/zerodel/Workspace/Yeast/result/main_full_length"
    para_dir = "/home/zerodel/Workspace/Yeast/result/ExtractedParameter"

    # gap_counting_aln receives bare "<jobid>.aln" names, so the alignment
    # folder has to be the working directory.
    os.chdir(aln_path)

    jobids = dsh.get_gene_id_sequnce_from_lst(os.path.join(para_dir, "gtr.lst"))
    with open(os.path.join(para_dir, "gapyeast.lst"), "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            num_gap, full_nt_length = gap_counting_aln(jobid + ".aln")
            writer.write("%s\t%s\n" % (num_gap, full_nt_length))
def main3():
    """Write the gap count and full length of every sp2 input file.

    The gene ids are read from ``nest2.lst`` in the para data folder and
    one "gaps<TAB>full" row per gene is written to ``gap10.lst`` in that
    same folder.
    """
    input_path = "/home/zerodel/Workspace/sp2"
    para_dir = "/home/zerodel/GitProjects/python-rna-structure/data/para"

    # gap_counting_input receives bare "<jobid>.input" names, so the input
    # folder has to be the working directory.
    os.chdir(input_path)

    jobids = dsh.get_gene_id_sequnce_from_lst(os.path.join(para_dir, "nest2.lst"))
    with open(os.path.join(para_dir, "gap10.lst"), "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            num_gap, full_nt_length = gap_counting_input(jobid + ".input")
            writer.write("%s\t%s\n" % (num_gap, full_nt_length))
def main3():
    """Grep the psi lines of the 2-species nest results and convert for R.

    Runs ``grep -i psi *nest.result`` inside the result folder, redirecting
    the output to ``../nest2psi.txt``, then calls ``dsh.transfer_for_r`` for
    the nest model. The working directory is put back afterwards.
    """
    path_result = "/home/zerodel/Workspace/merge0914/2sp"
    path_working = "/home/zerodel/Workspace/merge0914"
    para = "psi"
    model = "nest"
    mid = "2"

    dir_now = os.path.abspath(os.curdir)
    os.chdir(path_result)
    try:
        # The glob and the redirection are resolved by the shell relative
        # to the result folder.
        system_order = "grep -i " + para + " *" + model + ".result >" \
            + "../" + model + mid + para + ".txt"
        os.system(system_order)
        dsh.transfer_for_r("nest", mid, ["psi"], path_working)
    finally:
        # Give the caller back the directory it started in, even on error.
        os.chdir(dir_now)
def snp_count_species(lst_file, folder_rnasnp_file):
    """Write a per-gene SNP count table next to ``lst_file``.

    The gene ids are read from ``lst_file``. For every gene the file
    ``<geneid>.rnasnp`` in ``folder_rnasnp_file`` is counted with
    ``count_snp_per_file`` (criterion "pvalue2", threshold 0.1,
    ``is_greater_than`` False) and the count is divided by 3.0; genes
    without an rnasnp file get "NA". The table is written to
    ``<name>SnpCount.lst`` in the directory of ``lst_file``.

    :param lst_file: lst file supplying the gene id sequence
    :param folder_rnasnp_file: folder holding the ``.rnasnp`` files
    :return: None
    """
    is_greater_than = False
    data_criterion = "pvalue2"
    threshold = 0.1

    jobids = dsh.get_gene_id_sequnce_from_lst(lst_file)

    # All counts are gathered before the output file is opened, so a failure
    # while counting never leaves a truncated table behind.
    gene_snp_count = []
    for jobid in jobids:
        single_file = os.path.join(folder_rnasnp_file, jobid + ".rnasnp")
        if os.path.exists(single_file):
            raw_count = count_snp_per_file(single_file, is_greater_than,
                                           threshold, data_criterion)
            # NOTE(review): the reason for dividing by 3.0 is not visible
            # in this function.
            gene_snp_count.append(str(raw_count / 3.0))
        else:
            gene_snp_count.append("NA")

    # Use only the file name for the output stem: keeping a relative
    # directory part would repeat that directory after joining with path_dir.
    path_dir = os.path.dirname(os.path.abspath(lst_file))
    lst_name_bare = os.path.splitext(os.path.basename(lst_file))[0]
    output_path = os.path.join(path_dir, lst_name_bare + "SnpCount.lst")

    with open(output_path, "w") as exporter:
        exporter.write("geneid\tsnpCountPerGene\n")
        for jobid, snp_count in zip(jobids, gene_snp_count):
            exporter.write("%s\t%s\n" % (jobid, snp_count))
def prepare4r():
    """Call transfer_for_r for the gtr and nest models of the fly data.

    Both calls use an empty id string and the same ExtractedParameters
    folder; the parameter lists (``gtr_list_para``, ``nest_list_para``) are
    names defined outside this function.
    """
    extracted_dir = "/home/zerodel/Workspace/Fly/ExtractedParameters"
    for model_name, wanted_params in (("gtr", gtr_list_para),
                                      ("nest", nest_list_para)):
        dsh.transfer_for_r(model_name, "", wanted_params, extracted_dir)
def test_aln():
    """Run the gene/species match check on the 12-species fly alignments.

    Hands the alignment folder and the list of twelve species names to
    ``dsh.test_gene_species_match``.
    """
    species_csv = "dyak,dere,dpse,dper,dgri,dmoj,dvir,dwil,dana,dsec,dsim,dmel"
    alignment_dir = "/home/zerodel/Workspace/Fly/align_fly/fly_12_species"
    dsh.test_gene_species_match(alignment_dir, species_csv.split(","))
def test_aln():
    """Run the gene/species match check on the 8-species mouse alignments.

    Hands the alignment folder and the list of eight species names to
    ``dsh.test_gene_species_match``.
    """
    species_csv = "macaque,chimp,human,dog,cow,horse,rat,mouse"
    alignment_dir = "/home/zerodel/Workspace/mouse/mouse_8_species"
    dsh.test_gene_species_match(alignment_dir, species_csv.split(","))
def test_aln():
    """Run the gene/species match check on the 12-species fly alignments.

    The comma separated species string is split into a list and passed,
    together with the alignment folder, to ``dsh.test_gene_species_match``.
    """
    alignment_dir = "/home/zerodel/Workspace/Fly/align_fly/fly_12_species"
    species_csv = "dyak,dere,dpse,dper,dgri,dmoj,dvir,dwil,dana,dsec,dsim,dmel"
    expected_species = species_csv.split(",")
    dsh.test_gene_species_match(alignment_dir, expected_species)
def test_gap(self):
    """sum_available_sites on ./tmp.aln must report 3 available sites of 4."""
    fixture_path = "./tmp.aln"
    site_counts = dsh.sum_available_sites(fixture_path)
    available, total = site_counts
    # The full length is checked first, then the available sites.
    self.assertEqual(total, 4)
    self.assertEqual(available, 3)
def test_gap_traversal2(self):
    """gap_check_traversal with explicit job ids must produce the report.

    Runs the traversal over the current directory for the single job id
    "tmp" and asserts that ``gap_report.txt`` exists afterwards.
    """
    report_name = "gap_report.txt"
    call_args = {
        "input_folder": os.path.abspath(os.curdir),
        "output_file": report_name,
        "jobids": ["tmp"],
    }
    dsh.gap_check_traversal(**call_args)
    self.assertTrue(os.path.exists(report_name))