def snp_count_species(lst_file, folder_rnasnp_file):
    """
    count the SNPs of every gene listed in "lst_file" and export them as a new lst file

    For each gene id the file "<geneid>.rnasnp" is looked up in
    "folder_rnasnp_file". An existing file is handed to count_snp_per_file
    (criterion "pvalue2", threshold 0.1, is_greater_than False) and the
    returned count is divided by 3.0; a missing file gives "NA".
    The table is written to "<lst name without extension>SnpCount.lst" in the
    same folder as "lst_file".

    :param lst_file: path of the lst file which gives the gene id sequence
    :param folder_rnasnp_file: folder which holds the "<geneid>.rnasnp" files
    :return: None
    """
    # read gene id sequence
    path_dir = os.path.dirname(os.path.abspath(lst_file))
    # only the bare file name is used here: the folder already comes from
    # path_dir, keeping it twice would nest a relative folder into itself
    lst_name_bare = os.path.splitext(os.path.basename(lst_file))[0]
    jobids = dsh.get_gene_id_sequnce_from_lst(lst_file)

    is_greater_than = False
    data_criterion = "pvalue2"
    threshold = 0.1

    gene_snp_count = []
    for jobid in jobids:
        single_file = os.path.join(folder_rnasnp_file, ".".join([jobid, "rnasnp"]))
        if os.path.exists(single_file):
            # NOTE(review): the reason for the division by 3.0 is not visible here
            snp_number = count_snp_per_file(single_file, is_greater_than,
                                            threshold, data_criterion)
            gene_snp_count.append(str(snp_number / 3.0))
        else:
            gene_snp_count.append("NA")

    # write a new lst file for snp count in some species
    with open(os.path.join(path_dir, lst_name_bare + "SnpCount.lst"), "w") as exporter:
        exporter.write("geneid\tsnpCountPerGene\n")
        for jobid, snp_count in zip(jobids, gene_snp_count):
            exporter.write("%s\t%s\n" % (jobid, snp_count))
def gap_check_traversal(input_folder, output_file, given_sequence_file=""):
    """
    check gap proportion on the files in "input_folder", and export as lst file
    to "output_file", following the gene id order of "given_sequence_file"

    For every gene id "<geneid>.input" is used when it exists, otherwise
    "<geneid>.aln". One line "<gaps><TAB><full length>" is written per gene
    after the header line.

    :param input_folder: folder which holds the ".input" / ".aln" files
    :param output_file: path of the lst file to write
    :param given_sequence_file: lst file which gives the gene id sequence;
        it has a default value but the function raises when it is empty
    :return: None
    :raises dsh.idSequenceUnKnow: when no given_sequence_file is supplied
    :raises dsh.WrongFileTypeForGapCheck: when a gene id has neither an
        ".input" nor an ".aln" file in input_folder
    """
    curdir_abs = os.path.abspath(os.curdir)

    # the id list can only be read when a sequence file is really given
    if not given_sequence_file:
        raise dsh.idSequenceUnKnow
    jobids = dsh.get_gene_id_sequnce_from_lst(given_sequence_file)

    with open(output_file, "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            input_name = os.path.join(input_folder, jobid + ".input")
            aln_name = os.path.join(input_folder, jobid + ".aln")

            if os.path.exists(input_name):
                num_gap, full_nt_length = gap_counting_input(input_name)
            elif os.path.exists(aln_name):
                # NOTE(review): ".aln" files also go through gap_counting_input
                # here - confirm that this parser accepts them
                num_gap, full_nt_length = gap_counting_input(aln_name)
            else:
                raise dsh.WrongFileTypeForGapCheck

            writer.write("%s\t%s\n" % (str(num_gap), str(full_nt_length)))

    # nothing visible in this function changes the working directory; the
    # restore is kept in case one of the helpers does
    os.chdir(curdir_abs)
def main1():
    """
    get the gaps number and full_length of the yeast ".aln" files

    The gene id sequence is read from "gtr.lst"; for every id the file
    "<geneid>.aln" inside the alignment folder is given to gap_counting_aln
    and the two numbers are written to "gapyeast.lst".
    All paths are hard coded. The working directory of the process is changed
    to the alignment folder and is not restored.

    :return: None
    """
    aln_path = "/home/zerodel/Workspace/Yeast/result/main_full_length"
    # the ".aln" files are addressed by bare name below, so work inside aln_path
    os.chdir(aln_path)

    jobids = dsh.get_gene_id_sequnce_from_lst(
        os.path.join("/home/zerodel/Workspace/Yeast/result/ExtractedParameter", "gtr.lst"))

    with open("/home/zerodel/Workspace/Yeast/result/ExtractedParameter/gapyeast.lst", "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            num_gap, full_nt_length = gap_counting_aln(jobid + ".aln")
            writer.write("%s\t%s\n" % (str(num_gap), str(full_nt_length)))
def main3():
    """
    get the gaps number and full_length of the ".input" files in the sp2 folder

    The gene id sequence is read from "nest2.lst"; for every id the file
    "<geneid>.input" inside the working folder is given to gap_counting_input
    and the two numbers are written to "gap10.lst".
    All paths are hard coded. The working directory of the process is changed
    to the input folder and is not restored.

    :return: None
    """
    aln_path = "/home/zerodel/Workspace/sp2"
    # the ".input" files are addressed by bare name below, so work inside aln_path
    os.chdir(aln_path)

    jobids = dsh.get_gene_id_sequnce_from_lst(
        os.path.join("/home/zerodel/GitProjects/python-rna-structure/data/para", "nest2.lst"))

    with open("/home/zerodel/GitProjects/python-rna-structure/data/para/gap10.lst", "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            num_gap, full_nt_length = gap_counting_input(jobid + ".input")
            writer.write("%s\t%s\n" % (str(num_gap), str(full_nt_length)))
def snp_count_species(lst_file, folder_rnasnp_file):
    """
    export the per-gene SNP count of one species as "<lst name>SnpCount.lst"

    The gene ids come from "lst_file". Each "<geneid>.rnasnp" file found in
    "folder_rnasnp_file" is counted with count_snp_per_file (criterion
    "pvalue2", threshold 0.1, is_greater_than False) and the count is divided
    by 3.0; genes without an rnasnp file are reported as "NA".
    The output file is placed in the folder of "lst_file".

    NOTE(review): a function with this name is defined more than once in this
    file; the definition which comes last is the one in effect.

    :param lst_file: path of the lst file which gives the gene id sequence
    :param folder_rnasnp_file: folder which holds the "<geneid>.rnasnp" files
    :return: None
    """
    # read gene id sequence
    path_dir = os.path.dirname(os.path.abspath(lst_file))
    # strip the folder part before building the output name, path_dir already
    # supplies it (a relative "sub/x.lst" would otherwise end in "sub/sub/")
    lst_name_bare = os.path.splitext(os.path.basename(lst_file))[0]
    jobids = dsh.get_gene_id_sequnce_from_lst(lst_file)
    file_name_vector = [os.path.join(folder_rnasnp_file, ".".join([jobid, "rnasnp"]))
                        for jobid in jobids]

    is_greater_than = False
    data_criterion = "pvalue2"
    threshold = 0.1

    gene_snp_count = []
    for single_file in file_name_vector:
        if not os.path.exists(single_file):
            gene_snp_count.append("NA")
            continue
        # NOTE(review): the reason for the division by 3.0 is not visible here
        counted = count_snp_per_file(single_file, is_greater_than, threshold, data_criterion)
        gene_snp_count.append(str(counted / 3.0))

    # write a new lst file for snp count in some species
    export_name = os.path.join(path_dir, lst_name_bare + "SnpCount.lst")
    with open(export_name, "w") as exporter:
        exporter.write("geneid\tsnpCountPerGene\n")
        for index_g, jobid in enumerate(jobids):
            exporter.write("%s\t%s\n" % (jobid, gene_snp_count[index_g]))