def batch_for_batch(work_dir="/home/zerodel/Workspace/mouse/FullLengthMouse",
                    aln_path="/home/zerodel/Workspace/mouse/mouse_8_species"):
    """Write GTR and nested-model HyPhy batch files for every .aln file.

    The process switches its working directory to *work_dir* (and does not
    switch back).  The .input, .bf and .result names are relative, so they
    are resolved against that directory.  An .input file that already
    exists is reused instead of being regenerated.

    :param work_dir: directory to change into before any file is written;
        the default is the location that used to be hard-coded.
    :param aln_path: directory scanned for files ending in ".aln".
    :return: None
    """
    os.chdir(work_dir)
    model_gtr = "rebuild_model.mdl"
    model_nest = "rna_structure_full_mouse.mdl"

    bf_maker_nest = BfH.HYPHYBatchFile(species_name="mouse", model_file=model_nest, bf_template_file="template_global")
    bf_maker_gtr = BfH.HYPHYBatchFile(species_name="mouse", model_file=model_gtr, bf_template_file="template_global")

    # Strip only the trailing ".aln": splitting on the first dot would turn
    # "a.b.aln" into job "a" and later rebuild the wrong path "a.aln".
    suffix = ".aln"
    jobids = [name[:-len(suffix)] for name in os.listdir(aln_path) if name.endswith(suffix)]

    for job_id in jobids:
        aln_file_path_full = os.path.join(aln_path, job_id + suffix)
        input_file = os.path.join(os.curdir, job_id + ".input")

        if not os.path.exists(input_file):
            DH.aln2input(aln_file_path_full, input_file)

        # Same call for both models; only the tag inside the file names differs.
        for tag, bf_maker in (("gtr", bf_maker_gtr), ("nest", bf_maker_nest)):
            bf_maker.write_batch_file(
                dot_input=job_id + ".input",
                dot_aln=aln_file_path_full,
                hyphy_result_file=job_id + tag + ".result",
                hyphy_batch_file=job_id + tag + ".bf",
            )
    def test_aln_to_no_gap_input(self):
        # Rows of unequal length must make check_sequence_matrix raise.
        rejected = False
        try:
            dh.check_sequence_matrix(["aaca", "aaa"])
        except dh.sequenceNotSameLength:
            rejected = True

        if not rejected:
            self.fail("here should raise a exception!")

        # Afterwards run the no-gap conversion on the fixture alignment.
        dh.aln2inputNogap("./tmp.aln", "./tmp2.input")
Beispiel #3
0
    def test_aln_to_no_gap_input(self):
        # Rows of unequal length must make check_sequence_matrix raise.
        rejected = False
        try:
            dh.check_sequence_matrix(["aaca", "aaa"])
        except dh.sequenceNotSameLength:
            rejected = True

        if not rejected:
            self.fail("here should raise a exception!")

        # Afterwards run the no-gap conversion on the fixture alignment.
        dh.aln2inputNogap("./tmp.aln", "./tmp2.input")
Beispiel #4
0
def make_bf_p2():
    """Write "part 2" (position 51 to end) HyPhy batch files for the E. coli alignments.

    Changes the working directory to the hard-coded work path, converts every
    .aln file of the alignment folder to a .input file there, and writes one
    GTR and one nested-model batch file per alignment using the partition
    (51, sequence length).  Alignments whose first sequence length is below 52
    are reported and get no batch files (their .input file is still written).

    :return: None
    """
    # Raw strings: the backslashes are path separators, not escape sequences.
    workpath = r"d:\Workspace\Ecoli\P2"
    os.chdir(workpath)
    model_gtr = "rebuild_model.mdl"
    model_nest = "rna_full_length_structure.mdl"

    bf_maker_gtr = BfH.HYPHYBatchFile(species_name="ecoli",
                                      model_file=model_gtr,
                                      bf_template_file="templateNoGap")

    bf_maker_nest = BfH.HYPHYBatchFile(species_name="ecoli",
                                       model_file=model_nest,
                                       bf_template_file="templateNoGap")

    # Both makers use the externally supplied E. coli tree.
    bf_maker_gtr.set_tree_from_outside(SS.ecoli())
    bf_maker_nest.set_tree_from_outside(SS.ecoli())

    aln_file_folder = r"d:\Workspace\Ecoli\ecoli_10_species"

    aln_files = [file_single for file_single in os.listdir(aln_file_folder)
                 if ".aln" == os.path.splitext(file_single)[-1]]

    for single_aln in aln_files:
        aln_full_path = os.path.join(aln_file_folder, single_aln)
        _genes, lengths = DH.aln_info(aln_full_path)
        gene_full_length = lengths[0]
        jobid = single_aln.split(".")[0]
        input_file_name = "%s.input" % jobid
        DH.aln2input(aln_full_path, input_file_name)

        if gene_full_length < 52:
            # Single-argument call form prints the same text on Python 2 and 3.
            print("%s too short ---" % single_aln)
            continue

        for tag, bf_maker in (("gtr", bf_maker_gtr), ("nest", bf_maker_nest)):
            bf_maker.set_partition(51, gene_full_length)

        for tag, bf_maker in (("gtr", bf_maker_gtr), ("nest", bf_maker_nest)):
            bf_maker.write_batch_file(dot_input=input_file_name,
                                      dot_aln="",
                                      hyphy_batch_file="%sp2%s.bf" % (jobid, tag),
                                      hyphy_result_file="%sp2%s.result" % (jobid, tag))
    def write_batch_file(self, dot_input, dot_aln, hyphy_batch_file="", hyphy_result_file=""):
        """Fill in the batch-file template and write the result to disk.

        Each placeholder pattern (self.f_input, self.f_partition, self.f_mdl,
        self.f_matrix_name, self.f_tree, self.f_output) is substituted in
        self.batch_content, and the hit count of every substitution is handed
        to self._error_no_hit (presumably it raises when nothing matched --
        confirm against its definition).

        :param dot_input: path of the HyPhy .input file to reference.
        :param dot_aln: alignment file; only read when no external tree is
            set (self.use_given_tree is false), to build the tree.
        :param hyphy_batch_file: output .bf path; "" means *dot_input* with
            its extension replaced by ".bf".
        :param hyphy_result_file: result path written into the batch file;
            "" means *dot_input* with its extension replaced by ".result".
        :raises BFError: if the template content is empty.
        """
        path_main = os.path.splitext(dot_input)[0]

        if "" == hyphy_batch_file:
            hyphy_batch_file = path_main + ".bf"

        if "" == hyphy_result_file:
            hyphy_result_file = path_main + ".result"

        if "" == self.batch_content:
            raise BFError

        def substitute(flag, value, text):
            # A callable replacement is inserted verbatim, so backslashes in
            # paths (e.g. Windows paths) are not treated as regex escapes.
            replaced, num_hits = re.subn(flag, lambda _match: value, text)
            self._error_no_hit(num_hits)
            return replaced

        # replace begins here
        batch_content = substitute(self.f_input, dot_input, self.batch_content)

        # partition is optional: (0, 0) means "no partition"
        if (0, 0) == self.partition:
            if self.f_partition in batch_content:
                batch_content = substitute(self.f_partition, "", batch_content)
        else:
            batch_content = substitute(self.f_partition, "%d-%d" % self.partition, batch_content)

        batch_content = substitute(self.f_mdl, self.mdl_file, batch_content)

        # only support 1 matrix now :2014-5-26
        batch_content = substitute(self.f_matrix_name, self.matrix_name[0], batch_content)

        if self.use_given_tree:
            tree_newick_string = self.tree_definition_external
        else:
            genes_share_aln = pHdata.aln_reader(dot_aln)
            tree_newick_string = self.build_tree(genes_share_aln)

        batch_content = substitute(self.f_tree, tree_newick_string, batch_content)
        batch_content = substitute(self.f_output, hyphy_result_file, batch_content)

        self.check_whether_incomplete(batch_content)

        # Positional arguments: the keyword "name" only exists on Python 2's open().
        with open(hyphy_batch_file, "w") as bf_writer:
            bf_writer.write(batch_content)
Beispiel #6
0
def check_gene_order_in_alignments(path_to_alignments):
    """
    Check whether all .aln files in path_to_alignments list the same species names.

    The first .aln file found is used as the reference; every file whose gene
    list (first element of dh.aln_info) differs from it is printed.  The
    working directory is changed to path_to_alignments while the files are
    read and is always restored afterwards.

    :param path_to_alignments: directory containing the .aln files
    :return: the shared gene list when every file matches; None when the
        pyHYPHY module cannot be imported
    :raises AlignmentNotSame: if at least one file differs from the reference
    """
    # get file-names
    import os
    import os.path

    current_dir = os.path.abspath(os.curdir)
    os.chdir(path_to_alignments)
    try:
        # We are already inside the folder, so list the current directory;
        # re-using a relative path_to_alignments here would resolve it twice.
        aln_files = [single_file for single_file in os.listdir(os.curdir)
                     if ".aln" == os.path.splitext(single_file)[-1]]

        try:
            import pyHYPHY.DataHYPHY as dh

        except ImportError:
            print("error in importing pyHYPHY module")
            return

        aln_genes_calibration = dh.aln_info(aln_files[0])[0]
        gene_un_match = 0
        for aln_entry in aln_files:
            # Parse each file once and reuse the result for the report line.
            genes_in_entry = dh.aln_info(aln_entry)[0]
            if not aln_genes_calibration == genes_in_entry:
                print("%s :----  %s \n" % (aln_entry, str(genes_in_entry)))
                gene_un_match += 1

        print("whole number of unmatch file is %d \n and pattern is :\n %s \n" % (gene_un_match, aln_genes_calibration))
    finally:
        # Restore the caller's working directory on every exit path.
        os.chdir(current_dir)

    if 0 == gene_un_match:
        return aln_genes_calibration
    else:
        raise AlignmentNotSame
def batch_for_batch(work_dir="/home/zerodel/Workspace/mouse/FullLengthMouse",
                    aln_path="/home/zerodel/Workspace/mouse/mouse_8_species"):
    """Write GTR and nested-model HyPhy batch files for every .aln file.

    The process switches its working directory to *work_dir* (and does not
    switch back).  The .input, .bf and .result names are relative, so they
    are resolved against that directory.  An .input file that already
    exists is reused instead of being regenerated.

    :param work_dir: directory to change into before any file is written;
        the default is the location that used to be hard-coded.
    :param aln_path: directory scanned for files ending in ".aln".
    :return: None
    """
    os.chdir(work_dir)
    model_gtr = "rebuild_model.mdl"
    model_nest = "rna_structure_full_mouse.mdl"

    bf_maker_nest = BfH.HYPHYBatchFile(species_name="mouse",
                                       model_file=model_nest,
                                       bf_template_file="template_global")
    bf_maker_gtr = BfH.HYPHYBatchFile(species_name="mouse",
                                      model_file=model_gtr,
                                      bf_template_file="template_global")

    # Strip only the trailing ".aln": splitting on the first dot would turn
    # "a.b.aln" into job "a" and later rebuild the wrong path "a.aln".
    suffix = ".aln"
    jobids = [
        name[:-len(suffix)] for name in os.listdir(aln_path)
        if name.endswith(suffix)
    ]

    for job_id in jobids:
        aln_file_path_full = os.path.join(aln_path, job_id + suffix)
        input_file = os.path.join(os.curdir, job_id + ".input")

        if not os.path.exists(input_file):
            DH.aln2input(aln_file_path_full, input_file)

        # Same call for both models; only the tag inside the file names differs.
        for tag, bf_maker in (("gtr", bf_maker_gtr), ("nest", bf_maker_nest)):
            bf_maker.write_batch_file(dot_input=job_id + ".input",
                                      dot_aln=aln_file_path_full,
                                      hyphy_result_file=job_id + tag + ".result",
                                      hyphy_batch_file=job_id + tag + ".bf")
Beispiel #8
0
def check_aln_full_species(folder_alns):
    """Return the .aln file names in *folder_alns* that list the most genes.

    The gene count of a file is the length of the first value returned by
    dh.aln_info; every file whose count equals the maximum is returned, in
    directory-listing order.

    :param folder_alns: directory scanned for files ending in ".aln"
    :return: list of file names (not full paths); empty when the folder
        holds no .aln file
    """
    all_alns = [single_file for single_file in os.listdir(folder_alns) if single_file[-4:] == ".aln"]
    if not all_alns:
        # max() of an empty list raises ValueError; nothing to compare here.
        return []

    aln_gene_num = []
    for single_file in all_alns:
        genes, _gene_lengths = dh.aln_info(os.path.join(folder_alns, single_file))
        aln_gene_num.append(len(genes))

    max_num = max(aln_gene_num)
    return [aln for aln, gene_num in zip(all_alns, aln_gene_num) if gene_num == max_num]
Beispiel #9
0
def check_aln_full_species(folder_alns):
    """Return the .aln file names in *folder_alns* that list the most genes.

    The gene count of a file is the length of the first value returned by
    dh.aln_info; every file whose count equals the maximum is returned, in
    directory-listing order.

    :param folder_alns: directory scanned for files ending in ".aln"
    :return: list of file names (not full paths); empty when the folder
        holds no .aln file
    """
    all_alns = [
        single_file for single_file in os.listdir(folder_alns)
        if single_file[-4:] == ".aln"
    ]
    if not all_alns:
        # max() of an empty list raises ValueError; nothing to compare here.
        return []

    aln_gene_num = []
    for single_file in all_alns:
        genes, _gene_lengths = dh.aln_info(
            os.path.join(folder_alns, single_file))
        aln_gene_num.append(len(genes))

    max_num = max(aln_gene_num)
    return [
        aln for aln, gene_num in zip(all_alns, aln_gene_num)
        if gene_num == max_num
    ]
Beispiel #10
0
def check_aln_files(folder_alns="/media/zerodel/Home/Work/custom/ecoli_aln"):
    """
    Report how many .aln files contain the largest number of genes.

    The original purpose was to find the files that contain a gene of all
    10 species.  The count per file is the length of the first value
    returned by DataHyPHY.aln_info.  A summary line is printed; with no
    .aln file in the folder the reported numbers are both 0.

    :param folder_alns: directory scanned for files ending in ".aln"; the
        default is the location that used to be hard-coded
    :return: None
    """
    all_alns = [single_file for single_file in os.listdir(folder_alns) if single_file[-4:] == ".aln"]
    aln_gene_num = []
    for single_file in all_alns:
        full_aln_path = os.path.join(folder_alns, single_file)
        genes, _gene_lengths = DataHyPHY.aln_info(full_aln_path)
        aln_gene_num.append(len(genes))

    # Compute the maximum once (it was re-evaluated per file before) and
    # tolerate an empty folder, where max() would raise ValueError.
    max_num = max(aln_gene_num) if aln_gene_num else 0
    aln_full_gene = [aln for aln, gene_num in zip(all_alns, aln_gene_num) if gene_num == max_num]

    # One pre-formatted string prints identically on Python 2 and 3.
    print("fulllength has %d with  %s genes" % (len(aln_full_gene), str(max_num)))
Beispiel #11
0
def check_aln_files(folder_alns="/media/zerodel/Home/Work/custom/ecoli_aln"):
    """
    Report how many .aln files contain the largest number of genes.

    The original purpose was to find the files that contain a gene of all
    10 species.  The count per file is the length of the first value
    returned by DataHyPHY.aln_info.  A summary line is printed; with no
    .aln file in the folder the reported numbers are both 0.

    :param folder_alns: directory scanned for files ending in ".aln"; the
        default is the location that used to be hard-coded
    :return: None
    """
    all_alns = [
        single_file for single_file in os.listdir(folder_alns)
        if single_file[-4:] == ".aln"
    ]
    aln_gene_num = []
    for single_file in all_alns:
        full_aln_path = os.path.join(folder_alns, single_file)
        genes, _gene_lengths = DataHyPHY.aln_info(full_aln_path)
        aln_gene_num.append(len(genes))

    # Compute the maximum once (it was re-evaluated per file before) and
    # tolerate an empty folder, where max() would raise ValueError.
    max_num = max(aln_gene_num) if aln_gene_num else 0
    aln_full_gene = [
        aln for aln, gene_num in zip(all_alns, aln_gene_num)
        if gene_num == max_num
    ]

    # One pre-formatted string prints identically on Python 2 and 3.
    print("fulllength has %d with  %s genes" % (len(aln_full_gene), str(max_num)))
Beispiel #12
0
    def test_something(self):
        # dh.remove_gaps is expected to reduce the gapped sequence to "aca".
        observed = dh.remove_gaps("aca----gt")
        self.assertEqual("aca", observed)
Beispiel #13
0
    def test_remove_gap_matrix(self):
        # dh.remove_gaps_matrix is expected to map the gapped rows to "aaa"/"bbb".
        gapped_rows = ["aaa--a", "bbbcac"]
        expected_rows = ["aaa", "bbb"]
        observed_rows = dh.remove_gaps_matrix(gapped_rows)

        self.assertEqual(expected_rows, observed_rows)
Beispiel #14
0
    current_dir = os.path.abspath(os.curdir)
    os.chdir(path_aln_file)
    species_list = check_gene_order_in_alignments(path_aln_file)
    inputfile_header = [">%s" % species_name for species_name in species_list]
    matrix_sequence = ["" for species_name in species_list]

    aln_files = [single_file for single_file in os.listdir(path_aln_file)
                 if ".aln" == os.path.splitext(single_file)[-1]]

    for single_aln_file in aln_files:
        # single file operation
        # rejection : 1. length not enough 2 two many  gaps
        try:
<<<<<<< HEAD
            matrix_sequence = paste_matrix(matrix_sequence,DH.remove_gaps_matrix(extract_TIR_single_file(single_aln_file, length_of_TIR, start_point)))
=======

            if remove_gap:
                gene_seq_matrix_addition = DH.remove_gaps_matrix(extract_TIR_single_file(single_aln_file, length_of_TIR, start_point))
            else:
                gene_seq_matrix_addition = extract_TIR_single_file(single_aln_file, length_of_TIR, start_point)

            matrix_sequence = paste_matrix(matrix_sequence, gene_seq_matrix_addition)
>>>>>>> f0acd743d1106c96b88083b2df2cb3526b388aec

        except SequenceTooShort:
            print "Too short in %s" % single_aln_file
            continue
        except DimNotSame:
            print "Error of Dimisions %s" % single_aln_file
def aln_folder_traversal(folder_name):
    """Write HyPhy input and batch files for every alignment in *folder_name*.

    Four batch-file makers are used (built-in GY94, nested GY, nucleotide
    nested GY, and the "gu" codon-matrix model).  For each alignment a
    full-length, a part-1 (0-60) and a part-2 (60-longest sequence) batch
    file is written per maker.  The working directory is changed to
    *folder_name* and not restored.

    The work is done in three passes (all full-length files, then all
    part 1, then all part 2).  A maker keeps its partition between calls,
    so writing full/part1/part2 gene by gene would emit the "full length"
    file of every gene after the first with the previous gene's part-2
    partition still active.

    :param folder_name: directory holding the "*.aln" files
    :return: None
    """
    built_in_gy94mdl = "/usr/lib/hyphy/TemplateBatchFiles/TemplateModels/GY94.mdl"

    def make_maker(model_file, template_file):
        return bfHYPHY.HYPHYBatchFile(species_name="ecoli",
                                      model_file=model_file,
                                      bf_template_file=template_file)

    # (maker, [(batch-file name, result-file name) formats for full, part 1, part 2])
    plans = [
        (make_maker(built_in_gy94mdl, "partition.bf"),
         [("%sgy94.bf", "gy94%s_.result"),
          ("%sgy94p1.bf", "gy94p1%s_.result"),
          ("%sgy94p2.bf", "gy94p2%s_.result")]),
        (make_maker("nest_gy.mdl", "partition.bf"),
         [("%snest_gy.bf", "nest_gy%s.result"),
          ("%snest_gyp1.bf", "nest_gy%s_p1.result"),
          ("%snest_gyp2.bf", "nest_gy%s_p2.result")]),
        (make_maker("nt_nest_gy.mdl", "partition.bf"),
         [("%snt_nest.bf", "%snt_nest.result"),
          ("%snt_nest_p1.bf", "%snt_nest_p1.result"),
          ("%snt_nest_p2.bf", "%snt_nest_p2.result")]),
        (make_maker("myCodonMatrix.def", "synAlphaWPsiModelP.bf"),
         [("%sgu.bf", "%sgu.result"),
          ("%sgup1.bf", "%sgup1.result"),
          ("%sgup2.bf", "%sgup2.result")]),
    ]

    pwd = os.path.abspath(folder_name)
    os.chdir(folder_name)
    aln_files = [file1
                 for file1 in os.listdir(pwd)
                 if "aln" == file1.split(".")[-1]]

    def write_stage(stage, gene_id, aln_name, input_filename):
        # Write one batch file per maker for the given stage (0 full, 1 part 1, 2 part 2).
        for bf_maker, name_formats in plans:
            bf_format, result_format = name_formats[stage]
            bf_maker.write_batch_file(dot_aln=aln_name,
                                      dot_input=input_filename,
                                      hyphy_batch_file=bf_format % gene_id,
                                      hyphy_result_file=result_format % gene_id)

    # Pass 1: input files and full-length batch files, while no maker has a
    # partition set yet.
    jobs = []
    for aln_name in aln_files:
        gene_id = aln_name.split(".")[0]
        _genes, gene_len = dataHYPHY.aln_info(aln_name)
        input_filename = "%s.input" % gene_id
        dataHYPHY.aln2input(dot_aln_file=aln_name, hyphy_input_file=input_filename)
        jobs.append((gene_id, aln_name, input_filename, max(gene_len)))
        write_stage(0, gene_id, aln_name, input_filename)

    # Pass 2: part 1 uses the same fixed partition for every gene.
    if jobs:
        for bf_maker, _name_formats in plans:
            bf_maker.set_partition(0, 60)
    for gene_id, aln_name, input_filename, _len_gene in jobs:
        write_stage(1, gene_id, aln_name, input_filename)

    # Pass 3: part 2 ends at each gene's own length.
    for gene_id, aln_name, input_filename, len_gene in jobs:
        for bf_maker, _name_formats in plans:
            bf_maker.set_partition(60, len_gene)
        write_stage(2, gene_id, aln_name, input_filename)
Beispiel #16
0
def main():
    """Build no-gap HyPhy inputs and batch files for alignments shared by 10 species.

    Every .aln file of the source folder whose alignment lists exactly 10
    genes is copied to the target folder and converted to a no-gap .input
    file.  After changing into the target folder, three sets of GTR and
    nested-model batch files are written per job: full length, part 1
    (0-51) and part 2 (51-sequence length).

    :return: None
    """
    # main part
    aln_files_folder = "d:/Workspace/Ecoli/ecoli_10_species"
    target_path = "d:/Workspace/Ecoli/NoGapP1"

    aln_files = [single_file for single_file in os.listdir(aln_files_folder)
                 if ".aln" == os.path.splitext(single_file)[-1]]

    model_gtr = "rebuild_model.mdl"
    model_nest = "rna_full_length_structure.mdl"

    # Keep each job id together with its own sequence length.  Two separate
    # lists went out of step as soon as one alignment was skipped, so part 2
    # was written with another gene's length.
    jobs = []
    for aln in aln_files:
        aln_path_full = os.path.join(aln_files_folder, aln)

        gene_in_aln, length_in_aln = dh.aln_info(aln_path_full)
        if 10 != len(gene_in_aln):   # only those gene shared in 10 species
            continue

        job_id = aln.split(".")[0]
        jobs.append((job_id, length_in_aln[0]))
        input_file_path_full = os.path.join(target_path, job_id + ".input")
        shutil.copyfile(aln_path_full, os.path.join(target_path, aln))
        # write input file
        dh.aln2inputNogap(aln_path_full, input_file_path_full)

    os.chdir(target_path)
    bf_maker_nest = BfH.HYPHYBatchFile(species_name="ecoli",
                                       model_file=model_nest,
                                       bf_template_file="templateNoGap")
    bf_maker_gtr = BfH.HYPHYBatchFile(species_name="ecoli",
                                      model_file=model_gtr,
                                      bf_template_file="templateNoGap")
    makers = (("gtr", bf_maker_gtr), ("nest", bf_maker_nest))

    def write_pair(job_id, part):
        # One GTR and one nested batch file; *part* is "", "p1" or "p2".
        for tag, bf_maker in makers:
            stem = job_id + tag + part
            bf_maker.write_batch_file(dot_input=job_id + ".input",
                                      dot_aln=job_id + ".aln",
                                      hyphy_result_file=stem + ".result",
                                      hyphy_batch_file=stem + ".bf")

    # make bf file for full length no gap gtr and empirical model
    for job_id, _length in jobs:
        write_pair(job_id, "")

    # part 1: the same fixed partition for every job
    bf_maker_gtr.set_partition(0, 51)
    bf_maker_nest.set_partition(0, 51)
    for job_id, _length in jobs:
        write_pair(job_id, "p1")

    # part 2: from position 51 to the job's own sequence length
    for job_id, length in jobs:
        bf_maker_gtr.set_partition(51, length)
        bf_maker_nest.set_partition(51, length)
        write_pair(job_id, "p2")
    def test_remove_gap_matrix(self):
        # dh.remove_gaps_matrix is expected to map the gapped rows to "aaa"/"bbb".
        gapped_rows = ["aaa--a", "bbbcac"]
        expected_rows = ["aaa", "bbb"]
        observed_rows = dh.remove_gaps_matrix(gapped_rows)

        self.assertEqual(expected_rows, observed_rows)
Beispiel #18
0
def main():
    """Build no-gap HyPhy inputs and batch files for alignments shared by 10 species.

    Every .aln file of the source folder whose alignment lists exactly 10
    genes is copied to the target folder and converted to a no-gap .input
    file.  After changing into the target folder, three sets of GTR and
    nested-model batch files are written per job: full length, part 1
    (0-51) and part 2 (51-sequence length).

    :return: None
    """
    # main part
    aln_files_folder = "d:/Workspace/Ecoli/ecoli_10_species"
    target_path = "d:/Workspace/Ecoli/NoGapP1"

    aln_files = [
        single_file for single_file in os.listdir(aln_files_folder)
        if ".aln" == os.path.splitext(single_file)[-1]
    ]

    model_gtr = "rebuild_model.mdl"
    model_nest = "rna_full_length_structure.mdl"

    # Keep each job id together with its own sequence length.  Two separate
    # lists went out of step as soon as one alignment was skipped, so part 2
    # was written with another gene's length.
    jobs = []
    for aln in aln_files:
        aln_path_full = os.path.join(aln_files_folder, aln)

        gene_in_aln, length_in_aln = dh.aln_info(aln_path_full)
        if 10 != len(gene_in_aln):  # only those gene shared in 10 species
            continue

        job_id = aln.split(".")[0]
        jobs.append((job_id, length_in_aln[0]))
        input_file_path_full = os.path.join(target_path, job_id + ".input")
        shutil.copyfile(aln_path_full, os.path.join(target_path, aln))
        # write input file
        dh.aln2inputNogap(aln_path_full, input_file_path_full)

    os.chdir(target_path)
    bf_maker_nest = BfH.HYPHYBatchFile(species_name="ecoli",
                                       model_file=model_nest,
                                       bf_template_file="templateNoGap")
    bf_maker_gtr = BfH.HYPHYBatchFile(species_name="ecoli",
                                      model_file=model_gtr,
                                      bf_template_file="templateNoGap")
    makers = (("gtr", bf_maker_gtr), ("nest", bf_maker_nest))

    def write_pair(job_id, part):
        # One GTR and one nested batch file; *part* is "", "p1" or "p2".
        for tag, bf_maker in makers:
            stem = job_id + tag + part
            bf_maker.write_batch_file(dot_input=job_id + ".input",
                                      dot_aln=job_id + ".aln",
                                      hyphy_result_file=stem + ".result",
                                      hyphy_batch_file=stem + ".bf")

    # make bf file for full length no gap gtr and empirical model
    for job_id, _length in jobs:
        write_pair(job_id, "")

    # part 1: the same fixed partition for every job
    bf_maker_gtr.set_partition(0, 51)
    bf_maker_nest.set_partition(0, 51)
    for job_id, _length in jobs:
        write_pair(job_id, "p1")

    # part 2: from position 51 to the job's own sequence length
    for job_id, length in jobs:
        bf_maker_gtr.set_partition(51, length)
        bf_maker_nest.set_partition(51, length)
        write_pair(job_id, "p2")
    def test_something(self):
        # dh.remove_gaps is expected to reduce the gapped sequence to "aca".
        observed = dh.remove_gaps("aca----gt")
        self.assertEqual("aca", observed)
Beispiel #20
0
    def write_batch_file(self,
                         dot_input,
                         dot_aln,
                         hyphy_batch_file="",
                         hyphy_result_file=""):
        """Fill in the batch-file template and write the result to disk.

        Each placeholder pattern (self.f_input, self.f_partition, self.f_mdl,
        self.f_matrix_name, self.f_tree, self.f_output) is substituted in
        self.batch_content, and the hit count of every substitution is handed
        to self._error_no_hit (presumably it raises when nothing matched --
        confirm against its definition).

        :param dot_input: path of the HyPhy .input file to reference.
        :param dot_aln: alignment file; only read when no external tree is
            set (self.use_given_tree is false), to build the tree.
        :param hyphy_batch_file: output .bf path; "" means *dot_input* with
            its extension replaced by ".bf".
        :param hyphy_result_file: result path written into the batch file;
            "" means *dot_input* with its extension replaced by ".result".
        :raises BFError: if the template content is empty.
        """
        path_main = os.path.splitext(dot_input)[0]

        if "" == hyphy_batch_file:
            hyphy_batch_file = path_main + ".bf"

        if "" == hyphy_result_file:
            hyphy_result_file = path_main + ".result"

        if "" == self.batch_content:
            raise BFError

        def substitute(flag, value, text):
            # A callable replacement is inserted verbatim, so backslashes in
            # paths (e.g. Windows paths) are not treated as regex escapes.
            replaced, num_hits = re.subn(flag, lambda _match: value, text)
            self._error_no_hit(num_hits)
            return replaced

        # replace begins here
        batch_content = substitute(self.f_input, dot_input,
                                   self.batch_content)

        # partition is optional: (0, 0) means "no partition"
        if (0, 0) == self.partition:
            if self.f_partition in batch_content:
                batch_content = substitute(self.f_partition, "",
                                           batch_content)
        else:
            batch_content = substitute(self.f_partition,
                                       "%d-%d" % self.partition,
                                       batch_content)

        batch_content = substitute(self.f_mdl, self.mdl_file, batch_content)

        # only support 1 matrix now :2014-5-26
        batch_content = substitute(self.f_matrix_name, self.matrix_name[0],
                                   batch_content)

        if self.use_given_tree:
            tree_newick_string = self.tree_definition_external
        else:
            genes_share_aln = pHdata.aln_reader(dot_aln)
            tree_newick_string = self.build_tree(genes_share_aln)

        batch_content = substitute(self.f_tree, tree_newick_string,
                                   batch_content)
        batch_content = substitute(self.f_output, hyphy_result_file,
                                   batch_content)

        self.check_whether_incomplete(batch_content)

        # Positional arguments: the keyword "name" only exists on Python 2's open().
        with open(hyphy_batch_file, "w") as bf_writer:
            bf_writer.write(batch_content)