예제 #1
0
def optimize_tss(args_ops):
    """Run the TSSpredator parameter optimization for the project strain.

    The gff, fasta and wiggle input folders are checked for content and
    passed to ``Multiparser``.  The gff and fasta files are then looked up
    in the ``tmp`` sub-folder of their input folders: the first file whose
    name contains ``args_ops.project_strain`` is used.  After
    ``optimization`` returns, ``Helper().remove_all_content`` is called on
    ``<output_folder>/optimized_TSSpredator`` (once with "config"/"file",
    once with "Master"/"dir") and ``Helper().remove_tmp`` is called on the
    three input folders.

    Args:
        args_ops: Option container providing ``gffs``, ``fastas``, ``wigs``,
            ``project_strain`` and ``output_folder``.  It is also handed
            unchanged to ``optimization``.

    Raises:
        SystemExit: If one of the input folders is empty, or if no gff or
            fasta file name contains the project strain.
    """
    inputs = ((args_ops.gffs, "gff"), (args_ops.fastas, "fasta"),
              (args_ops.wigs, "wiggle"))
    for folder, kind in inputs:
        if not os.listdir(folder):
            print("Error: there is no {0} files!!!".format(kind))
            # NOTE(review): sys.exit() without an argument reports exit
            # status 0; kept unchanged so existing callers see no difference.
            sys.exit()
    Multiparser().parser_wig(args_ops.wigs)
    Multiparser().parser_gff(args_ops.gffs, None)
    Multiparser().parser_fasta(args_ops.fastas)
    gff_path = os.path.join(args_ops.gffs, "tmp")
    wig_path = os.path.join(args_ops.wigs, "tmp")
    fasta_path = os.path.join(args_ops.fastas, "tmp")
    strain = args_ops.project_strain
    # Take the first file whose name contains the strain; None when nothing
    # matches, so the failure can be reported instead of crashing later on
    # an unbound variable.
    gff_file = next((os.path.join(gff_path, name)
                     for name in os.listdir(gff_path) if strain in name),
                    None)
    fasta_file = next((os.path.join(fasta_path, name)
                       for name in os.listdir(fasta_path) if strain in name),
                      None)
    if gff_file is None:
        print("Error: there is no gff file of {0}!!!".format(strain))
        sys.exit()
    if fasta_file is None:
        print("Error: there is no fasta file of {0}!!!".format(strain))
        sys.exit()
    Helper().check_uni_attributes(gff_file)
    optimization(wig_path, fasta_file, gff_file, args_ops)
    result_dir = os.path.join(args_ops.output_folder, "optimized_TSSpredator")
    Helper().remove_all_content(result_dir, "config", "file")
    Helper().remove_all_content(result_dir, "Master", "dir")
    Helper().remove_tmp(args_ops.wigs)
    Helper().remove_tmp(args_ops.gffs)
    Helper().remove_tmp(args_ops.fastas)
예제 #2
0
def optimize_tss(args_ops):
    """Optimize TSSpredator parameters for ``args_ops.project_strain``.

    Steps performed:

    1. Exit if the gff, fasta or wiggle input folder is empty.
    2. Pass the three input folders to ``Multiparser``.
    3. In the ``tmp`` sub-folder of the gff and fasta folders, pick the
       first file whose name contains the project strain.
    4. Call ``Helper().check_uni_attributes`` on the gff file and run
       ``optimization``.
    5. Call ``Helper().remove_all_content`` on
       ``<output_folder>/optimized_TSSpredator`` ("config"/"file" and
       "Master"/"dir") and ``Helper().remove_tmp_dir`` on the input folders.

    Args:
        args_ops: Option container providing ``gffs``, ``fastas``, ``wigs``,
            ``project_strain`` and ``output_folder``; also forwarded to
            ``optimization``.

    Raises:
        SystemExit: If an input folder is empty or no gff/fasta file name
            contains the project strain.
    """
    if not os.listdir(args_ops.gffs):
        print("Error: There is no gff files!!!")
        sys.exit()
    if not os.listdir(args_ops.fastas):
        print("Error: There is no fasta files!!!")
        sys.exit()
    if not os.listdir(args_ops.wigs):
        print("Error: There is no wiggle files!!!")
        sys.exit()
    Multiparser().parser_wig(args_ops.wigs)
    Multiparser().parser_gff(args_ops.gffs, None)
    Multiparser().parser_fasta(args_ops.fastas)
    gff_path = os.path.join(args_ops.gffs, "tmp")
    wig_path = os.path.join(args_ops.wigs, "tmp")
    fasta_path = os.path.join(args_ops.fastas, "tmp")
    # Start from None so that a strain without a matching file is reported
    # explicitly rather than failing on an unbound local below.
    gff_file = None
    for gff in os.listdir(gff_path):
        if args_ops.project_strain in gff:
            gff_file = os.path.join(gff_path, gff)
            break
    fasta_file = None
    for fa in os.listdir(fasta_path):
        if args_ops.project_strain in fa:
            fasta_file = os.path.join(fasta_path, fa)
            break
    if gff_file is None:
        print("Error: There is no gff file of {0}!!!".format(
            args_ops.project_strain))
        sys.exit()
    if fasta_file is None:
        print("Error: There is no fasta file of {0}!!!".format(
            args_ops.project_strain))
        sys.exit()
    Helper().check_uni_attributes(gff_file)
    optimization(wig_path, fasta_file, gff_file, args_ops)
    out_dir = os.path.join(args_ops.output_folder, "optimized_TSSpredator")
    Helper().remove_all_content(out_dir, "config", "file")
    Helper().remove_all_content(out_dir, "Master", "dir")
    Helper().remove_tmp_dir(args_ops.wigs)
    Helper().remove_tmp_dir(args_ops.gffs)
    Helper().remove_tmp_dir(args_ops.fastas)
예제 #3
0
 def test_optimization(self):
     """Check that ``ot.optimization`` produces ``stat_aaa.csv``.

     ``ot.run_TSSpredator_paralle`` and ``ot.convert2gff`` are replaced by
     the ``Mock_func`` stand-ins before the call.  Fasta, gff and manual
     files are generated inside ``self.test_folder`` and the result is
     expected under ``<test_folder>/optimized_TSSpredator``.
     """
     # NOTE(review): the two attributes patched on ``ot`` are not restored
     # by this method.
     ot.run_TSSpredator_paralle = Mock_func().mock_run_TSSpredator_paralle
     ot.convert2gff = Mock_func().mock_convert2gff
     wigs_root = os.path.join(self.test_folder, "wigs")
     if not os.path.exists(wigs_root):
         os.mkdir(wigs_root)
     wig_folder = os.path.join(wigs_root, "tmp")
     if not os.path.exists(wig_folder):
         os.mkdir(wig_folder)
     fasta = os.path.join(self.test_folder, "aaa.fa")
     gff = os.path.join(self.test_folder, "aaa.gff")
     gen_file(fasta, self.example.fasta)
     gen_file(gff, self.example.gff_file)
     args = self.mock_args.mock()
     args.libs = self.example.libs
     args.cores = 1
     args.cluster = 3
     args.program = "TSS"
     args.project_strain = "aaa"
     args.replicate = "all_1"
     args.utr = 200
     args.steps = 2
     args.gene_length = 2000
     args.height = 0.9
     args.height_reduction = 0.8
     args.factor = 0.9
     args.factor_reduction = 0.8
     args.base_height = 0.01
     args.enrichment = 0.5
     args.processing = 0.5
     args.length = None
     args.replicate_name = "test"
     args.tsspredator_path = "test"
     args.manual = os.path.join(self.test_folder, "manual.gff")
     gen_file(args.manual, self.example.manual_file)
     args.output_folder = self.test_folder
     result_dir = os.path.join(self.test_folder, "optimized_TSSpredator")
     os.mkdir(result_dir)
     # The context manager closes the log handle even when optimization
     # raises; previously the file object was never closed.
     with open(os.path.join(self.test_folder, "test.log"), "w") as log:
         ot.optimization(wig_folder, fasta, gff, args, args.manual, 2000,
                         "aaa", log)
     self.assertTrue(
         os.path.exists(os.path.join(result_dir, "stat_aaa.csv")))
 def test_optimization(self):
     """Run ``ot.optimization`` on generated inputs and expect a stat file.

     The TSSpredator runner and the gff converter on ``ot`` are swapped for
     ``Mock_func`` replacements.  Input files are written into
     ``self.test_folder``; the test passes when
     ``optimized_TSSpredator/stat_aaa.csv`` exists afterwards.
     """
     ot.run_TSSpredator_paralle = Mock_func().mock_run_TSSpredator_paralle
     ot.convert2gff = Mock_func().mock_convert2gff
     if not os.path.exists(os.path.join(self.test_folder, "wigs")):
         os.mkdir(os.path.join(self.test_folder, "wigs"))
     wig_folder = os.path.join(self.test_folder, "wigs", "tmp")
     if not os.path.exists(wig_folder):
         os.mkdir(wig_folder)
     fasta = os.path.join(self.test_folder, "aaa.fa")
     gff = os.path.join(self.test_folder, "aaa.gff")
     gen_file(fasta, self.example.fasta)
     gen_file(gff, self.example.gff_file)
     args = self.mock_args.mock()
     args.libs = self.example.libs
     args.cores = 1
     args.cluster = 3
     args.program = "TSS"
     args.project_strain = "aaa"
     args.replicate = "all_1"
     args.utr = 200
     args.steps = 2
     args.gene_length = 2000
     args.height = 0.9
     args.height_reduction = 0.8
     args.factor = 0.9
     args.factor_reduction = 0.8
     args.base_height = 0.01
     args.enrichment = 0.5
     args.processing = 0.5
     args.length = None
     args.replicate_name = "test"
     args.tsspredator_path = "test"
     args.manual = os.path.join(self.test_folder, "manual.gff")
     gen_file(args.manual, self.example.manual_file)
     args.output_folder = self.test_folder
     os.mkdir(os.path.join(self.test_folder, "optimized_TSSpredator"))
     log_path = os.path.join(self.test_folder, "test.log")
     # Open the log in a ``with`` block so the handle is released once the
     # call finishes (it used to stay open for the rest of the test run).
     with open(log_path, "w") as log:
         ot.optimization(wig_folder, fasta, gff, args, args.manual, 2000,
                         "aaa", log)
     self.assertTrue(os.path.exists(os.path.join(
         self.test_folder, "optimized_TSSpredator", "stat_aaa.csv")))
예제 #5
0
def optimize_tss(args_ops, log):
    """Run the TSSpredator optimization once per manually-curated gff file.

    The gff, fasta and wiggle folders are checked for content, then the
    gff, fasta, wiggle and manual folders are passed to ``Multiparser``.
    Every file in the ``tmp`` sub-folder of ``args_ops.manuals`` is treated
    as one manual set; its name without ".gff" is the prefix used to find
    the matching annotation gff and fasta file.  The compared region length
    comes from ``args_ops.strain_lengths[prefix]``, or from
    ``get_length(fasta_file)`` when the mapping contains the key "all".
    Manual sets with no configured length are skipped.

    Args:
        args_ops: Option container providing ``gffs``, ``fastas``, ``wigs``,
            ``manuals``, ``strain_lengths`` (mapping of genome name to
            length, or containing "all") and ``output_folder``; also
            forwarded to ``optimization``.
        log: Object with a ``write`` method that receives progress
            messages; it is also forwarded to ``optimization``.

    Raises:
        SystemExit: If an input folder is empty, if a genome named in
            ``strain_lengths`` has no manual gff file, or if the annotation
            gff / fasta file for a manual set cannot be found.
    """
    inputs = ((args_ops.gffs, "gff"), (args_ops.fastas, "fasta"),
              (args_ops.wigs, "wiggle"))
    for folder, kind in inputs:
        if not os.listdir(folder):
            print("Error: There is no {0} file!".format(kind))
            # NOTE(review): exits with status 0; kept unchanged for callers.
            sys.exit()
    Multiparser().parser_wig(args_ops.wigs)
    Multiparser().parser_gff(args_ops.gffs, None)
    Multiparser().parser_fasta(args_ops.fastas)
    Multiparser().parser_gff(args_ops.manuals, None)
    gff_path = os.path.join(args_ops.gffs, "tmp")
    wig_path = os.path.join(args_ops.wigs, "tmp")
    fasta_path = os.path.join(args_ops.fastas, "tmp")
    manual_path = os.path.join(args_ops.manuals, "tmp")
    strain_lengths = args_ops.strain_lengths
    if "all" not in strain_lengths:
        # Every explicitly named genome needs a manual set of the same name.
        manual_names = os.listdir(manual_path)
        for strain in strain_lengths:
            detect = False
            for man in manual_names:
                if strain == man.replace(".gff", ""):
                    detect = True
                    log.write("The manually-curated set is found - "
                              "{0}\n".format(os.path.join(manual_path, man)))
            if not detect:
                log.write(
                    "The manually-curated set of {0} is not found.\n".format(
                        strain))
                print("Error: There are genomes in --genome_lengths "
                      "which is not contained in manually-detected "
                      "TSS gff files!")
                sys.exit()
    result_dir = os.path.join(args_ops.output_folder, "optimized_TSSpredator")
    for man in os.listdir(manual_path):
        prefix = man.replace(".gff", "")
        if prefix in strain_lengths:
            length = strain_lengths[prefix]
        elif "all" in strain_lengths:
            length = "all"
        else:
            # No length configured for this manual set: nothing to run and
            # nothing to log (logging here used to reference a length that
            # was unset or left over from the previous file).
            continue
        man_file = os.path.join(manual_path, man)
        # Look the inputs up afresh for every manual set so that a missing
        # file is never silently replaced by the previous iteration's path.
        gff_file = next(
            (os.path.join(gff_path, gff) for gff in os.listdir(gff_path)
             if (gff[:-4] == prefix) and (".gff" in gff)), None)
        fasta_file = next(
            (os.path.join(fasta_path, fa) for fa in os.listdir(fasta_path)
             if (".".join(fa.split(".")[:-1]) == prefix) and (".fa" in fa)),
            None)
        if gff_file is None or fasta_file is None:
            log.write("The gff file or fasta file of {0} is not "
                      "found.\n".format(prefix))
            print("Error: The gff file or fasta file of {0} is not "
                  "found!".format(prefix))
            sys.exit()
        log.write("The comparing sequence region of {0} is ".format(prefix))
        if length == "all":
            length = get_length(fasta_file)
        log.write(str(length) + "\n")
        Helper().check_uni_attributes(gff_file)
        log.write("Running optimize_TSSpredator.py for optimization.\n")
        optimization(wig_path, fasta_file, gff_file, args_ops, man_file,
                     length, prefix, log)
        Helper().remove_all_content(result_dir, "config", "file")
        Helper().remove_all_content(result_dir, "Master", "dir")
    Helper().remove_tmp_dir(args_ops.wigs)
    Helper().remove_tmp_dir(args_ops.gffs)
    Helper().remove_tmp_dir(args_ops.fastas)
    Helper().remove_tmp_dir(args_ops.manuals)