def main(argv):
    """Run one truSPAdes launch described by the command-line arguments.

    The function parses ``argv`` into launch options, makes sure the output
    directory exists, creates the log and obtains the barcode dataset from one
    of three sources:

    * a continued launch re-reads ``<output_dir>/dataset.info``;
    * if input directories were given, the dataset is generated from them;
    * otherwise it is read from ``options.dataset_file``.

    In the last two cases the dataset is also passed to
    ``barcode_extraction.print_dataset`` together with the
    ``<output_dir>/dataset.info`` path. Afterwards the action selected by
    ``options.mode`` is run; any mode other than ``"run_truspades"`` or
    ``"construct_subreferences"`` runs nothing.

    Calls ``sys.exit(1)`` when no dataset could be generated from the input
    directories.
    """
    options = launch_options.Options(argv, bin_home, truspades_home)
    support.ensure_dir_existence(options.output_dir)
    log = create_log(options)

    dataset_file = os.path.join(options.output_dir, "dataset.info")
    if options.continue_launch:
        # Resuming: reuse the description already stored in the output directory.
        dataset = barcode_extraction.ReadDataset(dataset_file, log)
    elif options.input_dirs is None:
        # No input directories were given, so rely on the explicit dataset file.
        dataset = barcode_extraction.ReadDataset(options.dataset_file, log)
        barcode_extraction.print_dataset(dataset, dataset_file, log)
    else:
        dataset = generate_dataset(options.input_dirs, log)
        if dataset is None:
            log.info("Error: could not parse dataset from input directories\n")
            sys.exit(1)
        barcode_extraction.print_dataset(dataset, dataset_file, log)
        log.info("Dataset generated. See result in " + dataset_file)

    log_dir = os.path.join(options.output_dir, "logs")
    support.ensure_dir_existence(log_dir)

    mode = options.mode
    if mode == "run_truspades":
        RunTruSPAdes(dataset, log_dir, options, log)
    elif mode == "construct_subreferences":
        # log=None makes ConstructSubreferences set up its own logger.
        reference_construction.ConstructSubreferences(
            dataset,
            options.reference,
            options.output_dir,
            options.index,
            options.threads,
            log=None,
        )

    log.info("TruSPAdes launch successfully finished")
    if options.test:
        CheckTestSuccess(options, log)
Example #2
0
def ConstructSubreferences(datasets, reference_file, output_dir, index = None, threads = 1, log = None):
    """Align barcodes to a reference and print filtered/subreference output.

    Args:
        datasets: barcode datasets, passed through to ``AlignToReference``.
        reference_file: path of the reference; it is indexed with bwa when no
            ``index`` is supplied and read with ``ReadReference``.
        output_dir: directory that receives the ``bwa_index`` (if built),
            ``alignments``, ``subreferences`` and ``filtered`` entries.
        index: existing bwa index; when ``None`` a new one is built.
        threads: thread count passed to ``AlignToReference``.
        log: logger to report progress to; when ``None`` the
            ``'reference_construction'`` logger is used, printing bare
            messages to stderr at INFO level.
    """
    bwa_command = "bin/spades-bwa"
    if log is None:
        log = logging.getLogger('reference_construction')
        log.setLevel(logging.INFO)
        # logging.getLogger returns the same logger object for the same name,
        # so attach the console handler only once; adding one per call would
        # print every message several times after repeated calls.
        if not log.handlers:
            console = logging.StreamHandler(sys.stderr)
            console.setFormatter(logging.Formatter('%(message)s'))
            console.setLevel(logging.INFO)
            log.addHandler(console)

    support.ensure_dir_existence(output_dir)
    # Kept as a fallback: creates the directory if the call above did not.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if index is None:
        log.info("Constructing index\n")
        index = alignment.index_bwa(bwa_command, log, reference_file, os.path.join(output_dir, "bwa_index"), "bwtsw")

    sam_dir = os.path.join(output_dir, "alignments")
    log.info("Aligning barcodes\n")
    sam_files = AlignToReference(datasets, sam_dir, bwa_command, log, index, threads)

    subreference_dir = os.path.join(output_dir, "subreferences")
    filtered_dir = os.path.join(output_dir, "filtered")
    support.recreate_dir(subreference_dir)
    support.recreate_dir(filtered_dir)

    log.info("Constructing subreferences")
    # Each entry is (barcode_id, (filtered, subreference)).
    subreferences_list = [(barcode_id, ConstructSubreferenceFromSam(barcode_sam)) for barcode_id, barcode_sam in sam_files]

    log.info("Reading reference")
    reference = ReadReference(reference_file)

    log.info("Printing output")
    PrintAll([(barcode, filtered) for barcode, (filtered, subreference) in subreferences_list], reference, filtered_dir)
    PrintAll([(barcode, subreference) for barcode, (filtered, subreference) in subreferences_list], reference, subreference_dir)
    log.info("Subreference construction finished. See results in " + output_dir)
Example #3
0
def RunTruQuast(input_dir, reference_dir, output_dir, threads):
    """Run per-barcode QUAST and write a summary to ``<output_dir>/results.tsv``.

    Barcode ids and their FASTA files are taken from
    ``<input_dir>/dataset.info`` when that file exists; otherwise every
    ``*.fasta`` / ``*.fa`` file directly inside ``input_dir`` is used and the
    file name without its extension serves as the id.

    Args:
        input_dir: directory holding either ``dataset.info`` or FASTA files.
        reference_dir: passed through to ``RunBarcodeQuast``.
        output_dir: directory for ``barcode_quast`` and ``results.tsv``.
        threads: passed through to ``RunBarcodeQuast``.
    """
    support.ensure_dir_existence(output_dir)
    dataset_file = os.path.join(input_dir, "dataset.info")
    if os.path.exists(dataset_file):
        ids = [barcode.id for barcode in barcode_extraction.ReadDataset(dataset_file)]
        files = [
            os.path.join(input_dir, "barcodes", bid, "truseq_long_reads.fasta")
            for bid in ids
        ]
    else:
        # List the directory once and derive both lists from the same names so
        # that ids[i] always corresponds to files[i]. (The original filtered
        # the ids on a variable from a different comprehension.)
        fasta_names = [
            name for name in os.listdir(input_dir)
            if name.endswith((".fasta", ".fa"))
        ]
        files = [os.path.join(input_dir, name) for name in fasta_names]
        ids = [os.path.splitext(name)[0] for name in fasta_names]

    barcode_quast_dir = os.path.join(output_dir, "barcode_quast")
    # Materialise the pairs: on Python 3 zip() is a one-shot iterator.
    RunBarcodeQuast(list(zip(ids, files)), barcode_quast_dir, reference_dir, threads)
    names, reports = ParseResults(barcode_quast_dir, ids)
    names.append("#partially unaligned")
    values = CollectResults(names, reports)

    # One row per metric: name, integer total, value divided by barcode count.
    with open(os.path.join(output_dir, "results.tsv"), "w") as results:
        for name in names:
            results.write(name + "\t" + str(int(values[name])) + "\t" +
                          str(values[name] / len(ids)) + "\n")
def main(argv):
    """Run one truSPAdes launch described by the command-line arguments.

    After parsing ``argv`` the output directory is ensured to exist; for a
    fresh test launch (``options.test`` without ``options.continue_launch``)
    it is additionally recreated. The barcode dataset is then obtained from
    one of three sources:

    * a continued launch re-reads ``<output_dir>/dataset.info``;
    * if input directories were given, the dataset is generated from them;
    * otherwise it is read from ``options.dataset_file``.

    In the last two cases the dataset is also passed to
    ``barcode_extraction.print_dataset`` together with the
    ``<output_dir>/dataset.info`` path. Finally the action selected by
    ``options.mode`` is run; any mode other than ``"run_truspades"`` or
    ``"construct_subreferences"`` runs nothing.

    Calls ``sys.exit(1)`` when no dataset could be generated from the input
    directories.
    """
    options = launch_options.Options(argv, spades_home, truspades_home, spades_version)
    support.ensure_dir_existence(options.output_dir)
    fresh_test_launch = options.test and not options.continue_launch
    if fresh_test_launch:
        support.recreate_dir(options.output_dir)
    log = create_log(options)

    dataset_file = os.path.join(options.output_dir, "dataset.info")
    if options.continue_launch:
        # Resuming: reuse the description already stored in the output directory.
        dataset = barcode_extraction.ReadDataset(dataset_file, log)
    elif options.input_dirs is None:
        # No input directories were given, so rely on the explicit dataset file.
        dataset = barcode_extraction.ReadDataset(options.dataset_file, log)
        barcode_extraction.print_dataset(dataset, dataset_file, log)
    else:
        dataset = generate_dataset(options.input_dirs, log)
        if dataset is None:
            log.info("Error: could not parse dataset from input directories\n")
            sys.exit(1)
        barcode_extraction.print_dataset(dataset, dataset_file, log)
        log.info("Dataset generated. See result in " + dataset_file)

    log_dir = os.path.join(options.output_dir, "logs")
    support.ensure_dir_existence(log_dir)

    mode = options.mode
    if mode == "run_truspades":
        RunTruSPAdes(dataset, log_dir, options, log)
    elif mode == "construct_subreferences":
        # log=None makes ConstructSubreferences set up its own logger.
        reference_construction.ConstructSubreferences(
            dataset,
            options.reference,
            options.output_dir,
            options.index,
            options.threads,
            log=None,
        )

    log.info("TruSPAdes launch successfully finished")
    if options.test:
        CheckTestSuccess(options, log)
Example #5
0
def RunTruSPAdes(dataset, log_dir, options, log):
    """Assemble every barcode of ``dataset`` in parallel and collect the results.

    One command per barcode is built with ``command_line`` and executed via
    ``parallel_launcher.run_in_parallel`` on ``options.threads`` threads, with
    per-barcode logs named ``<log_dir>/<id>.log``. The results are then
    checked and the contigs collected under the ``<output_dir>/TSLR`` base
    name for both ``fasta`` and ``fastq``.

    When ``options.clean`` is set, contigs are saved with ``SaveContigs`` and
    each barcode's working directory is removed.
    """
    log.info("Launching truSPAdes assembly in " + str(options.threads) + " threads")
    log.info("You can find logs for separate barcodes in " + log_dir)

    barcodes_dir = os.path.join(options.output_dir, "barcodes")
    support.ensure_dir_existence(barcodes_dir)

    # Pair each barcode id with the command that assembles it.
    commands = []
    for barcode in dataset:
        barcode_id = barcode.id
        barcode_command = command_line(barcode, barcodes_dir,
                                       options.spades_options,
                                       options.continue_launch)
        commands.append((barcode_id, barcode_command))

    log_template = os.path.join(log_dir, "{0}.log")
    task = parallel_launcher.ExternalCallTask(log_template, "", log.name)
    errors = parallel_launcher.run_in_parallel(task, commands, options.threads)
    if errors != 0:
        log.info(str(errors) + " barcodes failed to assemble")
    check_results(dataset, barcodes_dir, log)

    output_base = os.path.join(options.output_dir, "TSLR")
    for extension in ("fasta", "fastq"):
        collect_contigs(dataset, barcodes_dir, output_base, extension)
    result_path = os.path.join(options.output_dir, "TSLR.fasta")
    log.info("Assembled virtual long TruSeq reads can be found in " + result_path)

    if not options.clean:
        return

    # Cleanup: save the contigs first, then drop the per-barcode directories.
    for extension in ("fasta", "fastq"):
        SaveContigs(barcodes_dir, dataset, extension)
    for barcode in dataset:
        shutil.rmtree(os.path.join(barcodes_dir, barcode.id))
Example #6
0
def ConstructSubreferences(datasets, reference_file, output_dir, index = None, threads = 1, log = None):
    """Align barcodes to a reference and print filtered/subreference output.

    Args:
        datasets: barcode datasets, passed through to ``AlignToReference``.
        reference_file: path of the reference; it is indexed with bwa when no
            ``index`` is supplied and read with ``ReadReference``.
        output_dir: directory that receives the ``bwa_index`` (if built),
            ``alignments``, ``subreferences`` and ``filtered`` entries.
        index: existing bwa index; when ``None`` a new one is built.
        threads: thread count passed to ``AlignToReference``.
        log: logger to report progress to; when ``None`` the
            ``'reference_construction'`` logger is used, printing bare
            messages to stderr at INFO level.
    """
    bwa_command = "bin/bwa-spades"
    if log is None:
        log = logging.getLogger('reference_construction')
        log.setLevel(logging.INFO)
        # logging.getLogger returns the same logger object for the same name,
        # so attach the console handler only once; adding one per call would
        # print every message several times after repeated calls.
        if not log.handlers:
            console = logging.StreamHandler(sys.stderr)
            console.setFormatter(logging.Formatter('%(message)s'))
            console.setLevel(logging.INFO)
            log.addHandler(console)

    support.ensure_dir_existence(output_dir)
    # Kept as a fallback: creates the directory if the call above did not.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if index is None:
        log.info("Constructing index\n")
        index = alignment.index_bwa(bwa_command, log, reference_file, os.path.join(output_dir, "bwa_index"), "bwtsw")

    sam_dir = os.path.join(output_dir, "alignments")
    log.info("Aligning barcodes\n")
    sam_files = AlignToReference(datasets, sam_dir, bwa_command, log, index, threads)

    subreference_dir = os.path.join(output_dir, "subreferences")
    filtered_dir = os.path.join(output_dir, "filtered")
    support.recreate_dir(subreference_dir)
    support.recreate_dir(filtered_dir)

    log.info("Constructing subreferences")
    # Each entry is (barcode_id, (filtered, subreference)).
    subreferences_list = [(barcode_id, ConstructSubreferenceFromSam(barcode_sam)) for barcode_id, barcode_sam in sam_files]

    log.info("Reading reference")
    reference = ReadReference(reference_file)

    log.info("Printing output")
    PrintAll([(barcode, filtered) for barcode, (filtered, subreference) in subreferences_list], reference, filtered_dir)
    PrintAll([(barcode, subreference) for barcode, (filtered, subreference) in subreferences_list], reference, subreference_dir)
    log.info("Subreference construction finished. See results in " + output_dir)
Example #7
0
def SaveContigs(barcodes_dir, dataset, format):
    """Store a gzip-compressed copy of each barcode's long reads.

    For every barcode in ``dataset`` whose
    ``<barcodes_dir>/<id>/truseq_long_reads.<format>`` file exists, the file
    is compressed into ``<barcodes_dir>/<format>/<id>.<format>.gz``. Barcodes
    without such a file are skipped.

    Args:
        barcodes_dir: directory containing one sub-directory per barcode.
        dataset: iterable of barcodes exposing an ``id`` attribute.
        format: file extension of the reads (callers in this file pass
            ``"fasta"`` and ``"fastq"``).
    """
    contig_dir = os.path.join(barcodes_dir, format)
    support.ensure_dir_existence(contig_dir)
    for barcode in dataset:
        source_path = os.path.join(barcodes_dir, barcode.id,
                                   "truseq_long_reads." + format)
        if not os.path.isfile(source_path):
            continue
        target_path = os.path.join(contig_dir,
                                   barcode.id + "." + format + ".gz")
        # Close both handles deterministically so the archive is completely
        # written before a caller deletes the source directory.
        with open(source_path, "rb") as source:
            with gzip.open(target_path, "wb") as target:
                shutil.copyfileobj(source, target)
def RunTruSPAdes(dataset, log_dir, options, log):
    """Assemble every barcode of ``dataset`` in parallel and collect the results.

    One command per barcode is built with ``command_line`` and executed via
    ``parallel_launcher.run_in_parallel`` on ``options.threads`` threads, with
    per-barcode logs named ``<log_dir>/<id>.log``. The results are then
    checked and the contigs collected under the ``<output_dir>/TSLR`` base
    name for both ``fasta`` and ``fastq``.
    """
    log.info("Launching truSPAdes assembly in " + str(options.threads) + " threads")
    log.info("You can find logs for separate barcodes in " + log_dir)

    barcodes_dir = os.path.join(options.output_dir, "barcodes")
    support.ensure_dir_existence(barcodes_dir)

    # Pair each barcode id with the command that assembles it.
    commands = []
    for barcode in dataset:
        barcode_id = barcode.id
        barcode_command = command_line(
            barcode, barcodes_dir, options.spades_options, options.continue_launch
        )
        commands.append((barcode_id, barcode_command))

    log_template = os.path.join(log_dir, "{0}.log")
    task = parallel_launcher.ExternalCallTask(log_template, "", log.name)
    errors = parallel_launcher.run_in_parallel(task, commands, options.threads)
    if errors != 0:
        log.info(str(errors) + " barcodes failed to assemble")
    check_results(dataset, barcodes_dir, log)

    output_base = os.path.join(options.output_dir, "TSLR")
    for extension in ("fasta", "fastq"):
        collect_contigs(dataset, barcodes_dir, output_base, extension)
    result_path = os.path.join(options.output_dir, "TSLR.fasta")
    log.info("Assembled virtual long TruSeq reads can be found in " + result_path)
Example #9
0
def test_args():
    '''
    Fill the shared options storage with the settings of the test case.

    A temporary output directory is created, the example spectrum
    ``fs_examples/test.fs`` (located relative to the home directory reported
    by ``support``) is loaded, and the remaining options are set to fixed
    values before ``final_check`` is run.

    Returns the populated options storage.
    '''
    global options_storage

    import tempfile

    opts = options_storage

    # Output goes to a fresh temporary directory.
    opts.output_dir = tempfile.mkdtemp("gadma_test_dir")
    opts.output_dir = support.ensure_dir_existence(opts.output_dir)

    # Input data: the example spectrum file.
    opts.input_file = os.path.join(
        support.get_home_dir(), "..", "fs_examples", "test.fs")
    opts.input_data, opts.ns, opts.pop_labels = support.load_spectrum(
        opts.input_file, None, None)
    opts.ns = np.array(opts.ns)

    opts.number_of_populations = 1
    opts.linked_snp = False
    opts.theta = 0.37976
    opts.gen_time = 25
    opts.initial_structure = np.array([1])
    opts.final_structure = np.array([2])
    opts.size_of_generation = 5

    # `fracs` arrives as a comma-separated string; its first three numbers
    # are the fractions of old, mutated and crossed models respectively.
    opts.fracs = [float(part) for part in opts.fracs.split(",")]
    fraction_attributes = (
        "frac_of_old_models",
        "frac_of_mutated_models",
        "frac_of_crossed_models",
    )
    for position, attribute in enumerate(fraction_attributes):
        setattr(opts, attribute, opts.fracs[position])

    opts.optimize_name = 'hill_climbing'
    opts.moments_scenario = True

    opts.relative_params = False
    opts.dadi_pts = [20, 30, 40]
    opts.repeats = 2
    opts.processes = 2
    opts.epsilon = 1
    opts.test = True
    opts.multinom = True

    opts.final_check()

    return opts
Example #10
0
    def check(self):
        '''
        Check the parameters and convert them to their working form.

        Problems are reported through ``support.error`` and
        ``support.warning``. Along the way the method parses comma-separated
        options, resolves the input file and the output/resume directories,
        loads the spectrum (and bootstrap data for linked SNPs), loads the
        custom model function if a model file is given, fills in the default
        ``dadi_pts`` and finally calls ``put_default_structures`` and
        ``final_check``.
        '''
        if self.multinom is None:
            # Default: multinomial exactly when a custom model file is given.
            self.multinom = self.model_func_file is not None

        if self.pop_labels is not None:
            self.pop_labels = [x.strip() for x in self.pop_labels.split(',')]
        if self.ns is not None:
            self.ns = support.check_comma_sep_list(self.ns)

        # Report a missing input file before its path is used; previously
        # this check came only after check_file_existence had been called.
        if self.input_file is None:
            support.error(
                "Parameter `Input file` is required")
        self.input_file = support.check_file_existence(self.input_file)

        if self.resume_dir is not None:
            self.resume_dir = support.check_dir_existence(self.resume_dir)
        if self.resume_dir is not None and self.output_dir is None:
            # Resuming without an explicit output directory: derive one.
            self.output_dir = support.ensure_dir_existence(
                self.resume_dir + "_resumed", check_emptiness=True)
        elif self.output_dir is None:
            support.error("Parameter `Output directory` is required")
        else:
            self.output_dir = support.ensure_dir_existence(
                self.output_dir, check_emptiness=True)

        if self.theta is None:
            support.warning(
                "`Theta0` is not specified. It would be 1.0.")
        if self.gen_time is None:
            support.warning(
                "`Time for one generation` is not specified. Time will be in genetic units.")

        self.input_data, self.ns, self.pop_labels = support.load_spectrum(
                self.input_file, self.ns, self.pop_labels)
        self.ns = np.array(self.ns)
        self.number_of_populations = len(self.ns)

        # Linked or unlinked data
        if not self.linked_snp and self.boot_dir is not None:
            support.warning(
                    "SNP's are marked as unlinked, so the directory with bootstrap will be ignored.")
        elif self.linked_snp:
            if self.boot_dir is not None:
                self.boot_dir = support.check_dir_existence(self.boot_dir)
                self.boots = gadma.Inference.load_bootstrap_data_from_dir(self.boot_dir, self.ns, self.pop_labels)

        # Custom model
        if self.model_func_file is not None:
            self.model_func_file = support.check_file_existence(self.model_func_file)
            # NOTE(review): the `imp` module is deprecated and absent from
            # Python 3.12+; kept because the file's supported versions are
            # not visible here.
            file_with_model_func = imp.load_source('module', self.model_func_file)
            try:
                self.model_func = file_with_model_func.model_func
            except AttributeError:
                # Only a missing attribute means "no such function"; other
                # exceptions (e.g. KeyboardInterrupt) must not be swallowed.
                support.error(
                    "File " + self.model_func_file + ' does not contain function named `model_func`.')

            if self.p_ids is not None:
                self.p_ids = support.check_comma_sep_list(self.p_ids, is_int=False)

        # Parameters of the genetic algorithm
        self.fracs = [float(x) for x in self.fracs.split(",")]
        if len(self.fracs) != 3:
            support.error(
                "length of `Fractions` (Parameters of genetic algorithm) must be 3")
        self.frac_of_old_models = self.fracs[0]
        self.frac_of_mutated_models = self.fracs[1]
        self.frac_of_crossed_models = self.fracs[2]

        if self.moments_scenario and self.dadi_pts is not None:
            support.warning(
                "Moments doesn't use --pts argument, so it would be ignored")
        if self.dadi_pts is None:
            # Default grid sizes are derived from the largest sample size.
            max_n = max(self.ns)
            self.dadi_pts = [max_n, max_n + 10, max_n + 20]
        else:
            self.dadi_pts = support.check_comma_sep_list(self.dadi_pts)

        self.put_default_structures()

        self.final_check()
Example #11
0
def SaveContigs(barcodes_dir, dataset, format):
    """Store a gzip-compressed copy of each barcode's long reads.

    For every barcode in ``dataset`` whose
    ``<barcodes_dir>/<id>/truseq_long_reads.<format>`` file exists, the file
    is compressed into ``<barcodes_dir>/<format>/<id>.<format>.gz``. Barcodes
    without such a file are skipped.

    Args:
        barcodes_dir: directory containing one sub-directory per barcode.
        dataset: iterable of barcodes exposing an ``id`` attribute.
        format: file extension of the reads, used both in the source file
            name and as the name of the target sub-directory.
    """
    contig_dir = os.path.join(barcodes_dir, format)
    support.ensure_dir_existence(contig_dir)
    for barcode in dataset:
        source_path = os.path.join(barcodes_dir, barcode.id, "truseq_long_reads." + format)
        if not os.path.isfile(source_path):
            continue
        target_path = os.path.join(contig_dir, barcode.id + "." + format + ".gz")
        # Close both handles deterministically so the archive is completely
        # written and no file descriptors are left open per barcode.
        with open(source_path, "rb") as source:
            with gzip.open(target_path, "wb") as target:
                shutil.copyfileobj(source, target)