Ejemplo n.º 1
0
    def generate_config(self, cfg):
        """Prepare the config files for this stage's K iteration.

        On the base stage the "debruijn" configs are copied from
        ``self.tmp_configs_dir`` into ``<cfg.output_dir>/K<self.K>/configs``.
        The ``*.info`` files in that directory are then patched with values
        taken from ``cfg`` and from this stage object.
        """
        iteration_dir = os.path.join(cfg.output_dir, "K%d" % self.K)
        checkpoints_dir = os.path.join(iteration_dir, "saves")
        configs_dst = os.path.join(iteration_dir, "configs")

        if self.get_stage(self.short_name) == options_storage.BASE_STAGE:
            if not os.path.isdir(iteration_dir):
                os.makedirs(iteration_dir)

            # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
            dir_util._path_created = {}
            dir_util.copy_tree(os.path.join(self.tmp_configs_dir, "debruijn"),
                               configs_dst, preserve_times=False)

        # Contigs of the previous K (when there is one) are passed on to the
        # main config below.
        prev_contigs = None
        if self.prev_K:
            prev_contigs = os.path.join(cfg.output_dir, "K%d" % self.prev_K,
                                        "simplified_contigs.fasta")

        construction_info = os.path.join(configs_dst, "construction.info")
        known_options = cfg.__dict__
        if "read_buffer_size" in known_options:
            # FIXME why here???
            process_cfg.substitute_params(
                construction_info,
                {"read_buffer_size": cfg.read_buffer_size}, self.log)
        if "scaffolding_mode" in known_options:
            # FIXME why here???
            process_cfg.substitute_params(
                os.path.join(configs_dst, "pe_params.info"),
                {"scaffolding_mode": cfg.scaffolding_mode}, self.log)

        prepare_config_rnaspades(os.path.join(configs_dst, "rna_mode.info"),
                                 self.log)
        prepare_config_construction(construction_info, self.log)
        main_config = os.path.join(configs_dst, "config.info")
        prepare_config_spades(main_config, cfg, self.log, prev_contigs,
                              self.K, self.get_stage(self.short_name),
                              checkpoints_dir, self.last_one, self.bin_home)
def PrepareConfigs(params, log):
    """Copy the configs and substitute parameters into ``params.config_file``.

    Logs a message and exits with status 1 when the config file does not
    exist after ``CopyConfigs`` has run.
    """
    CopyConfigs(params, log)
    substitutions = CreateParamDict(params)
    if os.path.exists(params.config_file):
        process_cfg.substitute_params(params.config_file, substitutions, log)
        return
    log.info("ERROR: config file was not found")
    sys.exit(1)
Ejemplo n.º 3
0
def run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, K):
    """Configure and launch the ``scaffold_correction`` binary for one K.

    A fresh ``<cfg.output_dir>/SCC/K<K>`` directory is created (an existing
    one is removed first), the "debruijn" configs are copied into it and
    patched, and the binary is started through ``support.sys_call``.

    :param configs_dir: directory holding the "debruijn" configs to copy
    :param execution_home: directory holding the executable
    :param cfg: settings object; ``output_dir`` is read, and
        ``read_buffer_size`` when it is present
    :param log: logger handed to every helper
    :param latest: directory expected to contain ``scaffolds.fasta``
    :param K: k-mer size; used in the directory name and passed on to
        ``prepare_config_scaffold_correction``
    """
    data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    # distutils caches the directories it has created; after the rmtree above
    # the cache can be stale and copy_tree would not re-create them.
    # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
    dir_util._path_created = {}
    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running scaffold correction \n")
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffolds were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    process_cfg.substitute_params(os.path.join(dst_configs, "moleculo_mode.info"), {"scaffolds_file": scaffolds_file}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, K)

    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    add_configs(command, dst_configs)
    log.info(str(command))
    support.sys_call(command, log)
def PrepareConfigs(params, log):
    """Copy the configs and substitute parameters into ``params.config_file``.

    Logs a message and exits with status 1 when the config file does not
    exist after ``CopyConfigs`` has run.
    """
    CopyConfigs(params, log)
    substitutions = CreateParamDict(params)
    if os.path.exists(params.config_file):
        process_cfg.substitute_params(params.config_file, substitutions, log)
        return
    log.info("ERROR: config file was not found")
    sys.exit(1)
Ejemplo n.º 5
0
def run_scaffold_correction(configs_dir, execution_home, cfg, log, K):
    """Configure and launch the ``scaffold_correction`` binary.

    A fresh ``<cfg.output_dir>/SCC`` directory is created (an existing one is
    removed first), the "debruijn" configs are copied into it, ``*.template``
    configs are resolved, and the binary is started via ``support.sys_call``.

    :param configs_dir: directory holding the "debruijn" configs to copy
    :param execution_home: directory holding the executable
    :param cfg: settings object; ``output_dir`` is read, and
        ``read_buffer_size`` when it is present
    :param log: logger handed to every helper
    :param K: k-mer size whose ``K<K>`` output directory is expected to
        contain ``scaffolds.fasta``
    """
    data_dir = os.path.join(cfg.output_dir, "SCC")
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    # distutils caches the directories it has created; after the rmtree above
    # the cache can be stale and copy_tree would not re-create them.
    # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
    dir_util._path_created = {}
    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    # Resolve template configs: a template becomes the real config unless a
    # real config with that name already exists, in which case it is dropped.
    template_suffix = '.template'
    for root, _dirs, files in os.walk(dst_configs):
        for name in files:
            template_path = os.path.join(root, name)
            if not template_path.endswith('.info' + template_suffix):
                continue
            # Strip only the trailing suffix; splitting on '.template' would
            # cut the path at the first occurrence anywhere in it.
            resolved_path = template_path[:-len(template_suffix)]
            if os.path.isfile(resolved_path):
                os.remove(template_path)
            else:
                os.rename(template_path, resolved_path)

    log.info("\n== Running scaffold correction \n")
    latest = os.path.join(cfg.output_dir, "K%d" % K)
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffolds were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, scaffolds_file)

    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    log.info(str(command))
    support.sys_call(command, log)
Ejemplo n.º 6
0
def prepare_config_rnaspades(filename, log):
    """Write the strand-specificity settings into the config at ``filename``.

    Does nothing unless ``options_storage.rna`` is set.
    """
    if options_storage.rna:
        process_cfg.substitute_params(filename, {
            "ss_enabled": bool_to_str(options_storage.strand_specific is not None),
            "antisense": bool_to_str(options_storage.strand_specific),
        }, log)
Ejemplo n.º 7
0
def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one):
    """Substitute the settings of one K iteration into the config ``filename``.

    Values come from ``cfg`` and from the arguments; options that ``cfg`` may
    not define (``diploid_mode``, ``resolving_mode``, ``careful``,
    ``pacbio_mode``) are substituted only when present on the object.
    """
    optional = cfg.__dict__

    params = {"K": str(K), "run_mode": "false"}
    if "diploid_mode" in optional:
        params["diploid_mode"] = bool_to_str(cfg.diploid_mode)
    params["dataset"] = process_cfg.process_spaces(cfg.dataset)
    params["output_base"] = process_cfg.process_spaces(cfg.output_dir)
    params["tmp_dir"] = process_cfg.process_spaces(cfg.tmp_dir)

    # contigs from a previous iteration are optional
    has_extra_contigs = bool(additional_contigs_fname)
    if has_extra_contigs:
        params["additional_contigs"] = process_cfg.process_spaces(additional_contigs_fname)
    params["use_additional_contigs"] = bool_to_str(has_extra_contigs)

    params["main_iteration"] = bool_to_str(last_one)
    params["entry_point"] = stage
    params["load_from"] = saves_dir
    params["developer_mode"] = bool_to_str(cfg.developer_mode)
    params["gap_closer_enable"] = bool_to_str(last_one)
    params["rr_enable"] = bool_to_str(last_one and cfg.rr_enable)
    params["max_threads"] = cfg.max_threads
    params["max_memory"] = cfg.max_memory
    params["correct_mismatches"] = bool_to_str(last_one)

    if "resolving_mode" in optional:
        params["resolving_mode"] = cfg.resolving_mode
    if "careful" in optional:
        params["mismatch_careful"] = bool_to_str(cfg.careful)
    if "pacbio_mode" in optional:
        params["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        params["pacbio_reads"] = process_cfg.process_spaces(cfg.pacbio_reads)

    process_cfg.substitute_params(filename, params, log)
def run_scaffold_correction(configs_dir, execution_home, cfg, log, K):
    """Configure and launch the ``scaffold_correction`` binary.

    A fresh ``<cfg.output_dir>/SCC`` directory is created (an existing one is
    removed first), the "debruijn" configs are copied into it, ``*.template``
    configs are resolved, and the binary is started via ``support.sys_call``.

    :param configs_dir: directory holding the "debruijn" configs to copy
    :param execution_home: directory holding the executable
    :param cfg: settings object; ``output_dir`` is read, and
        ``read_buffer_size`` when it is present
    :param log: logger handed to every helper
    :param K: k-mer size whose ``K<K>`` output directory is expected to
        contain ``scaffolds.fasta``
    """
    data_dir = os.path.join(cfg.output_dir, "SCC")
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    # distutils caches the directories it has created; after the rmtree above
    # the cache can be stale and copy_tree would not re-create them.
    # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
    dir_util._path_created = {}
    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    # Resolve template configs: a template becomes the real config unless a
    # real config with that name already exists, in which case it is dropped.
    template_suffix = '.template'
    for root, _dirs, files in os.walk(dst_configs):
        for name in files:
            template_path = os.path.join(root, name)
            if not template_path.endswith('.info' + template_suffix):
                continue
            # Strip only the trailing suffix; splitting on '.template' would
            # cut the path at the first occurrence anywhere in it.
            resolved_path = template_path[:-len(template_suffix)]
            if os.path.isfile(resolved_path):
                os.remove(template_path)
            else:
                os.rename(template_path, resolved_path)

    log.info("\n== Running scaffold correction \n")
    latest = os.path.join(cfg.output_dir, "K%d" % K)
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffolds were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, scaffolds_file)

    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    log.info(str(command))
    support.sys_call(command, log)
Ejemplo n.º 9
0
def prepare_config_spades(filename, cfg, log, use_additional_contigs, K,
                          last_one):
    """Substitute the settings of one K iteration into the config ``filename``.

    The entry point is always "construction". ``resolving_mode`` and
    ``careful`` are substituted only when ``cfg`` defines them.
    """
    params = {
        "K": str(K),
        "run_mode": "false",
        "dataset": process_cfg.process_spaces(cfg.dataset),
        "output_base": process_cfg.process_spaces(cfg.output_dir),
        "additional_contigs": process_cfg.process_spaces(cfg.additional_contigs),
        "entry_point": "construction",
        "developer_mode": bool_to_str(cfg.developer_mode),
        "gap_closer_enable": bool_to_str(last_one),
        "paired_mode": bool_to_str(last_one and cfg.paired_mode),
        "topology_simplif_enabled": bool_to_str(last_one),
        "use_additional_contigs": bool_to_str(use_additional_contigs),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "correct_mismatches": bool_to_str(last_one),
    }

    optional = cfg.__dict__
    if "resolving_mode" in optional:
        params["resolving_mode"] = cfg.resolving_mode
    if "careful" in optional:
        params["mismatch_careful"] = bool_to_str(cfg.careful)

    process_cfg.substitute_params(filename, params, log)
    def generate_config(self, cfg):
        """Create and patch the config directory for scaffold correction.

        A fresh ``<cfg.output_dir>/SCC/K<SCC_K>`` directory is created (an
        existing one is removed first) and the "debruijn" configs from
        ``self.tmp_configs_dir`` are copied into it. The scaffolds path
        written to the config points into the output directory of the last
        value in ``cfg.iterative_K``.
        """
        # Two different K values are involved: the last assembly K locates the
        # scaffolds, while options_storage.SCC_K names the SCC directory and
        # is passed to the config helper.
        last_assembly_K = cfg.iterative_K[-1]
        latest = os.path.join(cfg.output_dir, "K%d" % last_assembly_K)
        K = options_storage.SCC_K
        data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
        saves_dir = os.path.join(data_dir, "saves")
        dst_configs = os.path.join(data_dir, "configs")
        cfg_file_name = os.path.join(dst_configs, "config.info")

        if os.path.isdir(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        # distutils caches the directories it has created; after the rmtree
        # above the cache can be stale and copy_tree would not re-create them.
        # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util._path_created = {}
        dir_util.copy_tree(os.path.join(self.tmp_configs_dir, "debruijn"),
                           dst_configs,
                           preserve_times=False)

        scaffolds_file = os.path.join(latest, "scaffolds.fasta")
        if "read_buffer_size" in cfg.__dict__:
            construction_cfg_file_name = os.path.join(dst_configs,
                                                      "construction.info")
            process_cfg.substitute_params(
                construction_cfg_file_name,
                {"read_buffer_size": cfg.read_buffer_size}, self.log)
        process_cfg.substitute_params(
            os.path.join(dst_configs, "moleculo_mode.info"),
            {"scaffolds_file": scaffolds_file}, self.log)
        prepare_config_scaffold_correction(cfg_file_name, cfg, self.log,
                                           saves_dir, K)
Ejemplo n.º 11
0
def run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, K):
    """Configure and launch ``spades-truseq-scfcorrection`` for one K.

    A fresh ``<cfg.output_dir>/SCC/K<K>`` directory is created (an existing
    one is removed first), the "debruijn" configs are copied into it and
    patched, and the binary is started through ``support.sys_call``.

    :param configs_dir: directory holding the "debruijn" configs to copy
    :param execution_home: directory holding the executable
    :param cfg: settings object; ``output_dir`` is read, and
        ``read_buffer_size`` when it is present
    :param log: logger handed to every helper
    :param latest: directory expected to contain ``scaffolds.fasta``
    :param K: k-mer size; used in the directory name and passed on to
        ``prepare_config_scaffold_correction``
    """
    data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    # distutils caches the directories it has created; after the rmtree above
    # the cache can be stale and copy_tree would not re-create them.
    # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
    dir_util._path_created = {}
    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"),
                       dst_configs,
                       preserve_times=False)

    log.info("\n== Running scaffold correction \n")
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffolds were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs,
                                                  "construction.info")
        process_cfg.substitute_params(
            construction_cfg_file_name,
            {"read_buffer_size": cfg.read_buffer_size}, log)
    process_cfg.substitute_params(
        os.path.join(dst_configs, "moleculo_mode.info"),
        {"scaffolds_file": scaffolds_file}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, K)

    command = [
        os.path.join(execution_home, "spades-truseq-scfcorrection"),
        cfg_file_name
    ]
    add_configs(command, dst_configs)
    log.info(str(command))
    support.sys_call(command, log)
Ejemplo n.º 12
0
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    """Prepare the configs for one K and run the ``spades`` binary on them.

    In continue mode an iteration that already produced
    ``final_contigs.fasta`` is skipped unless ``options_storage.restart_from``
    names this K. A restart value of the form ``k<K>:<stage>`` resumes this K
    from ``<stage>`` and requires the ``saves`` directory to exist; otherwise
    the K directory is rebuilt from the "debruijn" configs.

    :param configs_dir: directory holding the "debruijn" configs to copy
    :param execution_home: directory holding the executable
    :param cfg: settings object (``output_dir``, optional
        ``read_buffer_size``, plus whatever ``prepare_config_spades`` reads)
    :param log: logger handed to every helper
    :param K: k-mer size of this iteration
    :param prev_K: k-mer size of the previous iteration, or a falsy value
    :param last_one: whether this is the final iteration
    """
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if options_storage.continue_mode:
        restart_from = options_storage.restart_from
        stage_prefix = "k%d:" % K
        restarting_here = bool(restart_from) and (
            restart_from == ("k%d" % K) or restart_from.startswith(stage_prefix))
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not restarting_here:
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        # Only take the stage from restart_from when it refers to THIS K; a
        # stage given for another K must not be applied here.
        if restart_from and restart_from.startswith(stage_prefix):
            stage = restart_from[len(stage_prefix):]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir), log)
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        shutil.copytree(os.path.join(configs_dir, "debruijn"), dst_configs)
        # Resolve template configs: a template becomes the real config unless
        # a real config with that name already exists.
        template_suffix = '.template'
        for root, _dirs, files in os.walk(dst_configs):
            for name in files:
                template_path = os.path.join(root, name)
                if not template_path.endswith('.info' + template_suffix):
                    continue
                # Strip only the trailing suffix; splitting on '.template'
                # would cut the path at the first occurrence anywhere in it.
                resolved_path = template_path[:-len(template_suffix)]
                if os.path.isfile(resolved_path):
                    os.remove(template_path)
                else:
                    os.rename(template_path, resolved_path)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_spades(cfg_file_name, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one)

    command = [os.path.join(execution_home, "spades"), cfg_file_name]

## this code makes sense for src/debruijn/simplification.cpp: corrected_and_save_reads() function which is not used now
#    bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")
#    if os.path.isdir(bin_reads_dir):
#        if glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
#            for cor_filename in glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
#                cor_index = cor_filename.rfind("_cor")
#                new_bin_filename = cor_filename[:cor_index] + cor_filename[cor_index + 4:]
#                shutil.move(cor_filename, new_bin_filename)
    support.sys_call(command, log)
def prepare_config_bgcspades(filename, cfg, log):
    """Substitute the HMM-related settings into the config ``filename``.

    Does nothing when ``cfg`` has no ``set_of_hmms`` attribute. Three extra
    settings are substituted when ``options_storage.args.bio`` is set.
    """
    if "set_of_hmms" not in cfg.__dict__:
        return
    params = {"set_of_hmms": cfg.set_of_hmms}
    if options_storage.args.bio:
        params.update({
            "component_size_part": 1,
            "set_copynumber": bool_to_str(True),
            "start_only_from_tips": bool_to_str(True),
        })
    process_cfg.substitute_params(filename, params, log)
Ejemplo n.º 14
0
def prepare_config_mulksg(filename, cfg, log, additional_contigs_fname, K,
                          stage, saves_dir, last_one, execution_home):
    """Substitute the settings of one K run into the config ``filename``.

    ``max_threads`` is capped at the number of CPUs reported by
    ``multiprocessing.cpu_count()``. For every K except the last entry of
    ``cfg.iterative_K`` the memory limit is ``cfg.max_memory`` divided by the
    number of such K values.

    :param filename: config file to patch
    :param cfg: settings object the values are read from
    :param log: logger passed to ``process_cfg.substitute_params``
    :param additional_contigs_fname: path to extra contigs, or a falsy value
    :param K: k-mer size of this run
    :param stage: value written as ``entry_point``
    :param saves_dir: value written as ``load_from``
    :param last_one: whether this is the final iteration
    :param execution_home: not used by this function
    """
    subst_dict = dict()

    subst_dict["K"] = str(K)
    subst_dict["dataset"] = process_cfg.process_spaces(cfg.dataset)
    subst_dict["output_base"] = process_cfg.process_spaces(cfg.output_dir)
    subst_dict["tmp_dir"] = process_cfg.process_spaces(cfg.tmp_dir)
    if additional_contigs_fname:
        subst_dict["additional_contigs"] = process_cfg.process_spaces(
            additional_contigs_fname)
        subst_dict["use_additional_contigs"] = bool_to_str(True)
    else:
        subst_dict["use_additional_contigs"] = bool_to_str(False)
    subst_dict["main_iteration"] = bool_to_str(last_one)
    subst_dict["entry_point"] = stage
    subst_dict["load_from"] = saves_dir
    subst_dict["developer_mode"] = bool_to_str(cfg.developer_mode)
    subst_dict["gap_closer_enable"] = bool_to_str(last_one or K >= 55)
    subst_dict["rr_enable"] = bool_to_str(last_one and cfg.rr_enable)
    #    subst_dict["topology_simplif_enabled"] = bool_to_str(last_one)
    #TODO: If on multi node, max_threads and max_memory need to change!!!
    subst_dict["max_threads"] = min(multiprocessing.cpu_count(),
                                    cfg.max_threads)
    if K == cfg.iterative_K[-1]:
        subst_dict["max_memory"] = cfg.max_memory
    else:
        # Floor division keeps an integer memory limit integral under
        # Python 3 as well; true division would turn it into a float.
        subst_dict["max_memory"] = cfg.max_memory // (len(cfg.iterative_K) - 1)
    subst_dict["save_gp"] = bool_to_str(cfg.save_gp)
    if not last_one:
        subst_dict["correct_mismatches"] = bool_to_str(False)
    if "resolving_mode" in cfg.__dict__:
        subst_dict["resolving_mode"] = cfg.resolving_mode
    if "pacbio_mode" in cfg.__dict__:
        subst_dict["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        subst_dict["pacbio_reads"] = process_cfg.process_spaces(
            cfg.pacbio_reads)
    if cfg.cov_cutoff == "off":
        subst_dict["use_coverage_threshold"] = bool_to_str(False)
    else:
        subst_dict["use_coverage_threshold"] = bool_to_str(True)
        if cfg.cov_cutoff == "auto":
            subst_dict["coverage_threshold"] = 0.0
        else:
            subst_dict["coverage_threshold"] = cfg.cov_cutoff
    if cfg.lcer_cutoff is not None:
        subst_dict["lcer_enabled"] = bool_to_str(True)
        subst_dict["lcer_coverage_threshold"] = cfg.lcer_cutoff

    if "series_analysis" in cfg.__dict__:
        subst_dict["series_analysis"] = cfg.series_analysis
    process_cfg.substitute_params(filename, subst_dict, log)
Ejemplo n.º 15
0
def prepare_config(config_fname, ds_args, log):
    """Substitute the values of ``ds_args`` into the config ``config_fname``.

    ``developer_mode`` is always written as "false".
    """
    substitutions = {
        "tails_lie_on_bulges": process_cfg.bool_to_str(ds_args.allow_gaps),
        "align_bulge_sides": process_cfg.bool_to_str(ds_args.weak_align),
        "haplocontigs": process_cfg.process_spaces(ds_args.haplocontigs),
        "output_dir": process_cfg.process_spaces(ds_args.output_dir),
        "developer_mode": "false",  # i.e. process_cfg.bool_to_str(False)
        "tmp_dir": process_cfg.process_spaces(ds_args.tmp_dir),
        "max_threads": ds_args.max_threads,
        "max_memory": ds_args.max_memory,
    }
    process_cfg.substitute_params(config_fname, substitutions, log)
Ejemplo n.º 16
0
def create_mulksg_configs(configs_dir, execution_home, cfg, log, K_values_list,
                          additional_contigs_fname, last_one,
                          ext_python_modules_home):
    """Build a config directory and a ``mulksg-core`` command for every K.

    For each K a fresh ``<cfg.output_dir>/K<K>`` directory is created (an
    existing one is removed), the "debruijn" configs are copied into it and
    patched. Nothing is executed here.

    Returns a pair ``(command_list, contig_files)``: the command for each K
    and the path of the ``simplified_contigs.fasta`` file inside each K
    directory, both in the order of ``K_values_list``.
    """
    commands = []
    contig_paths = []
    for K in K_values_list:
        # RL = get_read_length(cfg.output_dir, K, ext_python_modules_home, log)
        # if K >= RL:
        #     support.warning("Value of K (%d) exceeded estimated read length (%d)" %
        #                             (K, RL), log)
        k_dir = os.path.join(cfg.output_dir, "K%d" % K)
        stage = BASE_STAGE
        checkpoints_dir = os.path.join(k_dir, 'saves')
        configs_dst = os.path.join(k_dir, "configs")

        if os.path.exists(k_dir):
            shutil.rmtree(k_dir)
        os.makedirs(k_dir)

        # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util._path_created = {}
        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"),
                           configs_dst,
                           preserve_times=False)

        log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
        construction_info = os.path.join(configs_dst, "construction.info")
        if "read_buffer_size" in cfg.__dict__:
            #FIXME why here???
            process_cfg.substitute_params(
                construction_info,
                {"read_buffer_size": cfg.read_buffer_size}, log)
        if "scaffolding_mode" in cfg.__dict__:
            #FIXME why here???
            process_cfg.substitute_params(
                os.path.join(configs_dst, "pe_params.info"),
                {"scaffolding_mode": cfg.scaffolding_mode}, log)

        prepare_config_rnamulksg(os.path.join(configs_dst, "rna_mode.info"),
                                 log)
        prepare_config_construction(construction_info, log)
        main_config = os.path.join(configs_dst, "config.info")
        prepare_config_mulksg(main_config, cfg, log, additional_contigs_fname,
                              K, stage, checkpoints_dir, last_one,
                              execution_home)

        core_command = [os.path.join(execution_home, "mulksg-core"),
                        main_config]
        add_configs(core_command, configs_dst)
        commands.append(core_command)
        contig_paths.append(os.path.join(k_dir, "simplified_contigs.fasta"))
    return commands, contig_paths
Ejemplo n.º 17
0
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    """Prepare the configs for one K and run ``spades-core`` on them.

    In continue mode an iteration that already produced
    ``final_contigs.fasta`` is skipped unless ``options_storage.restart_from``
    names this K. A restart value of the form ``k<K>:<stage>`` resumes this K
    from ``<stage>`` and requires the ``saves`` directory to exist; otherwise
    the K directory is rebuilt from the "debruijn" configs.

    :param configs_dir: directory holding the "debruijn" configs to copy
    :param execution_home: directory holding the executable
    :param cfg: settings object (``output_dir``, optional
        ``read_buffer_size`` / ``scaffolding_mode``, plus whatever
        ``prepare_config_spades`` reads)
    :param log: logger handed to every helper
    :param K: k-mer size of this iteration
    :param prev_K: k-mer size of the previous iteration, or a falsy value
    :param last_one: whether this is the final iteration
    """
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")

    if options_storage.continue_mode:
        restart_from = options_storage.restart_from
        stage_prefix = "k%d:" % K
        restarting_here = bool(restart_from) and (
            restart_from == ("k%d" % K) or restart_from.startswith(stage_prefix))
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not restarting_here:
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        # A stage is taken from restart_from only when it refers to this K.
        if restart_from and restart_from.startswith(stage_prefix):
            stage = restart_from[len(stage_prefix):]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            # pass the logger, like the other support.* calls in this function
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir), log)
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        dir_util._path_created = {}  # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None
    if "read_buffer_size" in cfg.__dict__:
        #FIXME why here???
        process_cfg.substitute_params(os.path.join(dst_configs, "construction.info"), {"read_buffer_size": cfg.read_buffer_size}, log)
    if "scaffolding_mode" in cfg.__dict__:
        #FIXME why here???
        process_cfg.substitute_params(os.path.join(dst_configs, "pe_params.info"), {"scaffolding_mode": cfg.scaffolding_mode}, log)

    prepare_config_rnaspades(os.path.join(dst_configs, "rna_mode.info"), log)
    prepare_config_construction(os.path.join(dst_configs, "construction.info"), log)
    cfg_fn = os.path.join(dst_configs, "config.info")
    prepare_config_spades(cfg_fn, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home)

    command = [os.path.join(execution_home, "spades-core"), cfg_fn]

    add_configs(command, dst_configs)

    #print("Calling: " + " ".join(command))
    support.sys_call(command, log)
Ejemplo n.º 18
0
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    """Prepare the configs for one K and run ``spades-core`` on them.

    In continue mode an iteration that already produced
    ``final_contigs.fasta`` is skipped unless ``options_storage.restart_from``
    names this K. A restart value of the form ``k<K>:<stage>`` resumes this K
    from ``<stage>`` and requires the ``saves`` directory to exist; otherwise
    the K directory is rebuilt from the "debruijn" configs.

    :param configs_dir: directory holding the "debruijn" configs to copy
    :param execution_home: directory holding the executable
    :param cfg: settings object (``output_dir``, optional
        ``read_buffer_size`` / ``scaffolding_mode``, plus whatever
        ``prepare_config_spades`` reads)
    :param log: logger handed to every helper
    :param K: k-mer size of this iteration
    :param prev_K: k-mer size of the previous iteration, or a falsy value
    :param last_one: whether this is the final iteration
    """
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")

    if options_storage.continue_mode:
        restart_from = options_storage.restart_from
        stage_prefix = "k%d:" % K
        restarting_here = bool(restart_from) and (
            restart_from == ("k%d" % K) or restart_from.startswith(stage_prefix))
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not restarting_here:
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        # A stage is taken from restart_from only when it refers to this K.
        if restart_from and restart_from.startswith(stage_prefix):
            stage = restart_from[len(stage_prefix):]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            # pass the logger, like the other support.* calls in this function
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir), log)
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        dir_util._path_created = {}  # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None
    if "read_buffer_size" in cfg.__dict__:
        #FIXME why here???
        process_cfg.substitute_params(os.path.join(dst_configs, "construction.info"), {"read_buffer_size": cfg.read_buffer_size}, log)
    if "scaffolding_mode" in cfg.__dict__:
        #FIXME why here???
        process_cfg.substitute_params(os.path.join(dst_configs, "pe_params.info"), {"scaffolding_mode": cfg.scaffolding_mode}, log)

    prepare_config_rnaspades(os.path.join(dst_configs, "rna_mode.info"), log)
    prepare_config_construction(os.path.join(dst_configs, "construction.info"), log)
    cfg_fn = os.path.join(dst_configs, "config.info")
    prepare_config_spades(cfg_fn, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home)

    command = [os.path.join(execution_home, "spades-core"), cfg_fn]

    add_configs(command, dst_configs)

    #print("Calling: " + " ".join(command))
    support.sys_call(command, log)
def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K,
                          stage, saves_dir, last_one, execution_home):
    """Substitute the settings of one K iteration into the config ``filename``.

    Values come from ``cfg``, ``options_storage`` and the arguments. Options
    that ``cfg`` may not define (``checkpoints``, ``resolving_mode``,
    ``pacbio_mode``, ``series_analysis``) are substituted only when present.
    ``execution_home`` is not used by this function.
    """
    optional = cfg.__dict__

    params = {
        "K": str(K),
        "dataset": process_cfg.process_spaces(cfg.dataset),
        "output_base": process_cfg.process_spaces(cfg.output_dir),
        "tmp_dir": process_cfg.process_spaces(cfg.tmp_dir),
    }

    # contigs from a previous iteration are optional
    has_extra_contigs = bool(additional_contigs_fname)
    if has_extra_contigs:
        params["additional_contigs"] = process_cfg.process_spaces(
            additional_contigs_fname)
    params["use_additional_contigs"] = bool_to_str(has_extra_contigs)

    params["main_iteration"] = bool_to_str(last_one)
    params["entry_point"] = stage
    params["load_from"] = saves_dir
    if "checkpoints" in optional:
        params["checkpoints"] = cfg.checkpoints
    params["developer_mode"] = bool_to_str(cfg.developer_mode)
    params["time_tracer_enabled"] = bool_to_str(cfg.time_tracer)
    params["gap_closer_enable"] = bool_to_str(
        last_one or K >= options_storage.GAP_CLOSER_ENABLE_MIN_K)
    params["rr_enable"] = bool_to_str(last_one and cfg.rr_enable)
    #    params["topology_simplif_enabled"] = bool_to_str(last_one)
    params["max_threads"] = cfg.max_threads
    params["max_memory"] = cfg.max_memory
    params["save_gp"] = bool_to_str(cfg.save_gp)

    # mismatch correction is only switched off explicitly, for non-final runs
    if not last_one:
        params["correct_mismatches"] = bool_to_str(False)
    if "resolving_mode" in optional:
        params["resolving_mode"] = cfg.resolving_mode
    if "pacbio_mode" in optional:
        params["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        params["pacbio_reads"] = process_cfg.process_spaces(cfg.pacbio_reads)

    # "off" disables the coverage threshold, "auto" writes 0.0, anything else
    # is written as given
    threshold_enabled = cfg.cov_cutoff != "off"
    params["use_coverage_threshold"] = bool_to_str(threshold_enabled)
    if threshold_enabled:
        params["coverage_threshold"] = (
            0.0 if cfg.cov_cutoff == "auto" else cfg.cov_cutoff)

    if cfg.lcer_cutoff is not None:
        params["lcer_enabled"] = bool_to_str(True)
        params["lcer_coverage_threshold"] = cfg.lcer_cutoff

    if "series_analysis" in optional:
        params["series_analysis"] = cfg.series_analysis
    process_cfg.substitute_params(filename, params, log)
Ejemplo n.º 20
0
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    """Prepare the ``K<K>`` working directory and run the assembler once.

    Steps, in order:

    1. In continue mode, skip the iteration when ``final_contigs.fasta``
       already exists for this K, unless ``options_storage.restart_from`` is
       ``k<K>`` or starts with ``k<K>:``. If ``restart_from`` contains a
       ``:``, the text after the first ``:`` becomes the entry stage.
    2. When starting from ``BASE_STAGE`` the data directory is recreated from
       scratch and the ``debruijn`` configs are copied into it; otherwise the
       ``saves`` directory must already exist.
    3. The configs are filled in and the ``spades`` executable found in
       ``execution_home`` is run through ``support.sys_call``.

    Args:
        configs_dir: directory holding the ``debruijn`` config templates.
        execution_home: directory holding the ``spades`` executable.
        cfg: options object; ``output_dir`` is read here, the rest is
            consumed by ``prepare_config_spades``.
        log: logger passed to every helper.
        K: k-mer size of this iteration (formatted with ``%d``).
        prev_K: k-mer size of the previous iteration, or a falsy value when
            there is none.
        last_one: forwarded to ``prepare_config_spades``.
    """
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if options_storage.continue_mode:
        restart_from = options_storage.restart_from
        restarts_at_this_k = restart_from and (
            restart_from == ("k%d" % K) or restart_from.startswith("k%d:" % K))
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not restarts_at_this_k:
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        if restart_from and ":" in restart_from:
            stage = restart_from.split(":", 1)[1]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir))
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)
        # Promote "*.info.template" files to "*.info"; a template is dropped
        # when the real config already exists next to it.
        template_suffix = '.template'
        for root, _dirs, files in os.walk(dst_configs):
            for file_name in files:
                if not file_name.endswith('.info' + template_suffix):
                    continue
                template_path = os.path.join(root, file_name)
                # Strip only the trailing suffix. Splitting on '.template'
                # would cut the path at the first occurrence, which breaks
                # when a directory or the file stem contains '.template'.
                target_path = template_path[:-len(template_suffix)]
                if os.path.isfile(target_path):
                    os.remove(template_path)
                else:
                    os.rename(template_path, target_path)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_spades(cfg_file_name, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home)

    command = [os.path.join(execution_home, "spades"), cfg_file_name]
    support.sys_call(command, log)
Ejemplo n.º 21
0
def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home):
    """Fill the assembler config ``filename`` with values for one K iteration.

    Collects the substitutions from ``cfg`` and the iteration arguments, then
    applies them in one ``process_cfg.substitute_params`` call. Optional
    settings are substituted only when the attribute is present in
    ``cfg.__dict__``.
    """
    params = {
        "K": str(K),
        "dataset": process_cfg.process_spaces(cfg.dataset),
        "output_base": process_cfg.process_spaces(cfg.output_dir),
        "tmp_dir": process_cfg.process_spaces(cfg.tmp_dir),
    }

    # Contigs of the previous iteration, when the caller supplied them.
    if additional_contigs_fname:
        params["additional_contigs"] = process_cfg.process_spaces(additional_contigs_fname)
    params["use_additional_contigs"] = bool_to_str(bool(additional_contigs_fname))

    # "topology_simplif_enabled" was commented out in the original and stays disabled.
    params.update({
        "main_iteration": bool_to_str(last_one),
        "entry_point": stage,
        "load_from": saves_dir,
        "developer_mode": bool_to_str(cfg.developer_mode),
        "gap_closer_enable": bool_to_str(last_one or K >= 55),
        "rr_enable": bool_to_str(last_one and cfg.rr_enable),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "save_gp": bool_to_str(cfg.save_gp),
    })

    # Only non-final iterations override the mismatch-correction setting.
    if not last_one:
        params["correct_mismatches"] = bool_to_str(False)

    present = cfg.__dict__
    if "resolving_mode" in present:
        params["resolving_mode"] = cfg.resolving_mode
    if "pacbio_mode" in present:
        params["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        params["pacbio_reads"] = process_cfg.process_spaces(cfg.pacbio_reads)

    # cov_cutoff is "off", "auto" or an explicit value.
    if cfg.cov_cutoff == "off":
        params["use_coverage_threshold"] = bool_to_str(False)
    else:
        params["use_coverage_threshold"] = bool_to_str(True)
        params["coverage_threshold"] = 0.0 if cfg.cov_cutoff == "auto" else cfg.cov_cutoff

    if cfg.lcer_cutoff is not None:
        params["lcer_enabled"] = bool_to_str(True)
        params["lcer_coverage_threshold"] = cfg.lcer_cutoff

    # TODO: rework how spades.py options are mapped onto config substitutions.
    if "bwa_paired" in present:
        params["bwa_enable"] = bool_to_str(True)
    params["path_to_bwa"] = os.path.join(execution_home, "bwa-spades")
    if "series_analysis" in present:
        params["series_analysis"] = cfg.series_analysis

    process_cfg.substitute_params(filename, params, log)
Ejemplo n.º 22
0
def prepare_config(config_fname, ds_args, log):
    """Write the options in ``ds_args`` into the config file ``config_fname``.

    Boolean options go through ``process_cfg.bool_to_str`` and path-like
    options through ``process_cfg.process_spaces``; the rest are passed as-is.
    """
    as_flag = process_cfg.bool_to_str
    as_path = process_cfg.process_spaces

    substitutions = {
        # These two config flags are the negation of the command-line options.
        "tails_lie_on_bulges": as_flag(not ds_args.allow_gaps),
        "align_bulge_sides": as_flag(not ds_args.weak_align),
        "haplocontigs": as_path(ds_args.haplocontigs),
        "output_dir": as_path(ds_args.output_dir),
        "developer_mode": as_flag(ds_args.dev_mode),
        "tmp_dir": as_path(ds_args.tmp_dir),
        "max_threads": ds_args.max_threads,
        "max_memory": ds_args.max_memory,
        "output_base": "",
        "ha_enabled": as_flag(ds_args.haplotype_assembly),
        "K": str(ds_args.k),
        'saves': ds_args.saves,
        'entry_point': ds_args.start_from,
    }
    process_cfg.substitute_params(config_fname, substitutions, log)
Ejemplo n.º 23
0
def prepare_config_bh(filename, cfg, log):
    """Substitute dataset, directory, thread and memory settings into ``filename``.

    Every ``*_nthreads`` key receives ``cfg.max_threads``. ``input_qvoffset``
    is substituted only when ``qvoffset`` is present in ``cfg.__dict__``.
    """
    values = {
        "dataset": process_cfg.process_spaces(cfg.dataset_yaml_filename),
        "input_working_dir": process_cfg.process_spaces(cfg.tmp_dir),
        "output_dir": process_cfg.process_spaces(cfg.output_dir),
        "general_max_iterations": cfg.max_iterations,
    }

    # All thread-count settings share the same limit.
    for thread_key in ("general_max_nthreads", "count_merge_nthreads", "bayes_nthreads",
                       "expand_nthreads", "correct_nthreads"):
        values[thread_key] = cfg.max_threads

    values["general_hard_memory_limit"] = cfg.max_memory
    if "qvoffset" in cfg.__dict__:
        values["input_qvoffset"] = cfg.qvoffset

    process_cfg.substitute_params(filename, values, log)
Ejemplo n.º 24
0
def prepare_config(config_fname, ds_args, log):
    """Apply the options held in ``ds_args`` to the config file ``config_fname``.

    Builds one substitution mapping and passes it to
    ``process_cfg.substitute_params``.
    """
    flag = process_cfg.bool_to_str
    spaced = process_cfg.process_spaces

    args_dict = dict([
        # allow_gaps / weak_align are inverted before being written.
        ("tails_lie_on_bulges", flag(not ds_args.allow_gaps)),
        ("align_bulge_sides", flag(not ds_args.weak_align)),
        ("haplocontigs", spaced(ds_args.haplocontigs)),
        ("output_dir", spaced(ds_args.output_dir)),
        ("developer_mode", flag(ds_args.dev_mode)),
        ("tmp_dir", spaced(ds_args.tmp_dir)),
        ("max_threads", ds_args.max_threads),
        ("max_memory", ds_args.max_memory),
        ("output_base", ""),
        ("ha_enabled", flag(ds_args.haplotype_assembly)),
        ("K", str(ds_args.k)),
        ('saves', ds_args.saves),
        ('entry_point', ds_args.start_from),
    ])
    process_cfg.substitute_params(config_fname, args_dict, log)
Ejemplo n.º 25
0
def prepare_config_scaffold_correction(filename, cfg, log, saves_dir, K):
    """Fill the config ``filename`` for a scaffold-correction run.

    Output goes to the ``SCC`` subdirectory of ``cfg.output_dir``. Additional
    contigs and the main-iteration flag are switched off, and the entry point
    is always ``BASE_STAGE``.
    """
    scc_output = os.path.join(cfg.output_dir, "SCC")
    settings = {
        "K": str(K),
        "dataset": process_cfg.process_spaces(cfg.dataset),
        "output_base": process_cfg.process_spaces(scc_output),
        "tmp_dir": process_cfg.process_spaces(cfg.tmp_dir),
        "use_additional_contigs": bool_to_str(False),
        "main_iteration": bool_to_str(False),
        "entry_point": BASE_STAGE,
        "load_from": saves_dir,
        "developer_mode": bool_to_str(cfg.developer_mode),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
    }

    # TODO (carried over from the original; no details were given)
    process_cfg.substitute_params(filename, settings, log)
def prepare_config_bh(filename, cfg, log):
    """Write dataset, directory, iteration, thread and memory limits to ``filename``.

    The five ``*_nthreads`` keys all receive ``cfg.max_threads``;
    ``input_qvoffset`` is written only when ``cfg`` carries a ``qvoffset``
    entry in its ``__dict__``.
    """
    thread_keys = (
        "general_max_nthreads",
        "count_merge_nthreads",
        "bayes_nthreads",
        "expand_nthreads",
        "correct_nthreads",
    )

    replacements = {
        "dataset": process_cfg.process_spaces(cfg.dataset_yaml_filename),
        "input_working_dir": process_cfg.process_spaces(cfg.tmp_dir),
        "output_dir": process_cfg.process_spaces(cfg.output_dir),
        "general_max_iterations": cfg.max_iterations,
    }
    # One shared thread limit for every stage.
    for key in thread_keys:
        replacements[key] = cfg.max_threads
    replacements["general_hard_memory_limit"] = cfg.max_memory

    if "qvoffset" in cfg.__dict__:
        replacements["input_qvoffset"] = cfg.qvoffset
    process_cfg.substitute_params(filename, replacements, log)
Ejemplo n.º 27
0
def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K,
                          stage, saves_dir, last_one):
    """Fill the assembler config ``filename`` with values for one K iteration.

    Mandatory settings are read from ``cfg`` and the call arguments. Optional
    ones (``diploid_mode``, ``resolving_mode``, ``careful``, ``pacbio_mode``)
    are substituted only when present in ``cfg.__dict__``.
    """
    present = cfg.__dict__

    params = {"K": str(K), "run_mode": "false"}
    if "diploid_mode" in present:
        params["diploid_mode"] = bool_to_str(cfg.diploid_mode)

    params["dataset"] = process_cfg.process_spaces(cfg.dataset)
    params["output_base"] = process_cfg.process_spaces(cfg.output_dir)
    params["tmp_dir"] = process_cfg.process_spaces(cfg.tmp_dir)

    # Contigs of the previous iteration, when the caller supplied them.
    if additional_contigs_fname:
        params["additional_contigs"] = process_cfg.process_spaces(
            additional_contigs_fname)
    params["use_additional_contigs"] = bool_to_str(
        bool(additional_contigs_fname))

    # "topology_simplif_enabled" was commented out in the original and stays disabled.
    params.update({
        "main_iteration": bool_to_str(last_one),
        "entry_point": stage,
        "load_from": saves_dir,
        "developer_mode": bool_to_str(cfg.developer_mode),
        "gap_closer_enable": bool_to_str(last_one or K >= 55),
        "rr_enable": bool_to_str(last_one and cfg.rr_enable),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "correct_mismatches": bool_to_str(last_one),
    })

    if "resolving_mode" in present:
        params["resolving_mode"] = cfg.resolving_mode
    if "careful" in present:
        params["mismatch_careful"] = bool_to_str(cfg.careful)
    if "pacbio_mode" in present:
        params["pacbio_test_on"] = bool_to_str(cfg.pacbio_mode)
        params["pacbio_reads"] = process_cfg.process_spaces(
            cfg.pacbio_reads)

    # cov_cutoff is "off", "auto" or an explicit value.
    if cfg.cov_cutoff == "off":
        params["use_coverage_threshold"] = bool_to_str(False)
    else:
        params["use_coverage_threshold"] = bool_to_str(True)
        params["coverage_threshold"] = (
            0.0 if cfg.cov_cutoff == "auto" else cfg.cov_cutoff)

    process_cfg.substitute_params(filename, params, log)
def ModifyConfigFiles(params, log):
    """Substitute run parameters into the CDR-labeler and VJ-finder configs.

    Side effect: sets ``params.germline_config_file`` to
    ``germline_files_config.txt`` inside ``params.vj_finder_config_dir``.
    Both dictionaries are passed to ``ModifyParamsWrtOrganism`` before the
    substitution.
    """
    cdr_settings = {
        'input_reads': params.input_reads,
        'output_dir': params.output_dir,
        'vj_finder_config': params.vj_finder_config_file,
        'num_threads': params.num_threads,
        'domain_system': params.domain_system,
        'run_hg_constructor': os.path.join(home_directory, './build/release/bin/ig_swgraph_construct'),
    }

    vj_settings = {
        'loci': params.loci,
        'germline_dir': os.path.join(home_directory, "data/germline"),
    }
    # The germline config path is stored on params as well as substituted.
    params.germline_config_file = os.path.join(params.vj_finder_config_dir, "germline_files_config.txt")
    vj_settings['germline_filenames_config'] = params.germline_config_file

    ModifyParamsWrtOrganism(params, cdr_settings, vj_settings)
    for config_path, settings in ((params.cdr_labeler_config_file, cdr_settings),
                                  (params.vj_finder_config_file, vj_settings)):
        process_cfg.substitute_params(config_path, settings, log)
Ejemplo n.º 29
0
def ModifyConfigFiles(params, log):
    """Fill in the CDR-labeler and VJ-finder config files from ``params``.

    Also records the germline files config path on
    ``params.germline_config_file``. ``ModifyParamsWrtOrganism`` receives both
    substitution dictionaries before they are applied.
    """
    hg_constructor = os.path.join(home_directory, './build/release/bin/ig_swgraph_construct')
    cdr_param_dict = dict(
        input_reads=params.input_reads,
        output_dir=params.output_dir,
        vj_finder_config=params.vj_finder_config_file,
        num_threads=params.num_threads,
        domain_system=params.domain_system,
        run_hg_constructor=hg_constructor,
    )

    vj_param_dict = dict(
        loci=params.loci,
        germline_dir=os.path.join(home_directory, "data/germline"),
    )
    # Stored on params so the same path is available after this call.
    params.germline_config_file = os.path.join(params.vj_finder_config_dir, "germline_files_config.txt")
    vj_param_dict['germline_filenames_config'] = params.germline_config_file

    ModifyParamsWrtOrganism(params, cdr_param_dict, vj_param_dict)

    process_cfg.substitute_params(params.cdr_labeler_config_file, cdr_param_dict, log)
    process_cfg.substitute_params(params.vj_finder_config_file, vj_param_dict, log)
Ejemplo n.º 30
0
 def prepare_config_bh(self, filename, cfg, log):
     """Substitute dataset, directory, thread and memory settings into ``filename``.

     The iteration count comes from ``options_storage.ITERATIONS``. The
     settings ``qvoffset``, ``count_filter_singletons`` and
     ``read_buffer_size`` are substituted only when present in
     ``cfg.__dict__``.
     """
     values = {
         "dataset": process_cfg.process_spaces(cfg.dataset_yaml_filename),
         "input_working_dir": process_cfg.process_spaces(cfg.tmp_dir),
         "output_dir": process_cfg.process_spaces(cfg.output_dir),
         "general_max_iterations": options_storage.ITERATIONS,
     }

     # Every thread-count setting gets the same limit.
     for thread_key in ("general_max_nthreads", "count_merge_nthreads",
                        "bayes_nthreads", "expand_nthreads",
                        "correct_nthreads"):
         values[thread_key] = cfg.max_threads
     values["general_hard_memory_limit"] = cfg.max_memory

     present = cfg.__dict__
     if "qvoffset" in present:
         values["input_qvoffset"] = cfg.qvoffset
     if "count_filter_singletons" in present:
         values["count_filter_singletons"] = cfg.count_filter_singletons
     if "read_buffer_size" in present:
         # The read buffer size is written under a different config key.
         values["count_split_buffer"] = cfg.read_buffer_size

     process_cfg.substitute_params(filename, values, log)
Ejemplo n.º 31
0
def prepare_config_spades(filename, cfg, log, prev_K, K, last_one):
    """Fill the assembler config ``filename`` with values for one K iteration.

    The entry point is always ``"construction"``. ``use_additional_contigs``
    is derived from ``prev_K``, and the gap closer, topology simplification
    and mismatch correction flags all follow ``last_one``.
    """
    final_iteration_flag = bool_to_str(last_one)

    params = {
        "K": str(K),
        "run_mode": "false",
        "dataset": cfg.dataset,
        "output_base": cfg.output_dir,
        "additional_contigs": cfg.additional_contigs,
        "entry_point": "construction",
        "developer_mode": bool_to_str(cfg.developer_mode),
        "gap_closer_enable": final_iteration_flag,
        "paired_mode": bool_to_str(last_one and cfg.paired_mode),
        "topology_simplif_enabled": final_iteration_flag,
        "use_additional_contigs": bool_to_str(prev_K),
        "max_threads": cfg.max_threads,
        "max_memory": cfg.max_memory,
        "correct_mismatches": final_iteration_flag,
    }

    # Optional settings are written only when cfg actually carries them.
    present = cfg.__dict__
    if "resolving_mode" in present:
        params["resolving_mode"] = cfg.resolving_mode
    if "careful" in present:
        params["mismatch_careful"] = bool_to_str(cfg.careful)

    process_cfg.substitute_params(filename, params, log)
def ModifyConfigFiles(params, log):
    """Substitute run parameters into the three output config files.

    The files are ``params.output_config_file``,
    ``params.cdr_labeler_config_filename`` and
    ``params.vj_finder_config_filename``. Side effect: sets
    ``params.germline_config_file``. The CDR-labeler dictionary is replaced
    by the return value of ``ModifyParamsWrtOrganism``.
    """
    igs_settings = {
        'output_dir': params.output_dir,
        'loci': params.loci,
        'number_of_metaroots': params.number_of_metaroots,
        'pool_manager_strategy': params.tree_strategy,
        'germline_dir': os.path.join(home_directory, "data/germline"),
        'cdr_labeler_config_filename': params.cdr_labeler_config_filename,
    }

    cdr_settings = {'vj_finder_config': params.vj_finder_config_filename}

    # The germline config path is stored on params and shared by two configs.
    params.germline_config_file = os.path.join(params.vj_finder_config_dir, "germline_files_config.txt")
    vjf_settings = {
        'germline_filenames_config': params.germline_config_file,
        'germline_dir': os.path.join(home_directory, "data/germline"),
    }
    igs_settings['germline_filenames_config'] = params.germline_config_file

    cdr_settings = ModifyParamsWrtOrganism(params, cdr_settings)

    for config_path, settings in ((params.output_config_file, igs_settings),
                                  (params.cdr_labeler_config_filename, cdr_settings),
                                  (params.vj_finder_config_filename, vjf_settings)):
        process_cfg.substitute_params(config_path, settings, log)
Ejemplo n.º 33
0
def prepare_config_construction(filename, log):
    """Substitute ``read_cov_threshold`` into ``filename`` when it is set.

    Does nothing when ``options_storage.read_cov_threshold`` is ``None``.
    """
    if options_storage.read_cov_threshold is not None:
        process_cfg.substitute_params(
            filename, {"read_cov_threshold": options_storage.read_cov_threshold}, log)
Ejemplo n.º 34
0
def prepare_config_construction(filename, log):
    """Write the read coverage threshold into the config ``filename``.

    The file is left untouched when ``options_storage.read_cov_threshold``
    is ``None``.
    """
    threshold_is_set = options_storage.read_cov_threshold is not None
    if not threshold_is_set:
        return

    overrides = {"read_cov_threshold": options_storage.read_cov_threshold}
    process_cfg.substitute_params(filename, overrides, log)