def generate_cfg(self, cfg, output_files):
        self.cfg = merge_configs(cfg["assembly"], cfg["common"])
        self.cfg.__dict__["result_contigs"] = output_files[
            "result_contigs_filename"]
        self.cfg.__dict__["result_scaffolds"] = output_files[
            "result_scaffolds_filename"]
        self.cfg.__dict__["result_graph"] = output_files[
            "result_assembly_graph_filename"]
        self.cfg.__dict__["result_graph_gfa"] = output_files[
            "result_assembly_graph_filename_gfa"]
        self.cfg.__dict__["result_contigs_paths"] = output_files[
            "result_contigs_paths_filename"]
        self.cfg.__dict__["result_scaffolds_paths"] = output_files[
            "result_scaffolds_paths_filename"]
        self.cfg.__dict__["result_transcripts"] = output_files[
            "result_transcripts_filename"]
        self.cfg.__dict__["result_transcripts_paths"] = output_files[
            "result_transcripts_paths_filename"]
        self.cfg.__dict__["result_gene_clusters"] = output_files[
            "result_gene_clusters_filename"]
        self.cfg.__dict__["result_bgc_statistics"] = output_files[
            "result_bgc_stats_filename"]
        self.cfg.__dict__["result_domain_graph"] = output_files[
            "result_domain_graph_filename"]

        if self.cfg.disable_rr:
            self.cfg.__dict__["rr_enable"] = False
        else:
            self.cfg.__dict__["rr_enable"] = True

        dataset_filename = os.path.join(self.cfg.output_dir, "dataset.info")
        self.cfg.__dict__["dataset"] = dataset_filename
        self.cfg.tmp_dir = support.get_tmp_dir(prefix="spades_")
Esempio n. 2
0
def run_corrector(configs_dir, execution_home, cfg,
                ext_python_modules_home, log, to_correct, result):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    dir_util.copy_tree(os.path.join(configs_dir, "corrector"), dst_configs, preserve_times=False)
    cfg_file_name = os.path.join(dst_configs, "corrector.info")

    cfg.tmp_dir = support.get_tmp_dir(prefix="corrector_")

    prepare_config_corr(cfg_file_name, cfg, ext_python_modules_home)
    binary_name = "corrector"

    command = [os.path.join(execution_home, binary_name),
               os.path.abspath(cfg_file_name), os.path.abspath(to_correct)]

    log.info("\n== Running contig polishing tool: " + ' '.join(command) + "\n")


    log.info("\n== Dataset description file was created: " + cfg_file_name + "\n")

    support.sys_call(command, log)
    if not os.path.isfile(result):
        support.error("Mismatch correction finished abnormally: " + result + " not found!")
    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
Esempio n. 3
0
def run_corrector(configs_dir, execution_home, cfg,
                ext_python_modules_home, log, to_correct, result):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    dir_util.copy_tree(os.path.join(configs_dir, "corrector"), dst_configs, preserve_times=False)
    cfg_file_name = os.path.join(dst_configs, "corrector.info")

    cfg.tmp_dir = support.get_tmp_dir(prefix="corrector_")

    prepare_config_corr(cfg_file_name, cfg, ext_python_modules_home)
    binary_name = "corrector"

    command = [os.path.join(execution_home, binary_name),
               os.path.abspath(cfg_file_name), os.path.abspath(to_correct)]

    log.info("\n== Running contig polishing tool: " + ' '.join(command) + "\n")


    log.info("\n== Dataset description file was created: " + cfg_file_name + "\n")

    support.sys_call(command, log)
    if not os.path.isfile(result):
        support.error("Mismatch correction finished abnormally: " + result + " not found!")
    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
Esempio n. 4
0
def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg,
               not_used_dataset_data, ext_python_modules_home, log):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    if cfg.iontorrent:
        dir_util.copy_tree(os.path.join(configs_dir, "ionhammer"), dst_configs, preserve_times=False)
        cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg")
    else:
        dir_util.copy_tree(os.path.join(configs_dir, "hammer"), dst_configs, preserve_times=False)
        cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_")
    if cfg.iontorrent:
        prepare_config_ih(cfg_file_name, cfg, ext_python_modules_home)
        binary_name = "ionhammer"
    else:
        prepare_config_bh(cfg_file_name, cfg, log)
        binary_name = "hammer"

    command = [os.path.join(execution_home, binary_name),
               os.path.abspath(cfg_file_name)]

    log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n")
    support.sys_call(command, log)
    if not os.path.isfile(corrected_dataset_yaml_filename):
        support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!")
    corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
    remove_not_corrected_reads(cfg.output_dir)
    is_changed = False
    if cfg.gzip_output:
        is_changed = True
        compress_dataset_files(corrected_dataset_data, ext_python_modules_home, cfg.max_threads, log)
    if not_used_dataset_data:
        is_changed = True
        corrected_dataset_data += not_used_dataset_data
    if is_changed:
        pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'))
    log.info("\n== Dataset description file was created: " + corrected_dataset_yaml_filename + "\n")

    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
Esempio n. 5
0
def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg,
               not_used_dataset_data, ext_python_modules_home, log):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml
    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    if cfg.iontorrent:
        dir_util.copy_tree(os.path.join(configs_dir, "ionhammer"), dst_configs, preserve_times=False)
        cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg")
    else:
        dir_util.copy_tree(os.path.join(configs_dir, "hammer"), dst_configs, preserve_times=False)
        cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_")
    if cfg.iontorrent:
        prepare_config_ih(cfg_file_name, cfg, ext_python_modules_home)
        binary_name = "ionhammer"
    else:
        prepare_config_bh(cfg_file_name, cfg, log)
        binary_name = "hammer"

    command = [os.path.join(execution_home, binary_name),
               os.path.abspath(cfg_file_name)]

    log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n")
    support.sys_call(command, log)
    if not os.path.isfile(corrected_dataset_yaml_filename):
        support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!")
    corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
    remove_not_corrected_reads(cfg.output_dir)
    is_changed = False
    if cfg.gzip_output:
        is_changed = True
        compress_dataset_files(corrected_dataset_data, ext_python_modules_home, cfg.max_threads, log)
    if not_used_dataset_data:
        is_changed = True
        corrected_dataset_data += not_used_dataset_data
    if is_changed:
        pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'))
    log.info("\n== Dataset description file was created: " + corrected_dataset_yaml_filename + "\n")

    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
Esempio n. 6
0
    def generate_config(self, cfg):
        dst_configs = os.path.join(self.cfg.output_dir, "configs")
        if os.path.isdir(dst_configs):
            shutil.rmtree(dst_configs)
        dir_util.copy_tree(os.path.join(self.tmp_configs_dir, "corrector"),
                           dst_configs,
                           preserve_times=False)
        cfg_file_name = os.path.join(dst_configs, "corrector.info")

        self.cfg.tmp_dir = support.get_tmp_dir(prefix="corrector_")
        prepare_config_corr(cfg_file_name, self.cfg,
                            self.ext_python_modules_home)
Esempio n. 7
0
    def generate_config(self, cfg):
        dst_configs = os.path.join(cfg.output_dir, "configs")
        if os.path.isdir(dst_configs):
            shutil.rmtree(dst_configs)
        if cfg.iontorrent:
            dir_util.copy_tree(os.path.join(self.tmp_configs_dir, "ionhammer"),
                               dst_configs,
                               preserve_times=False)
            cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg")
        else:
            dir_util.copy_tree(os.path.join(self.tmp_configs_dir, "hammer"),
                               dst_configs,
                               preserve_times=False)
            cfg_file_name = os.path.join(dst_configs, "config.info")

        cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_")
        if cfg.iontorrent:
            self.prepare_config_ih(cfg_file_name, cfg,
                                   self.ext_python_modules_home)
        else:
            self.prepare_config_bh(cfg_file_name, cfg, self.log)
Esempio n. 8
0
def main(ds_args_list, general_args_list, spades_home, bin_home):
    log = logging.getLogger('dipspades')
    log.setLevel(logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter('%(message)s'))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)

    support.check_binaries(bin_home, log)
    ds_args = parse_arguments(ds_args_list, log)

    if not os.path.exists(ds_args.output_dir):
        os.makedirs(ds_args.output_dir)
    log_filename = os.path.join(ds_args.output_dir, "dipspades.log")
    if os.path.exists(log_filename):
        os.remove(log_filename)
    log_handler = logging.FileHandler(log_filename, mode='a')
    log.addHandler(log_handler)

    params_filename = os.path.join(ds_args.output_dir, "params.txt")
    params_handler = logging.FileHandler(params_filename, mode='a')
    log.addHandler(params_handler)

    log.info("\n")
    log.info("General command line: " + " ".join(general_args_list) + "\n")
    log.info("dipSPAdes command line: " + " ".join(ds_args_list) + "\n")
    print_ds_args(ds_args, log)
    log.removeHandler(params_handler)

    log.info("\n======= dipSPAdes started. Log can be found here: " + log_filename + "\n")
    write_haplocontigs_in_file(ds_args.haplocontigs, ds_args.haplocontigs_fnames)

    config_fname = prepare_configs(os.path.join(spades_home, "configs", "dipspades"), ds_args, log)
    ds_args.tmp_dir = support.get_tmp_dir(prefix="dipspades_", base_dir=ds_args.tmp_dir)
    prepare_config(config_fname, ds_args, log)

    try:
        log.info("===== Assembling started.\n")
        binary_path = os.path.join(bin_home, "spades-dipspades-core")
        command = [binary_path, config_fname]
        support.sys_call(command, log)
        log.info("\n===== Assembling finished.\n")
        print_ds_output(ds_args.output_dir, log)
        if os.path.isdir(ds_args.tmp_dir):
            shutil.rmtree(ds_args.tmp_dir)
        log.info("\n======= dipSPAdes finished.\n")
        log.info("dipSPAdes log can be found here: " + log_filename + "\n")
        log.info("Thank you for using dipSPAdes!")
        log.removeHandler(log_handler)
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            if exc_type == OSError and exc_value.errno == errno.ENOEXEC: # Exec format error
                support.error("It looks like you are using SPAdes binaries for another platform.\n" +
                              support.get_spades_binaries_info_message(), dipspades=True)
            else:
                log.exception(exc_value)
                support.error("exception caught: %s" % exc_type, log)
    except BaseException: # since python 2.5 system-exiting exceptions (e.g. KeyboardInterrupt) are derived from BaseException
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            support.error("exception caught: %s" % exc_type, log, dipspades=True)
Esempio n. 9
0
def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg,
               dataset_data, ext_python_modules_home, only_compressing_is_needed, log):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    # not all reads need processing
    if support.get_lib_ids_by_type(dataset_data, options_storage.LONG_READS_TYPES):
        not_used_dataset_data = support.get_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES)
        to_correct_dataset_data = support.rm_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES)
        to_correct_dataset_yaml_filename = os.path.join(cfg.output_dir, "to_correct.yaml")
        pyyaml.dump(to_correct_dataset_data, open(to_correct_dataset_yaml_filename, 'w'), default_flow_style = False, default_style='"', width=100500)
        cfg.dataset_yaml_filename = to_correct_dataset_yaml_filename
    else:
        not_used_dataset_data = None

    if not only_compressing_is_needed:
        dst_configs = os.path.join(cfg.output_dir, "configs")
        if os.path.exists(dst_configs):
            shutil.rmtree(dst_configs)
        if cfg.iontorrent:
            dir_util.copy_tree(os.path.join(configs_dir, "ionhammer"), dst_configs, preserve_times=False)
            cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg")
        else:
            dir_util.copy_tree(os.path.join(configs_dir, "hammer"), dst_configs, preserve_times=False)
            cfg_file_name = os.path.join(dst_configs, "config.info")

        cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_")
        if cfg.iontorrent:
            prepare_config_ih(cfg_file_name, cfg, ext_python_modules_home)
            binary_name = "ionhammer"
        else:
            prepare_config_bh(cfg_file_name, cfg, log)
            binary_name = "hammer"

        command = [os.path.join(execution_home, binary_name),
                   os.path.abspath(cfg_file_name)]

        log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n")
        support.sys_call(command, log)
        if not os.path.isfile(corrected_dataset_yaml_filename):
            support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!")
    else:
        log.info("\n===== Skipping %s (already processed). \n" % "read error correction tool")
        support.continue_from_here(log)

    corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
    remove_not_corrected_reads(cfg.output_dir)
    is_changed = False
    if cfg.gzip_output:
        is_changed = True
        compress_dataset_files(corrected_dataset_data, ext_python_modules_home, cfg.max_threads, log)
    if not_used_dataset_data:
        is_changed = True
        corrected_dataset_data += not_used_dataset_data
    if is_changed:
        pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'), default_flow_style = False, default_style='"', width=100500)
    log.info("\n== Dataset description file was created: " + corrected_dataset_yaml_filename + "\n")

    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
Esempio n. 10
0
def run_spades(configs_dir, execution_home, cfg, dataset_data,
               ext_python_modules_home, log):
    if not isinstance(cfg.iterative_K, list):
        cfg.iterative_K = [cfg.iterative_K]
    cfg.iterative_K = sorted(cfg.iterative_K)

    # checking and removing conflicting K-mer directories
    if options_storage.restart_from:
        processed_K = []
        for k in range(options_storage.MIN_K, options_storage.MAX_K, 2):
            cur_K_dir = os.path.join(cfg.output_dir, "K%d" % k)
            if os.path.isdir(cur_K_dir) and os.path.isfile(
                    os.path.join(cur_K_dir, "final_contigs.fasta")):
                processed_K.append(k)
        if processed_K:
            RL = get_read_length(cfg.output_dir, processed_K[0],
                                 ext_python_modules_home, log)
            needed_K = update_k_mers_in_special_cases(cfg.iterative_K,
                                                      RL,
                                                      log,
                                                      silent=True)
            needed_K = [k for k in needed_K if k < RL]
            original_K = reveal_original_k_mers(RL)

            k_to_delete = []
            for id, k in enumerate(needed_K):
                if len(processed_K) == id:
                    if processed_K[-1] == original_K[
                            -1]:  # the last K in the original run was processed in "last_one" mode
                        k_to_delete = [original_K[-1]]
                    break
                if processed_K[id] != k:
                    k_to_delete = processed_K[id:]
                    break
            if not k_to_delete and (len(processed_K) > len(needed_K)):
                k_to_delete = processed_K[len(needed_K) - 1:]
            if k_to_delete:
                log.info(
                    "Restart mode: removing previously processed directories for K=%s "
                    "to avoid conflicts with K specified with --restart-from" %
                    (str(k_to_delete)))
                for k in k_to_delete:
                    shutil.rmtree(os.path.join(cfg.output_dir, "K%d" % k))

    bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")
    if os.path.isdir(bin_reads_dir) and not options_storage.continue_mode:
        shutil.rmtree(bin_reads_dir)
    cfg.tmp_dir = support.get_tmp_dir(prefix="spades_")

    if len(cfg.iterative_K) == 1:
        run_iteration(configs_dir, execution_home, cfg, log,
                      cfg.iterative_K[0], None, True)
        K = cfg.iterative_K[0]
    else:
        run_iteration(configs_dir, execution_home, cfg, log,
                      cfg.iterative_K[0], None, False)
        prev_K = cfg.iterative_K[0]
        RL = get_read_length(cfg.output_dir, cfg.iterative_K[0],
                             ext_python_modules_home, log)
        cfg.iterative_K = update_k_mers_in_special_cases(
            cfg.iterative_K, RL, log)
        if cfg.iterative_K[1] + 1 > RL:
            if cfg.rr_enable:
                support.warning(
                    "Second value of iterative K (%d) exceeded estimated read length (%d). "
                    "Rerunning for the first value of K (%d) with Repeat Resolving"
                    % (cfg.iterative_K[1], RL, cfg.iterative_K[0]), log)
                run_iteration(configs_dir, execution_home, cfg, log,
                              cfg.iterative_K[0], None, True)
                K = cfg.iterative_K[0]
        else:
            rest_of_iterative_K = cfg.iterative_K
            rest_of_iterative_K.pop(0)
            count = 0
            for K in rest_of_iterative_K:
                count += 1
                last_one = count == len(
                    cfg.iterative_K) or (rest_of_iterative_K[count] + 1 > RL)
                run_iteration(configs_dir, execution_home, cfg, log, K, prev_K,
                              last_one)
                prev_K = K
                if last_one:
                    break
            if count < len(cfg.iterative_K):
                support.warning(
                    "Iterations stopped. Value of K (%d) exceeded estimated read length (%d)"
                    % (cfg.iterative_K[count], RL), log)

    latest = os.path.join(cfg.output_dir, "K%d" % K)

    for format in [".fasta", ".fastg"]:
        if os.path.isfile(os.path.join(latest, "before_rr" + format)):
            result_before_rr_contigs = os.path.join(
                os.path.dirname(cfg.result_contigs), "before_rr" + format)
            if not os.path.isfile(result_before_rr_contigs
                                  ) or not options_storage.continue_mode:
                shutil.copyfile(os.path.join(latest, "before_rr" + format),
                                result_before_rr_contigs)
        if os.path.isfile(os.path.join(latest, "final_contigs" + format)):
            if not os.path.isfile(cfg.result_contigs[:-6] +
                                  format) or not options_storage.continue_mode:
                shutil.copyfile(os.path.join(latest, "final_contigs" + format),
                                cfg.result_contigs[:-6] + format)
        if cfg.rr_enable:
            if os.path.isfile(os.path.join(latest, "scaffolds" + format)):
                if not os.path.isfile(cfg.result_scaffolds[:-6] + format
                                      ) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "scaffolds" + format),
                                    cfg.result_scaffolds[:-6] + format)

    if cfg.developer_mode:
        # saves
        saves_link = os.path.join(os.path.dirname(cfg.result_contigs), "saves")
        if os.path.lexists(
                saves_link
        ):  # exists return False for broken link! lexists return True
            os.remove(saves_link)
        os.symlink(os.path.join(latest, "saves"), saves_link)

    if os.path.isdir(bin_reads_dir):
        shutil.rmtree(bin_reads_dir)
    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)

    return latest
Esempio n. 11
0
def main(args):
    os.environ["LC_ALL"] = "C"

    if len(args) == 1:
        options_storage.usage(spades_version)
        sys.exit(0)

    log = logging.getLogger('spades')
    log.setLevel(logging.DEBUG)

    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter('%(message)s'))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)

    support.check_binaries(bin_home, log)

    # parse options and safe all parameters to cfg
    options = args
    cfg, dataset_data = fill_cfg(options, log)

    if options_storage.continue_mode:
        cmd_line, options = get_options_from_params(
            os.path.join(options_storage.output_dir, "params.txt"), args[0])
        if not options:
            support.error(
                "failed to parse command line of the previous run! Please restart from the beginning or specify another output directory."
            )
        cfg, dataset_data = fill_cfg(options, log)
        if options_storage.restart_from:
            check_cfg_for_restart_from(cfg)
        options_storage.continue_mode = True

    log_filename = os.path.join(cfg["common"].output_dir, "spades.log")
    if options_storage.continue_mode:
        log_handler = logging.FileHandler(log_filename, mode='a')
    else:
        log_handler = logging.FileHandler(log_filename, mode='w')
    log.addHandler(log_handler)

    if options_storage.continue_mode:
        log.info(
            "\n======= SPAdes pipeline continued. Log can be found here: " +
            log_filename + "\n")
        log.info("Restored from " + cmd_line)
        if options_storage.restart_from:
            updated_params = ""
            flag = False
            for v in args[1:]:
                if v == '-o' or v == '--restart-from':
                    flag = True
                    continue
                if flag:
                    flag = False
                    continue
                updated_params += " " + v
            updated_params = updated_params.strip()
            log.info("with updated parameters: " + updated_params)
            cmd_line += " " + updated_params
        log.info("")

    params_filename = os.path.join(cfg["common"].output_dir, "params.txt")
    params_handler = logging.FileHandler(params_filename, mode='w')
    log.addHandler(params_handler)

    if options_storage.continue_mode:
        log.info(cmd_line)
    else:
        command = "Command line:"
        for v in args:
            command += " " + v
        log.info(command)

    # special case
    if "mismatch_corrector" in cfg and not support.get_lib_ids_by_type(
            dataset_data, 'paired-end'):
        support.warning(
            'cannot perform mismatch correction without at least one paired-end library! Skipping this step.',
            log)
        del cfg["mismatch_corrector"]

    print_used_values(cfg, log)
    log.removeHandler(params_handler)

    support.check_single_reads_in_options(options, log)

    if not options_storage.continue_mode:
        log.info("\n======= SPAdes pipeline started. Log can be found here: " +
                 log_filename + "\n")

    # splitting interlaced reads and processing Ns in additional contigs if needed
    if support.dataset_has_interlaced_reads(
            dataset_data) or support.dataset_has_additional_contigs(
                dataset_data):
        dir_for_split_reads = os.path.join(options_storage.output_dir,
                                           'split_input')
        if support.dataset_has_interlaced_reads(dataset_data):
            if not os.path.isdir(dir_for_split_reads):
                os.makedirs(dir_for_split_reads)
            dataset_data = support.split_interlaced_reads(
                dataset_data, dir_for_split_reads, log)
        if support.dataset_has_additional_contigs(dataset_data):
            dataset_data = support.process_Ns_in_additional_contigs(
                dataset_data, dir_for_split_reads, log)
        options_storage.dataset_yaml_filename = os.path.join(
            options_storage.output_dir, "input_dataset.yaml")
        pyyaml.dump(dataset_data,
                    open(options_storage.dataset_yaml_filename, 'w'))
        cfg["dataset"].yaml_filename = options_storage.dataset_yaml_filename

    try:
        # copying configs before all computations (to prevent its changing at run time)
        tmp_configs_dir = os.path.join(cfg["common"].output_dir, "configs")
        if os.path.isdir(
                tmp_configs_dir) and not options_storage.continue_mode:
            shutil.rmtree(tmp_configs_dir)
        if not os.path.isdir(tmp_configs_dir):
            dir_util.copy_tree(os.path.join(spades_home, "configs"),
                               tmp_configs_dir,
                               preserve_times=False)

        corrected_dataset_yaml_filename = ''
        if "error_correction" in cfg:
            STAGE_NAME = "Read error correction"
            bh_cfg = merge_configs(cfg["error_correction"], cfg["common"])
            corrected_dataset_yaml_filename = os.path.join(
                bh_cfg.output_dir, "corrected.yaml")
            if os.path.isfile(corrected_dataset_yaml_filename) and options_storage.continue_mode \
                and not options_storage.restart_from == "ec":
                log.info("\n===== Skipping %s (already processed). \n" %
                         STAGE_NAME)
            else:
                support.continue_from_here(log)

                if "HEAPCHECK" in os.environ:
                    del os.environ["HEAPCHECK"]
                if "heap_check" in bh_cfg.__dict__:
                    os.environ["HEAPCHECK"] = bh_cfg.heap_check

                if os.path.exists(bh_cfg.output_dir):
                    shutil.rmtree(bh_cfg.output_dir)
                os.makedirs(bh_cfg.output_dir)

                if support.get_lib_ids_by_type(
                        dataset_data, options_storage.LONG_READS_TYPES):
                    not_used_dataset_data = support.get_libs_by_type(
                        dataset_data, options_storage.LONG_READS_TYPES)
                    to_correct_dataset_data = support.rm_libs_by_type(
                        dataset_data, options_storage.LONG_READS_TYPES)
                    to_correct_dataset_yaml_filename = os.path.join(
                        bh_cfg.output_dir, "to_correct.yaml")
                    pyyaml.dump(to_correct_dataset_data,
                                open(to_correct_dataset_yaml_filename, 'w'))
                    bh_cfg.__dict__[
                        "dataset_yaml_filename"] = to_correct_dataset_yaml_filename
                else:
                    not_used_dataset_data = None
                    bh_cfg.__dict__["dataset_yaml_filename"] = cfg[
                        "dataset"].yaml_filename

                log.info("\n===== %s started. \n" % STAGE_NAME)
                hammer_logic.run_hammer(corrected_dataset_yaml_filename,
                                        tmp_configs_dir, bin_home, bh_cfg,
                                        not_used_dataset_data,
                                        ext_python_modules_home, log)
                log.info("\n===== %s finished. \n" % STAGE_NAME)

        result_contigs_filename = os.path.join(cfg["common"].output_dir,
                                               "contigs.fasta")
        result_scaffolds_filename = os.path.join(cfg["common"].output_dir,
                                                 "scaffolds.fasta")
        misc_dir = os.path.join(cfg["common"].output_dir, "misc")
        ### if mismatch correction is enabled then result contigs are copied to misc directory
        assembled_contigs_filename = os.path.join(misc_dir,
                                                  "assembled_contigs.fasta")
        assembled_scaffolds_filename = os.path.join(
            misc_dir, "assembled_scaffolds.fasta")
        if "assembly" in cfg:
            STAGE_NAME = "Assembling"
            spades_cfg = merge_configs(cfg["assembly"], cfg["common"])
            spades_cfg.__dict__["result_contigs"] = result_contigs_filename
            spades_cfg.__dict__["result_scaffolds"] = result_scaffolds_filename

            if options_storage.continue_mode and (os.path.isfile(spades_cfg.result_contigs)
                                                  or ("mismatch_corrector" in cfg and
                                                      os.path.isfile(assembled_contigs_filename)))\
                and not options_storage.restart_from == 'as' \
                and not (options_storage.restart_from and options_storage.restart_from.startswith('k')):

                log.info("\n===== Skipping %s (already processed). \n" %
                         STAGE_NAME)
                # calculating latest_dir for the next stages
                latest_dir = support.get_latest_dir(
                    os.path.join(spades_cfg.output_dir, "K*"))
                if not latest_dir:
                    support.error(
                        "failed to continue the previous run! Please restart from previous stages or from the beginning.",
                        log)
            else:
                old_result_files = [
                    result_contigs_filename, result_scaffolds_filename,
                    assembled_contigs_filename, assembled_scaffolds_filename
                ]
                for format in [".fasta", ".fastg"]:
                    for old_result_file in old_result_files:
                        if os.path.isfile(old_result_file[:-6] + format):
                            os.remove(old_result_file[:-6] + format)

                if options_storage.restart_from == 'as':
                    support.continue_from_here(log)

                if os.path.isfile(corrected_dataset_yaml_filename):
                    dataset_data = pyyaml.load(
                        open(corrected_dataset_yaml_filename, 'r'))
                    dataset_data = support.relative2abs_paths(
                        dataset_data,
                        os.path.dirname(corrected_dataset_yaml_filename))
                if spades_cfg.disable_rr:
                    spades_cfg.__dict__["rr_enable"] = False
                else:
                    spades_cfg.__dict__["rr_enable"] = True

                if "HEAPCHECK" in os.environ:
                    del os.environ["HEAPCHECK"]
                if "heap_check" in spades_cfg.__dict__:
                    os.environ["HEAPCHECK"] = spades_cfg.heap_check

                log.info("\n===== %s started.\n" % STAGE_NAME)

                # creating dataset
                dataset_filename = os.path.join(spades_cfg.output_dir,
                                                "dataset.info")
                if not os.path.isfile(
                        dataset_filename) or not options_storage.continue_mode:
                    dataset_file = open(dataset_filename, 'w')
                    import process_cfg
                    dataset_file.write(
                        "single_cell" + '\t' +
                        process_cfg.bool_to_str(cfg["dataset"].single_cell) +
                        '\n')
                    if os.path.isfile(corrected_dataset_yaml_filename):
                        dataset_file.write(
                            "reads" + '\t' + process_cfg.process_spaces(
                                corrected_dataset_yaml_filename) + '\n')
                    else:
                        dataset_file.write("reads" + '\t' +
                                           process_cfg.process_spaces(
                                               cfg["dataset"].yaml_filename) +
                                           '\n')
                    if spades_cfg.developer_mode and "reference" in cfg[
                            "dataset"].__dict__:
                        dataset_file.write("reference_genome" + '\t')
                        dataset_file.write(
                            process_cfg.process_spaces(
                                cfg["dataset"].reference) + '\n')
                    dataset_file.close()
                spades_cfg.__dict__["dataset"] = dataset_filename

                latest_dir = spades_logic.run_spades(tmp_configs_dir, bin_home,
                                                     spades_cfg, dataset_data,
                                                     ext_python_modules_home,
                                                     log)

                if os.path.isdir(
                        misc_dir) and not options_storage.continue_mode:
                    shutil.rmtree(misc_dir)
                if not os.path.isdir(misc_dir):
                    os.makedirs(misc_dir)

                if options_storage.continue_mode and options_storage.restart_from and options_storage.restart_from.startswith(
                        'k'):
                    k_str = options_storage.restart_from[1:]
                    if k_str.find(":") != -1:
                        k_str = k_str[:k_str.find(":")]
                    support.error(
                        "failed to continue from K=%s because this K was not processed in the original run!"
                        % k_str, log)
                log.info("\n===== %s finished. \n" % STAGE_NAME)

            #corrector
            if "mismatch_corrector" in cfg and (
                    os.path.isfile(result_contigs_filename) or
                (options_storage.continue_mode
                 and os.path.isfile(assembled_contigs_filename))):
                STAGE_NAME = "Mismatch correction"
                to_correct = dict()
                to_correct["contigs"] = (result_contigs_filename,
                                         assembled_contigs_filename)
                if os.path.isfile(result_scaffolds_filename) or (
                        options_storage.continue_mode
                        and os.path.isfile(assembled_scaffolds_filename)):
                    to_correct["scaffolds"] = (result_scaffolds_filename,
                                               assembled_scaffolds_filename)

                # moving assembled contigs (scaffolds) to misc dir
                for assembly_type, (old, new) in to_correct.items():
                    if options_storage.continue_mode and os.path.isfile(new):
                        continue
                    for format in [".fasta", ".fastg"]:
                        if os.path.isfile(old[:-6] + format):
                            shutil.move(old[:-6] + format, new[:-6] + format)

                if options_storage.continue_mode and os.path.isfile(result_contigs_filename) and \
                    (os.path.isfile(result_scaffolds_filename) or not os.path.isfile(assembled_scaffolds_filename)) \
                    and not options_storage.restart_from == 'mc':
                    log.info("\n===== Skipping %s (already processed). \n" %
                             STAGE_NAME)
                else:
                    if options_storage.restart_from == 'mc':
                        support.continue_from_here(log)

                    log.info("\n===== %s started." % STAGE_NAME)
                    # detecting paired-end library with the largest insert size
                    est_params_data = pyyaml.load(
                        open(os.path.join(latest_dir, "final.lib_data"), 'r'))
                    max_IS_library = None
                    for reads_library in est_params_data:
                        if reads_library['type'] == 'paired-end':
                            if not max_IS_library or float(
                                    reads_library["insert size mean"]) > float(
                                        max_IS_library["insert size mean"]):
                                max_IS_library = reads_library
                    if not max_IS_library:
                        support.error(
                            'Mismatch correction cannot be performed without at least one paired-end library!',
                            log)
                    if not max_IS_library["insert size mean"]:
                        support.warning(
                            'Failed to estimate insert size for all paired-end libraries. Starting Mismatch correction'
                            ' based on the first paired-end library and with default insert size.',
                            log)
                    else:
                        cfg["mismatch_corrector"].__dict__[
                            "insert-size"] = round(
                                max_IS_library["insert size mean"])
                    yaml_dirname = os.path.dirname(
                        options_storage.dataset_yaml_filename)
                    cfg["mismatch_corrector"].__dict__["1"] = list(
                        map(lambda x: os.path.join(yaml_dirname, x),
                            max_IS_library['left reads']))
                    cfg["mismatch_corrector"].__dict__["2"] = list(
                        map(lambda x: os.path.join(yaml_dirname, x),
                            max_IS_library['right reads']))
                    #TODO: add reads orientation

                    import corrector
                    corrector_cfg = cfg["mismatch_corrector"]
                    args = []
                    for key, values in corrector_cfg.__dict__.items():
                        if key == "output-dir":
                            continue

                        # for processing list of reads
                        if not isinstance(values, list):
                            values = [values]
                        for value in values:
                            if len(key) == 1:
                                args.append('-' + key)
                            else:
                                args.append('--' + key)
                            if value is not None:
                                args.append(value)

                    # processing contigs and scaffolds (or only contigs)
                    for assembly_type, (corrected,
                                        assembled) in to_correct.items():
                        if options_storage.continue_mode and os.path.isfile(
                                corrected):
                            log.info("\n== Skipping processing of " +
                                     assembly_type + " (already processed)\n")
                            continue

                        support.continue_from_here(log)
                        log.info("\n== Processing of " + assembly_type + "\n")

                        cur_args = args[:]
                        cur_args += ['-c', assembled]
                        tmp_dir_for_corrector = support.get_tmp_dir(
                            prefix="mis_cor_%s_" % assembly_type)
                        cur_args += ['--output-dir', tmp_dir_for_corrector]

                        # correcting
                        corrector.main(cur_args, ext_python_modules_home, log)

                        result_corrected_filename = os.path.join(
                            tmp_dir_for_corrector, "corrected_contigs.fasta")
                        # moving corrected contigs (scaffolds) to SPAdes output dir
                        if os.path.isfile(result_corrected_filename):
                            shutil.move(result_corrected_filename, corrected)

                        if os.path.isdir(tmp_dir_for_corrector):
                            shutil.rmtree(tmp_dir_for_corrector)

                        assembled_fastg = assembled[:-6] + ".fastg"
                        if os.path.isfile(assembled_fastg):
                            support.create_fastg_from_fasta(
                                corrected, assembled_fastg, log)
                    log.info("\n===== %s finished.\n" % STAGE_NAME)

        if not cfg["common"].developer_mode and os.path.isdir(tmp_configs_dir):
            shutil.rmtree(tmp_configs_dir)

        #log.info("")
        if "error_correction" in cfg and os.path.isdir(
                os.path.dirname(corrected_dataset_yaml_filename)):
            log.info(" * Corrected reads are in " + support.process_spaces(
                os.path.dirname(corrected_dataset_yaml_filename) + "/"))
        if "assembly" in cfg and os.path.isfile(result_contigs_filename):
            message = " * Assembled contigs are in " + support.process_spaces(
                result_contigs_filename)
            if os.path.isfile(result_contigs_filename[:-6] + ".fastg"):
                message += " (" + os.path.basename(
                    result_contigs_filename[:-6] + ".fastg") + ")"
            log.info(message)
        if "assembly" in cfg and os.path.isfile(result_scaffolds_filename):
            message = " * Assembled scaffolds are in " + support.process_spaces(
                result_scaffolds_filename)
            if os.path.isfile(result_scaffolds_filename[:-6] + ".fastg"):
                message += " (" + os.path.basename(
                    result_scaffolds_filename[:-6] + ".fastg") + ")"
            log.info(message)
        #log.info("")

        #breaking scaffolds
        if os.path.isfile(result_scaffolds_filename):
            if not os.path.isdir(misc_dir):
                os.makedirs(misc_dir)
            result_broken_scaffolds = os.path.join(misc_dir,
                                                   "broken_scaffolds.fasta")
            if not os.path.isfile(result_broken_scaffolds
                                  ) or not options_storage.continue_mode:
                modified, broken_scaffolds = support.break_scaffolds(
                    result_scaffolds_filename,
                    options_storage.THRESHOLD_FOR_BREAKING_SCAFFOLDS)
                if modified:
                    support.write_fasta(result_broken_scaffolds,
                                        broken_scaffolds)
                    #log.info(" * Scaffolds broken by " + str(options_storage.THRESHOLD_FOR_BREAKING_SCAFFOLDS) +
                    # " Ns are in " + result_broken_scaffolds)

        ### printing WARNINGS SUMMARY
        if not support.log_warnings(log):
            log.info("\n======= SPAdes pipeline finished."
                     )  # otherwise it finished WITH WARNINGS

        if options_storage.test_mode:
            for result_filename in [
                    result_contigs_filename, result_scaffolds_filename
            ]:
                if os.path.isfile(result_filename):
                    result_fasta = list(support.read_fasta(result_filename))
                    # correctness check: should be one contig of length 1000 bp
                    correct_number = 1
                    correct_length = 1000
                    if not len(result_fasta):
                        support.error(
                            "TEST FAILED: %s does not contain contigs!" %
                            result_filename)
                    elif len(result_fasta) > correct_number:
                        support.error(
                            "TEST FAILED: %s contains more than %d contig (%d)!"
                            % (result_filename, correct_number,
                               len(result_fasta)))
                    elif len(result_fasta[0][1]) != correct_length:
                        if len(result_fasta[0][1]) > correct_length:
                            relation = "more"
                        else:
                            relation = "less"
                        support.error(
                            "TEST FAILED: %s contains %s than %d bp (%d bp)!" %
                            (result_filename, relation, correct_length,
                             len(result_fasta[0][1])))
                else:
                    support.error("TEST FAILED: " + result_filename +
                                  " does not exist!")
            log.info("\n========= TEST PASSED CORRECTLY.")

        log.info("\nSPAdes log can be found here: " + log_filename)
        log.info("")
        log.info("Thank you for using SPAdes!")
        log.removeHandler(log_handler)

    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            if exc_type == OSError and exc_value.errno == errno.ENOEXEC:  # Exec format error
                support.error(
                    "It looks like you are using SPAdes binaries for another platform.\n"
                    + support.get_spades_binaries_info_message())
            else:
                log.exception(exc_value)
                support.error("exception caught: %s" % exc_type, log)
    except BaseException:  # since python 2.5 system-exiting exceptions (e.g. KeyboardInterrupt) are derived from BaseException
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            support.error("exception caught: %s" % exc_type, log)
Esempio n. 12
0
def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home,
               cfg, dataset_data, ext_python_modules_home,
               only_compressing_is_needed, log):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    # not all reads need processing
    if support.get_lib_ids_by_type(dataset_data,
                                   options_storage.LONG_READS_TYPES):
        not_used_dataset_data = support.get_libs_by_type(
            dataset_data, options_storage.LONG_READS_TYPES)
        to_correct_dataset_data = support.rm_libs_by_type(
            dataset_data, options_storage.LONG_READS_TYPES)
        to_correct_dataset_yaml_filename = os.path.join(
            cfg.output_dir, "to_correct.yaml")
        pyyaml.dump(to_correct_dataset_data,
                    open(to_correct_dataset_yaml_filename, 'w'),
                    default_flow_style=False,
                    default_style='"',
                    width=float("inf"))
        cfg.dataset_yaml_filename = to_correct_dataset_yaml_filename
    else:
        not_used_dataset_data = None

    if not only_compressing_is_needed:
        dst_configs = os.path.join(cfg.output_dir, "configs")
        if os.path.exists(dst_configs):
            shutil.rmtree(dst_configs)
        if cfg.iontorrent:
            dir_util.copy_tree(os.path.join(configs_dir, "ionhammer"),
                               dst_configs,
                               preserve_times=False)
            cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg")
        else:
            dir_util.copy_tree(os.path.join(configs_dir, "hammer"),
                               dst_configs,
                               preserve_times=False)
            cfg_file_name = os.path.join(dst_configs, "config.info")

        cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_")
        if cfg.iontorrent:
            prepare_config_ih(cfg_file_name, cfg, ext_python_modules_home)
            binary_name = "spades-ionhammer"
        else:
            prepare_config_bh(cfg_file_name, cfg, log)
            binary_name = "spades-hammer"

        command = [
            os.path.join(execution_home, binary_name),
            os.path.abspath(cfg_file_name)
        ]

        log.info("\n== Running read error correction tool: " +
                 ' '.join(command) + "\n")
        support.sys_call(command, log)
        if not os.path.isfile(corrected_dataset_yaml_filename):
            support.error("read error correction finished abnormally: " +
                          corrected_dataset_yaml_filename + " not found!")
    else:
        log.info("\n===== Skipping %s (already processed). \n" %
                 "read error correction tool")
        support.continue_from_here(log)

    corrected_dataset_data = pyyaml.load(
        open(corrected_dataset_yaml_filename, 'r'))
    remove_not_corrected_reads(cfg.output_dir)
    is_changed = False
    if cfg.gzip_output:
        is_changed = True
        compress_dataset_files(corrected_dataset_data, ext_python_modules_home,
                               cfg.max_threads, log)
    if not_used_dataset_data:
        is_changed = True
        corrected_dataset_data += not_used_dataset_data
    if is_changed:
        pyyaml.dump(corrected_dataset_data,
                    open(corrected_dataset_yaml_filename, 'w'),
                    default_flow_style=False,
                    default_style='"',
                    width=float("inf"))
    log.info("\n== Dataset description file was created: " +
             corrected_dataset_yaml_filename + "\n")

    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
Esempio n. 13
0
def main(ds_args_list, general_args_list, spades_home, bin_home):
    log = logging.getLogger('dipspades')
    log.setLevel(logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter('%(message)s'))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)

    support.check_binaries(bin_home, log)
    ds_args = parse_arguments(ds_args_list, log)

    if not os.path.exists(ds_args.output_dir):
        os.makedirs(ds_args.output_dir)
    log_filename = os.path.join(ds_args.output_dir, "dipspades.log")
    if os.path.exists(log_filename):
        os.remove(log_filename)
    log_handler = logging.FileHandler(log_filename, mode='a')
    log.addHandler(log_handler)

    params_filename = os.path.join(ds_args.output_dir, "params.txt")
    params_handler = logging.FileHandler(params_filename, mode='a')
    log.addHandler(params_handler)

    log.info("\n")
    log.info("General command line: " + " ".join(general_args_list) + "\n")
    log.info("dipSPAdes command line: " + " ".join(ds_args_list) + "\n")
    print_ds_args(ds_args, log)
    log.removeHandler(params_handler)

    log.info("\n======= dipSPAdes started. Log can be found here: " + log_filename + "\n")
    write_haplocontigs_in_file(ds_args.haplocontigs, ds_args.haplocontigs_fnames)

    config_fname = prepare_configs(os.path.join(spades_home, "configs", "dipspades"), ds_args, log)
    ds_args.tmp_dir = support.get_tmp_dir(prefix="dipspades_", base_dir=ds_args.tmp_dir)
    prepare_config(config_fname, ds_args, log)

    try:
        log.info("===== Assembling started.\n")
        binary_path = os.path.join(bin_home, "dipspades")
        command = [binary_path, config_fname]
        support.sys_call(command, log)
        log.info("\n===== Assembling finished.\n")
        print_ds_output(ds_args.output_dir, log)
        if os.path.isdir(ds_args.tmp_dir):
            shutil.rmtree(ds_args.tmp_dir)
        log.info("\n======= dipSPAdes finished.\n")
        log.info("dipSPAdes log can be found here: " + log_filename + "\n")
        log.info("Thank you for using dipSPAdes!")
        log.removeHandler(log_handler)
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            if exc_type == OSError and exc_value.errno == errno.ENOEXEC: # Exec format error
                support.error("It looks like you are using SPAdes binaries for another platform.\n" +
                              support.get_spades_binaries_info_message(), dipspades=True)
            else:
                log.exception(exc_value)
                support.error("exception caught: %s" % exc_type, log)
    except BaseException: # since python 2.5 system-exiting exceptions (e.g. KeyboardInterrupt) are derived from BaseException
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            support.error("exception caught: %s" % exc_type, log, dipspades=True)
Esempio n. 14
0
def run_spades(configs_dir, execution_home, cfg, dataset_data,
               ext_python_modules_home, log):
    if not isinstance(cfg.iterative_K, list):
        cfg.iterative_K = [cfg.iterative_K]
    cfg.iterative_K = sorted(cfg.iterative_K)
    used_K = []

    # checking and removing conflicting K-mer directories
    if options_storage.restart_from and (options_storage.restart_k_mers !=
                                         options_storage.original_k_mers):
        processed_K = []
        for k in range(options_storage.MIN_K, options_storage.MAX_K, 2):
            cur_K_dir = os.path.join(cfg.output_dir, "K%d" % k)
            if os.path.isdir(cur_K_dir) and os.path.isfile(
                    os.path.join(cur_K_dir, "final_contigs.fasta")):
                processed_K.append(k)
        if processed_K:
            RL = get_read_length(cfg.output_dir, processed_K[0],
                                 ext_python_modules_home, log)
            needed_K = update_k_mers_in_special_cases(cfg.iterative_K,
                                                      RL,
                                                      log,
                                                      silent=True)
            needed_K = [k for k in needed_K if k < RL]
            original_K = reveal_original_k_mers(RL)

            k_to_delete = []
            for id, k in enumerate(needed_K):
                if len(processed_K) == id:
                    if processed_K[-1] == original_K[
                            -1]:  # the last K in the original run was processed in "last_one" mode
                        k_to_delete = [original_K[-1]]
                    break
                if processed_K[id] != k:
                    k_to_delete = processed_K[id:]
                    break
            if not k_to_delete and (len(processed_K) > len(needed_K)):
                k_to_delete = processed_K[len(needed_K) - 1:]
            if k_to_delete:
                log.info(
                    "Restart mode: removing previously processed directories for K=%s "
                    "to avoid conflicts with K specified with --restart-from" %
                    (str(k_to_delete)))
                for k in k_to_delete:
                    shutil.rmtree(os.path.join(cfg.output_dir, "K%d" % k))

    bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")
    if os.path.isdir(bin_reads_dir) and not options_storage.continue_mode:
        shutil.rmtree(bin_reads_dir)
    cfg.tmp_dir = support.get_tmp_dir(prefix="spades_")

    finished_on_stop_after = False
    K = cfg.iterative_K[0]
    if len(cfg.iterative_K) == 1:
        run_iteration(configs_dir, execution_home, cfg, log, K, None, True)
        used_K.append(K)
    else:
        run_iteration(configs_dir, execution_home, cfg, log, K, None, False)
        used_K.append(K)
        if options_storage.stop_after == "k%d" % K:
            finished_on_stop_after = True
        else:
            prev_K = K
            RL = get_read_length(cfg.output_dir, K, ext_python_modules_home,
                                 log)
            cfg.iterative_K = update_k_mers_in_special_cases(
                cfg.iterative_K, RL, log)
            if len(cfg.iterative_K) < 2 or cfg.iterative_K[1] + 1 > RL:
                if cfg.rr_enable:
                    if len(cfg.iterative_K) < 2:
                        log.info(
                            "== Rerunning for the first value of K (%d) with Repeat Resolving"
                            % cfg.iterative_K[0])
                    else:
                        support.warning(
                            "Second value of iterative K (%d) exceeded estimated read length (%d). "
                            "Rerunning for the first value of K (%d) with Repeat Resolving"
                            % (cfg.iterative_K[1], RL, cfg.iterative_K[0]),
                            log)
                    run_iteration(configs_dir, execution_home, cfg, log,
                                  cfg.iterative_K[0], None, True)
                    used_K.append(cfg.iterative_K[0])
                    K = cfg.iterative_K[0]
            else:
                rest_of_iterative_K = cfg.iterative_K
                rest_of_iterative_K.pop(0)
                count = 0
                for K in rest_of_iterative_K:
                    count += 1
                    last_one = count == len(cfg.iterative_K) or (
                        rest_of_iterative_K[count] + 1 > RL)
                    run_iteration(configs_dir, execution_home, cfg, log, K,
                                  prev_K, last_one)
                    used_K.append(K)
                    prev_K = K
                    if last_one:
                        break
                    if options_storage.stop_after == "k%d" % K:
                        finished_on_stop_after = True
                        break
                if count < len(cfg.iterative_K) and not finished_on_stop_after:
                    support.warning(
                        "Iterations stopped. Value of K (%d) exceeded estimated read length (%d)"
                        % (cfg.iterative_K[count], RL), log)

    if options_storage.stop_after and options_storage.stop_after.startswith(
            'k'):
        support.finish_here(log)
    latest = os.path.join(cfg.output_dir, "K%d" % K)

    if cfg.correct_scaffolds and not options_storage.run_completed:
        if options_storage.continue_mode and os.path.isfile(
                os.path.join(cfg.output_dir, "SCC", "corrected_scaffolds.fasta"
                             )) and not options_storage.restart_from == "scc":
            log.info("\n===== Skipping %s (already processed). \n" %
                     "scaffold correction")
        else:
            if options_storage.continue_mode:
                support.continue_from_here(log)
            run_scaffold_correction(configs_dir, execution_home, cfg, log,
                                    latest, 21)
        latest = os.path.join(os.path.join(cfg.output_dir, "SCC"), "K21")
        if options_storage.stop_after == 'scc':
            support.finish_here(log)

    if cfg.correct_scaffolds:
        correct_scaffolds_fpath = os.path.join(latest,
                                               "corrected_scaffolds.fasta")
        if os.path.isfile(correct_scaffolds_fpath):
            shutil.copyfile(correct_scaffolds_fpath, cfg.result_scaffolds)
    elif not finished_on_stop_after:  # interupted by --stop-after, so final K is not processed!
        if os.path.isfile(os.path.join(latest, "before_rr.fasta")):
            result_before_rr_contigs = os.path.join(
                os.path.dirname(cfg.result_contigs), "before_rr.fasta")
            if not os.path.isfile(result_before_rr_contigs
                                  ) or not options_storage.continue_mode:
                shutil.copyfile(os.path.join(latest, "before_rr.fasta"),
                                result_before_rr_contigs)
        if options_storage.rna:
            if os.path.isfile(os.path.join(latest, "transcripts.fasta")):
                if not os.path.isfile(cfg.result_transcripts
                                      ) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "transcripts.fasta"),
                                    cfg.result_transcripts)
            if os.path.isfile(os.path.join(latest, "transcripts.paths")):
                if not os.path.isfile(cfg.result_transcripts_paths
                                      ) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "transcripts.paths"),
                                    cfg.result_transcripts_paths)
            for filtering_type in options_storage.filtering_types:
                prefix = filtering_type + "_filtered_"
                result_filtered_transcripts = os.path.join(
                    cfg.output_dir, prefix + options_storage.transcripts_name)
                latest_filtered_transcripts = os.path.join(
                    latest, prefix + "final_paths.fasta")
                if os.path.isfile(latest_filtered_transcripts):
                    if not os.path.isfile(
                            result_filtered_transcripts
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(latest_filtered_transcripts,
                                        result_filtered_transcripts)
        else:
            if os.path.isfile(os.path.join(latest, "final_contigs.fasta")):
                if not os.path.isfile(cfg.result_contigs
                                      ) or not options_storage.continue_mode:
                    shutil.copyfile(
                        os.path.join(latest, "final_contigs.fasta"),
                        cfg.result_contigs)
            if os.path.isfile(os.path.join(latest, "first_pe_contigs.fasta")):
                result_first_pe_contigs = os.path.join(
                    os.path.dirname(cfg.result_contigs),
                    "first_pe_contigs.fasta")
                if not os.path.isfile(result_first_pe_contigs
                                      ) or not options_storage.continue_mode:
                    shutil.copyfile(
                        os.path.join(latest, "first_pe_contigs.fasta"),
                        result_first_pe_contigs)
            if cfg.rr_enable:
                if os.path.isfile(os.path.join(latest, "scaffolds.fasta")):
                    if not os.path.isfile(
                            cfg.result_scaffolds
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(
                            os.path.join(latest, "scaffolds.fasta"),
                            cfg.result_scaffolds)
                if os.path.isfile(os.path.join(latest, "scaffolds.paths")):
                    if not os.path.isfile(
                            cfg.result_scaffolds_paths
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(
                            os.path.join(latest, "scaffolds.paths"),
                            cfg.result_scaffolds_paths)
            if os.path.isfile(
                    os.path.join(latest, "assembly_graph_with_scaffolds.gfa")):
                if not os.path.isfile(cfg.result_graph_gfa
                                      ) or not options_storage.continue_mode:
                    shutil.copyfile(
                        os.path.join(latest,
                                     "assembly_graph_with_scaffolds.gfa"),
                        cfg.result_graph_gfa)
            if os.path.isfile(os.path.join(latest, "assembly_graph.fastg")):
                if not os.path.isfile(
                        cfg.result_graph) or not options_storage.continue_mode:
                    shutil.copyfile(
                        os.path.join(latest, "assembly_graph.fastg"),
                        cfg.result_graph)
            if os.path.isfile(os.path.join(latest, "final_contigs.paths")):
                if not os.path.isfile(cfg.result_contigs_paths
                                      ) or not options_storage.continue_mode:
                    shutil.copyfile(
                        os.path.join(latest, "final_contigs.paths"),
                        cfg.result_contigs_paths)

    if cfg.developer_mode:
        # saves
        saves_link = os.path.join(os.path.dirname(cfg.result_contigs), "saves")
        if os.path.lexists(
                saves_link
        ):  # exists returns False for broken links! lexists return True
            os.remove(saves_link)
        os.symlink(os.path.join(latest, "saves"), saves_link)

    if os.path.isdir(bin_reads_dir):
        shutil.rmtree(bin_reads_dir)
    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)

    return used_K
Esempio n. 15
0
def run_mulksg(configs_dir, execution_home, cfg, dataset_data,
               ext_python_modules_home, log, comm, rank, size):
    #Running this on multiple nodes, need to differentiate the actual work here
    if not isinstance(cfg.iterative_K, list):
        cfg.iterative_K = [cfg.iterative_K]
    cfg.iterative_K = sorted(cfg.iterative_K)

    while (len(cfg.iterative_K) - 1) % size != 0:
        bgap_idx = 0
        bgap = 0
        for i in range(0, len(cfg.iterative_K) - 1):
            gap = cfg.iterative_K[i + 1] - cfg.iterative_K[i]
            if gap > bgap:
                bgap = gap
                bgap_idx = i
        if bgap == 0:
            break
        ul = cfg.iterative_K[i + 1]
        ll = cfg.iterative_K[i]
        new_k = (int)(ul - ll) / 2
        new_k = new_k / 2 * 2 + 1
        cfg.iterative_K.append(new_k)
        cfg.iterative_K = sorted(cfg.iterative_K)
        #add to cfg.iterative_K
        #add a number between the biggest gaps in the current cfg.iterative_K

    if rank == 0:  #Setup all the configs, we will put them in a work_list
        # checking and removing conflicting K-mer directories
        if options_storage.restart_from and (options_storage.restart_k_mers !=
                                             options_storage.original_k_mers):
            processed_K = []
            for k in range(options_storage.MIN_K, options_storage.MAX_K, 2):
                cur_K_dir = os.path.join(cfg.output_dir, "K%d" % k)
                if os.path.isdir(cur_K_dir) and os.path.isfile(
                        os.path.join(cur_K_dir, "final_contigs.fasta")):
                    processed_K.append(k)
            if processed_K:
                RL = get_read_length(cfg.output_dir, processed_K[0],
                                     ext_python_modules_home, log)
                needed_K = update_k_mers_in_special_cases(cfg.iterative_K,
                                                          RL,
                                                          log,
                                                          silent=True)
                needed_K = [k for k in needed_K if k < RL]
                original_K = reveal_original_k_mers(RL)

                k_to_delete = []
                for id, k in enumerate(needed_K):
                    if len(processed_K) == id:
                        if processed_K[-1] == original_K[
                                -1]:  # the last K in the original run was processed in "last_one" mode
                            k_to_delete = [original_K[-1]]
                        break
                    if processed_K[id] != k:
                        k_to_delete = processed_K[id:]
                        break
                if not k_to_delete and (len(processed_K) > len(needed_K)):
                    k_to_delete = processed_K[len(needed_K) - 1:]
                if k_to_delete:
                    log.info(
                        "Restart mode: removing previously processed directories for K=%s "
                        "to avoid conflicts with K specified with --restart-from"
                        % (str(k_to_delete)))
                    for k in k_to_delete:
                        shutil.rmtree(os.path.join(cfg.output_dir, "K%d" % k))
        bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")
        if os.path.isdir(bin_reads_dir):
            shutil.rmtree(bin_reads_dir)
        cfg.tmp_dir = support.get_tmp_dir(prefix="mulksg_")

        first_kValues = cfg.iterative_K
        last_kValue = []
        last_kValue.append(first_kValues[-1])
        del first_kValues[-1]
        #process all of the files as last_one
        first_commands, contig_files = create_mulksg_configs(
            configs_dir, execution_home, cfg, log, first_kValues, None, True,
            ext_python_modules_home)
        work_list = first_commands
        if size > 1 and len(cfg.iterative_K) > 1:
            num_running = 0
            for command in work_list:
                if num_running == (size - 1):
                    support.sys_call(command, log)
                    num_running = 0
                else:
                    node_rank = comm.recv()  #source=ANY_SOURCE
                    comm.send(command, node_rank)
                    num_running += 1

            for i in range(1, size):
                node_rank = comm.recv()  #source=ANY_SOURCE
                comm.send("Done", node_rank)
                print "Rank", node_rank, "Finished"
        else:
            if len(cfg.iterative_K) > 1:
                pool = Pool(len(first_commands))
                results = pool.map(
                    support.sys_call,
                    first_commands)  #(command for command in first_commands))
        #Now we have generated all the contig files, concatenate into one file
        extra_contig_filename = None
        try:
            extra_contig_filename = cfg.output_dir + "/extracontigs.fa"  #"/K" + last_kValue[0]
            with open(extra_contig_filename, 'w') as contig_file:
                for fname in contig_files:
                    try:
                        with open(fname) as infile:
                            contig_file.write(infile.read())
                    except:
                        continue

        except:
            log.info("Could not create extra contig file!!!!")
            extra_contig_filename = None

        last_command, contig_files = create_mulksg_configs(
            configs_dir, execution_home, cfg, log, last_kValue,
            extra_contig_filename, True, ext_python_modules_home)
        last_command = last_command[0]
        support.sys_call(last_command, log)
        K = last_kValue[0]
        latest = os.path.join(cfg.output_dir, "K%d" % K)

        finished_on_stop_after = False

        if cfg.correct_scaffolds and not options_storage.run_completed:
            if options_storage.continue_mode and os.path.isfile(
                    os.path.join(cfg.output_dir, "SCC",
                                 "corrected_scaffolds.fasta")
            ) and not options_storage.restart_from == "scc":
                log.info("\n===== Skipping %s (already processed). \n" %
                         "scaffold correction")
            else:
                if options_storage.continue_mode:
                    support.continue_from_here(log)
                run_scaffold_correction(configs_dir, execution_home, cfg, log,
                                        latest, 21)
            latest = os.path.join(os.path.join(cfg.output_dir, "SCC"), "K21")
            if options_storage.stop_after == 'scc':
                support.finish_here(log)

        if cfg.correct_scaffolds:
            correct_scaffolds_fpath = os.path.join(
                latest, "corrected_scaffolds.fasta")
            if os.path.isfile(correct_scaffolds_fpath):
                shutil.copyfile(correct_scaffolds_fpath, cfg.result_scaffolds)
        elif not finished_on_stop_after:  # interupted by --stop-after, so final K is not processed!
            if os.path.isfile(os.path.join(latest, "before_rr.fasta")):
                result_before_rr_contigs = os.path.join(
                    os.path.dirname(cfg.result_contigs), "before_rr.fasta")
                if not os.path.isfile(result_before_rr_contigs
                                      ) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "before_rr.fasta"),
                                    result_before_rr_contigs)
            if options_storage.rna:
                if os.path.isfile(os.path.join(latest, "transcripts.fasta")):
                    if not os.path.isfile(
                            cfg.result_transcripts
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(
                            os.path.join(latest, "transcripts.fasta"),
                            cfg.result_transcripts)
                if os.path.isfile(os.path.join(latest, "transcripts.paths")):
                    if not os.path.isfile(
                            cfg.result_transcripts_paths
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(
                            os.path.join(latest, "transcripts.paths"),
                            cfg.result_transcripts_paths)
                for filtering_type in options_storage.filtering_types:
                    prefix = filtering_type + "_filtered_"
                    result_filtered_transcripts = os.path.join(
                        cfg.output_dir,
                        prefix + options_storage.transcripts_name)
                    latest_filtered_transcripts = os.path.join(
                        latest, prefix + "final_paths.fasta")
                    if os.path.isfile(latest_filtered_transcripts):
                        if not os.path.isfile(
                                result_filtered_transcripts
                        ) or not options_storage.continue_mode:
                            shutil.copyfile(latest_filtered_transcripts,
                                            result_filtered_transcripts)
            else:
                if os.path.isfile(os.path.join(latest, "final_contigs.fasta")):
                    if not os.path.isfile(
                            cfg.result_contigs
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(
                            os.path.join(latest, "final_contigs.fasta"),
                            cfg.result_contigs)
                if os.path.isfile(
                        os.path.join(latest, "first_pe_contigs.fasta")):
                    result_first_pe_contigs = os.path.join(
                        os.path.dirname(cfg.result_contigs),
                        "first_pe_contigs.fasta")
                    if not os.path.isfile(
                            result_first_pe_contigs
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(
                            os.path.join(latest, "first_pe_contigs.fasta"),
                            result_first_pe_contigs)
                if cfg.rr_enable:
                    if os.path.isfile(os.path.join(latest, "scaffolds.fasta")):
                        if not os.path.isfile(
                                cfg.result_scaffolds
                        ) or not options_storage.continue_mode:
                            shutil.copyfile(
                                os.path.join(latest, "scaffolds.fasta"),
                                cfg.result_scaffolds)
                    if os.path.isfile(os.path.join(latest, "scaffolds.paths")):
                        if not os.path.isfile(
                                cfg.result_scaffolds_paths
                        ) or not options_storage.continue_mode:
                            shutil.copyfile(
                                os.path.join(latest, "scaffolds.paths"),
                                cfg.result_scaffolds_paths)
                if os.path.isfile(
                        os.path.join(latest,
                                     "assembly_graph_with_scaffolds.gfa")):
                    if not os.path.isfile(
                            cfg.result_graph_gfa
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(
                            os.path.join(latest,
                                         "assembly_graph_with_scaffolds.gfa"),
                            cfg.result_graph_gfa)
                if os.path.isfile(os.path.join(latest,
                                               "assembly_graph.fastg")):
                    if not os.path.isfile(
                            cfg.result_graph
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(
                            os.path.join(latest, "assembly_graph.fastg"),
                            cfg.result_graph)
                if os.path.isfile(os.path.join(latest, "final_contigs.paths")):
                    if not os.path.isfile(
                            cfg.result_contigs_paths
                    ) or not options_storage.continue_mode:
                        shutil.copyfile(
                            os.path.join(latest, "final_contigs.paths"),
                            cfg.result_contigs_paths)

        if os.path.isdir(bin_reads_dir):
            shutil.rmtree(bin_reads_dir)
        if os.path.isdir(cfg.tmp_dir):
            shutil.rmtree(cfg.tmp_dir)

        return first_kValues + last_kValue

    elif rank != 0 and len(cfg.iterative_K) > 1:
        comm.send(rank, 0)
        command = comm.recv(source=0)
        while command != "Done":
            # print rank, command
            support.sys_call(command, log)
            comm.send(rank, 0)
            command = comm.recv(source=0)
        # comm.send(rank, 0) #tell the master I am done and exiting
        return None
Esempio n. 16
0
def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_modules_home, log):
    if not isinstance(cfg.iterative_K, list):
        cfg.iterative_K = [cfg.iterative_K]
    cfg.iterative_K = sorted(cfg.iterative_K)

    # checking and removing conflicting K-mer directories
    if options_storage.restart_from:
        processed_K = []
        for k in range(options_storage.MIN_K, options_storage.MAX_K, 2):
            cur_K_dir = os.path.join(cfg.output_dir, "K%d" % k)
            if os.path.isdir(cur_K_dir) and os.path.isfile(os.path.join(cur_K_dir, "final_contigs.fasta")):
                processed_K.append(k)
        if processed_K:
            RL = get_read_length(cfg.output_dir, processed_K[0], ext_python_modules_home, log)
            needed_K = update_k_mers_in_special_cases(cfg.iterative_K, RL, log, silent=True)
            needed_K = [k for k in needed_K if k < RL]
            original_K = reveal_original_k_mers(RL)

            k_to_delete = []
            for id, k in enumerate(needed_K):
                if len(processed_K) == id:
                    if processed_K[-1] == original_K[-1]: # the last K in the original run was processed in "last_one" mode
                        k_to_delete = [original_K[-1]]
                    break
                if processed_K[id] != k:
                    k_to_delete = processed_K[id:]
                    break
            if not k_to_delete and (len(processed_K) > len(needed_K)):
                k_to_delete = processed_K[len(needed_K) - 1:]
            if k_to_delete:
                log.info("Restart mode: removing previously processed directories for K=%s "
                         "to avoid conflicts with K specified with --restart-from" % (str(k_to_delete)))
                for k in k_to_delete:
                    shutil.rmtree(os.path.join(cfg.output_dir, "K%d" % k))

    bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")
    if os.path.isdir(bin_reads_dir) and not options_storage.continue_mode:
        shutil.rmtree(bin_reads_dir)
    cfg.tmp_dir = support.get_tmp_dir(prefix="spades_")

    if len(cfg.iterative_K) == 1:
        run_iteration(configs_dir, execution_home, cfg, log, cfg.iterative_K[0], None, True)
        K = cfg.iterative_K[0]
    else:
        run_iteration(configs_dir, execution_home, cfg, log, cfg.iterative_K[0], None, False)
        prev_K = cfg.iterative_K[0]
        RL = get_read_length(cfg.output_dir, cfg.iterative_K[0], ext_python_modules_home, log)
        cfg.iterative_K = update_k_mers_in_special_cases(cfg.iterative_K, RL, log)
        if cfg.iterative_K[1] + 1 > RL:
            if cfg.rr_enable:
                support.warning("Second value of iterative K (%d) exceeded estimated read length (%d). "
                                "Rerunning for the first value of K (%d) with Repeat Resolving" %
                                (cfg.iterative_K[1], RL, cfg.iterative_K[0]), log)
                run_iteration(configs_dir, execution_home, cfg, log, cfg.iterative_K[0], None, True)
                K = cfg.iterative_K[0]
        else:
            rest_of_iterative_K = cfg.iterative_K
            rest_of_iterative_K.pop(0)
            count = 0
            for K in rest_of_iterative_K:
                count += 1
                last_one = count == len(cfg.iterative_K) or (rest_of_iterative_K[count] + 1 > RL)
                run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one)
                prev_K = K
                if last_one:
                    break
            if count < len(cfg.iterative_K):
                support.warning("Iterations stopped. Value of K (%d) exceeded estimated read length (%d)" %
                                (cfg.iterative_K[count], RL), log)

    latest = os.path.join(cfg.output_dir, "K%d" % K)

    for format in [".fasta", ".fastg"]:
        if os.path.isfile(os.path.join(latest, "before_rr" + format)):
            result_before_rr_contigs = os.path.join(os.path.dirname(cfg.result_contigs), "before_rr" + format)
            if not os.path.isfile(result_before_rr_contigs) or not options_storage.continue_mode:
                shutil.copyfile(os.path.join(latest, "before_rr" + format), result_before_rr_contigs)
        if os.path.isfile(os.path.join(latest, "final_contigs" + format)):
            if not os.path.isfile(cfg.result_contigs[:-6] + format) or not options_storage.continue_mode:
                shutil.copyfile(os.path.join(latest, "final_contigs" + format), cfg.result_contigs[:-6] + format)
        if cfg.rr_enable:
            if os.path.isfile(os.path.join(latest, "scaffolds" + format)):
                if not os.path.isfile(cfg.result_scaffolds[:-6] + format) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "scaffolds" + format), cfg.result_scaffolds[:-6] + format)

    if cfg.developer_mode:
        # saves
        saves_link = os.path.join(os.path.dirname(cfg.result_contigs), "saves")
        if os.path.lexists(saves_link): # exists return False for broken link! lexists return True
            os.remove(saves_link)
        os.symlink(os.path.join(latest, "saves"), saves_link)

    if os.path.isdir(bin_reads_dir):
        shutil.rmtree(bin_reads_dir)
    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)

    return latest
Esempio n. 17
0
def main(args):
    os.environ["LC_ALL"] = "C"

    if len(args) == 1:
        options_storage.usage(spades_version)
        sys.exit(0)

    log = logging.getLogger('spades')
    log.setLevel(logging.DEBUG)

    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter('%(message)s'))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)

    support.check_binaries(bin_home, log)

    # parse options and safe all parameters to cfg
    cfg, dataset_data = fill_cfg(args, log)

    if options_storage.continue_mode:
        cmd_line, options = get_options_from_params(os.path.join(options_storage.output_dir, "params.txt"))
        if not options:
            support.error("failed to parse command line of the previous run! Please restart from the beginning or specify another output directory.")
        cfg, dataset_data = fill_cfg(options, log)
        if options_storage.restart_from:
            check_cfg_for_restart_from(cfg)
        options_storage.continue_mode = True

    log_filename = os.path.join(cfg["common"].output_dir, "spades.log")
    if options_storage.continue_mode:
        log_handler = logging.FileHandler(log_filename, mode='a')
    else:
        log_handler = logging.FileHandler(log_filename, mode='w')
    log.addHandler(log_handler)

    if options_storage.continue_mode:
        log.info("\n======= SPAdes pipeline continued. Log can be found here: " + log_filename + "\n")
        log.info("Restored from " + cmd_line)
        if options_storage.restart_from:
            updated_params = ""
            flag = False
            for v in args[1:]:
                if v == '-o' or v == '--restart-from':
                    flag = True
                    continue
                if flag:
                    flag = False
                    continue
                updated_params += " " + v
            updated_params = updated_params.strip()
            log.info("with updated parameters: " + updated_params)
            cmd_line += " " + updated_params
        log.info("")

    params_filename = os.path.join(cfg["common"].output_dir, "params.txt")
    params_handler = logging.FileHandler(params_filename, mode='w')
    log.addHandler(params_handler)

    if options_storage.continue_mode:
        log.info(cmd_line)
    else:
        command = "Command line:"
        for v in args:
            command += " " + v
        log.info(command)

    # special case
    if "mismatch_corrector" in cfg and not support.get_lib_ids_by_type(dataset_data, 'paired-end'):
        support.warning('cannot perform mismatch correction without at least one paired-end library! Skipping this step.', log)
        del cfg["mismatch_corrector"]

    print_used_values(cfg, log)
    log.removeHandler(params_handler)

    if not options_storage.continue_mode:
        log.info("\n======= SPAdes pipeline started. Log can be found here: " + log_filename + "\n")

    # splitting interlaced reads and processing Ns in additional contigs if needed
    if support.dataset_has_interlaced_reads(dataset_data) or support.dataset_has_additional_contigs(dataset_data):
        dir_for_split_reads = os.path.join(options_storage.output_dir, 'split_input')
        if support.dataset_has_interlaced_reads(dataset_data):
            if not os.path.isdir(dir_for_split_reads):
                os.makedirs(dir_for_split_reads)
            dataset_data = support.split_interlaced_reads(dataset_data, dir_for_split_reads, log)
        if support.dataset_has_additional_contigs(dataset_data):
            dataset_data = support.process_Ns_in_additional_contigs(dataset_data, dir_for_split_reads, log)
        options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))
        cfg["dataset"].yaml_filename = options_storage.dataset_yaml_filename

    try:
        # copying configs before all computations (to prevent its changing at run time)
        tmp_configs_dir = os.path.join(cfg["common"].output_dir, "configs")
        if os.path.isdir(tmp_configs_dir) and not options_storage.continue_mode:
            shutil.rmtree(tmp_configs_dir)
        if not os.path.isdir(tmp_configs_dir):
            dir_util.copy_tree(os.path.join(spades_home, "configs"), tmp_configs_dir, preserve_times=False)

        corrected_dataset_yaml_filename = ''
        if "error_correction" in cfg:
            STAGE_NAME = "Read error correction"
            bh_cfg = merge_configs(cfg["error_correction"], cfg["common"])
            corrected_dataset_yaml_filename = os.path.join(bh_cfg.output_dir, "corrected.yaml")
            if os.path.isfile(corrected_dataset_yaml_filename) and options_storage.continue_mode \
                and not options_storage.restart_from == "ec":
                log.info("\n===== Skipping %s (already processed). \n" % STAGE_NAME)
            else:
                support.continue_from_here(log)

                if "HEAPCHECK" in os.environ:
                    del os.environ["HEAPCHECK"]
                if "heap_check" in bh_cfg.__dict__:
                    os.environ["HEAPCHECK"] = bh_cfg.heap_check

                if os.path.exists(bh_cfg.output_dir):
                    shutil.rmtree(bh_cfg.output_dir)
                os.makedirs(bh_cfg.output_dir)

                if support.get_lib_ids_by_type(dataset_data, options_storage.LONG_READS_TYPES):
                    not_used_dataset_data = support.get_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES)
                    to_correct_dataset_data = support.rm_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES)
                    to_correct_dataset_yaml_filename = os.path.join(bh_cfg.output_dir, "to_correct.yaml")
                    pyyaml.dump(to_correct_dataset_data, open(to_correct_dataset_yaml_filename, 'w'))
                    bh_cfg.__dict__["dataset_yaml_filename"] = to_correct_dataset_yaml_filename
                else:
                    not_used_dataset_data = None
                    bh_cfg.__dict__["dataset_yaml_filename"] = cfg["dataset"].yaml_filename

                log.info("\n===== %s started. \n" % STAGE_NAME)
                hammer_logic.run_hammer(corrected_dataset_yaml_filename, tmp_configs_dir, bin_home, bh_cfg, not_used_dataset_data,
                    ext_python_modules_home, log)
                log.info("\n===== %s finished. \n" % STAGE_NAME)

        result_contigs_filename = os.path.join(cfg["common"].output_dir, "contigs.fasta")
        result_scaffolds_filename = os.path.join(cfg["common"].output_dir, "scaffolds.fasta")
        misc_dir = os.path.join(cfg["common"].output_dir, "misc")
        ### if mismatch correction is enabled then result contigs are copied to misc directory
        assembled_contigs_filename = os.path.join(misc_dir, "assembled_contigs.fasta")
        assembled_scaffolds_filename = os.path.join(misc_dir, "assembled_scaffolds.fasta")
        if "assembly" in cfg:
            STAGE_NAME = "Assembling"
            spades_cfg = merge_configs(cfg["assembly"], cfg["common"])
            spades_cfg.__dict__["result_contigs"] = result_contigs_filename
            spades_cfg.__dict__["result_scaffolds"] = result_scaffolds_filename

            if options_storage.continue_mode and (os.path.isfile(spades_cfg.result_contigs)
                                                  or ("mismatch_corrector" in cfg and
                                                      os.path.isfile(assembled_contigs_filename)))\
                and not options_storage.restart_from == 'as' \
                and not (options_storage.restart_from and options_storage.restart_from.startswith('k')):

                log.info("\n===== Skipping %s (already processed). \n" % STAGE_NAME)
                # calculating latest_dir for the next stages
                latest_dir = support.get_latest_dir(os.path.join(spades_cfg.output_dir, "K*"))
                if not latest_dir:
                    support.error("failed to continue the previous run! Please restart from previous stages or from the beginning.", log)
            else:
                old_result_files = [result_contigs_filename, result_scaffolds_filename,
                                    assembled_contigs_filename, assembled_scaffolds_filename]
                for format in [".fasta", ".fastg"]:
                    for old_result_file in old_result_files:
                        if os.path.isfile(old_result_file[:-6] + format):
                            os.remove(old_result_file[:-6] + format)

                if options_storage.restart_from == 'as':
                    support.continue_from_here(log)

                if os.path.isfile(corrected_dataset_yaml_filename):
                    dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
                    dataset_data = support.relative2abs_paths(dataset_data, os.path.dirname(corrected_dataset_yaml_filename))
                if spades_cfg.disable_rr:
                    spades_cfg.__dict__["rr_enable"] = False
                else:
                    spades_cfg.__dict__["rr_enable"] = True

                if "HEAPCHECK" in os.environ:
                    del os.environ["HEAPCHECK"]
                if "heap_check" in spades_cfg.__dict__:
                    os.environ["HEAPCHECK"] = spades_cfg.heap_check

                log.info("\n===== %s started.\n" % STAGE_NAME)

                # creating dataset
                dataset_filename = os.path.join(spades_cfg.output_dir, "dataset.info")
                if not os.path.isfile(dataset_filename) or not options_storage.continue_mode:
                    dataset_file = open(dataset_filename, 'w')
                    import process_cfg
                    dataset_file.write("single_cell" + '\t' + process_cfg.bool_to_str(cfg["dataset"].single_cell) + '\n')
                    if os.path.isfile(corrected_dataset_yaml_filename):
                        dataset_file.write("reads" + '\t' + process_cfg.process_spaces(corrected_dataset_yaml_filename) + '\n')
                    else:
                        dataset_file.write("reads" + '\t' + process_cfg.process_spaces(cfg["dataset"].yaml_filename) + '\n')
                    if spades_cfg.developer_mode and "reference" in cfg["dataset"].__dict__:
                        dataset_file.write("reference_genome" + '\t')
                        dataset_file.write(process_cfg.process_spaces(cfg["dataset"].reference) + '\n')
                    dataset_file.close()
                spades_cfg.__dict__["dataset"] = dataset_filename

                latest_dir = spades_logic.run_spades(tmp_configs_dir, bin_home, spades_cfg, dataset_data, ext_python_modules_home, log)

                if os.path.isdir(misc_dir) and not options_storage.continue_mode:
                    shutil.rmtree(misc_dir)
                if not os.path.isdir(misc_dir):
                    os.makedirs(misc_dir)

                if options_storage.continue_mode and options_storage.restart_from and options_storage.restart_from.startswith('k'):
                    k_str = options_storage.restart_from[1:]
                    if k_str.find(":") != -1:
                        k_str = k_str[:k_str.find(":")]
                    support.error("failed to continue from K=%s because this K was not processed in the original run!" % k_str, log)
                log.info("\n===== %s finished. \n" % STAGE_NAME)

            #corrector
            if "mismatch_corrector" in cfg and (os.path.isfile(result_contigs_filename) or
                                                (options_storage.continue_mode and os.path.isfile(assembled_contigs_filename))):
                STAGE_NAME = "Mismatch correction"
                to_correct = dict()
                to_correct["contigs"] = (result_contigs_filename, assembled_contigs_filename)
                if os.path.isfile(result_scaffolds_filename) or (options_storage.continue_mode and
                                                                 os.path.isfile(assembled_scaffolds_filename)):
                    to_correct["scaffolds"] = (result_scaffolds_filename, assembled_scaffolds_filename)

                # moving assembled contigs (scaffolds) to misc dir
                for assembly_type, (old, new) in to_correct.items():
                    if options_storage.continue_mode and os.path.isfile(new):
                        continue
                    for format in [".fasta", ".fastg"]:
                        if os.path.isfile(old[:-6] + format):
                            shutil.move(old[:-6] + format, new[:-6] + format)

                if options_storage.continue_mode and os.path.isfile(result_contigs_filename) and \
                    (os.path.isfile(result_scaffolds_filename) or not os.path.isfile(assembled_scaffolds_filename)) \
                    and not options_storage.restart_from == 'mc':
                    log.info("\n===== Skipping %s (already processed). \n" % STAGE_NAME)
                else:
                    if options_storage.restart_from == 'mc':
                        support.continue_from_here(log)

                    log.info("\n===== %s started." % STAGE_NAME)
                    # detecting paired-end library with the largest insert size
                    est_params_data = pyyaml.load(open(os.path.join(latest_dir, "final.lib_data"), 'r'))
                    max_IS_library = None
                    for reads_library in est_params_data:
                        if reads_library['type'] == 'paired-end':
                            if not max_IS_library or float(reads_library["insert size mean"]) > float(max_IS_library["insert size mean"]):
                                max_IS_library = reads_library
                    if not max_IS_library:
                        support.error('Mismatch correction cannot be performed without at least one paired-end library!', log)
                    if not max_IS_library["insert size mean"]:
                        support.warning('Failed to estimate insert size for all paired-end libraries. Starting Mismatch correction'
                                        ' based on the first paired-end library and with default insert size.', log)
                    else:
                        cfg["mismatch_corrector"].__dict__["insert-size"] = round(max_IS_library["insert size mean"])
                    yaml_dirname = os.path.dirname(options_storage.dataset_yaml_filename)
                    cfg["mismatch_corrector"].__dict__["1"] = list(map(lambda x: os.path.join(yaml_dirname, x),
                        max_IS_library['left reads']))
                    cfg["mismatch_corrector"].__dict__["2"] = list(map(lambda x: os.path.join(yaml_dirname, x),
                        max_IS_library['right reads']))
                    #TODO: add reads orientation

                    import corrector
                    corrector_cfg = cfg["mismatch_corrector"]
                    args = []
                    for key, values in corrector_cfg.__dict__.items():
                        if key == "output-dir":
                            continue

                        # for processing list of reads
                        if not isinstance(values, list):
                            values = [values]
                        for value in values:
                            if len(key) == 1:
                                args.append('-' + key)
                            else:
                                args.append('--' + key)
                            if value is not None:
                                args.append(value)

                    # processing contigs and scaffolds (or only contigs)
                    for assembly_type, (corrected, assembled) in to_correct.items():
                        if options_storage.continue_mode and os.path.isfile(corrected):
                            log.info("\n== Skipping processing of " + assembly_type + " (already processed)\n")
                            continue

                        support.continue_from_here(log)
                        log.info("\n== Processing of " + assembly_type + "\n")

                        cur_args = args[:]
                        cur_args += ['-c', assembled]
                        tmp_dir_for_corrector = support.get_tmp_dir(prefix="mis_cor_%s_" % assembly_type)
                        cur_args += ['--output-dir', tmp_dir_for_corrector]

                        # correcting
                        corrector.main(cur_args, ext_python_modules_home, log)

                        result_corrected_filename = os.path.join(tmp_dir_for_corrector, "corrected_contigs.fasta")
                        # moving corrected contigs (scaffolds) to SPAdes output dir
                        if os.path.isfile(result_corrected_filename):
                            shutil.move(result_corrected_filename, corrected)

                        if os.path.isdir(tmp_dir_for_corrector):
                            shutil.rmtree(tmp_dir_for_corrector)

                        assembled_fastg = assembled[:-6] + ".fastg"
                        if os.path.isfile(assembled_fastg):
                            support.create_fastg_from_fasta(corrected, assembled_fastg, log)
                    log.info("\n===== %s finished.\n" % STAGE_NAME)

        if not cfg["common"].developer_mode and os.path.isdir(tmp_configs_dir):
            shutil.rmtree(tmp_configs_dir)

        #log.info("")
        if "error_correction" in cfg and os.path.isdir(os.path.dirname(corrected_dataset_yaml_filename)):
            log.info(" * Corrected reads are in " + support.process_spaces(os.path.dirname(corrected_dataset_yaml_filename) + "/"))
        if "assembly" in cfg and os.path.isfile(result_contigs_filename):
            message = " * Assembled contigs are in " + support.process_spaces(result_contigs_filename)
            if os.path.isfile(result_contigs_filename[:-6] + ".fastg"):
                message += " (" + os.path.basename(result_contigs_filename[:-6] + ".fastg") + ")"
            log.info(message)
        if "assembly" in cfg and os.path.isfile(result_scaffolds_filename):
            message = " * Assembled scaffolds are in " + support.process_spaces(result_scaffolds_filename)
            if os.path.isfile(result_scaffolds_filename[:-6] + ".fastg"):
                message += " (" + os.path.basename(result_scaffolds_filename[:-6] + ".fastg") + ")"
            log.info(message)
        #log.info("")

        #breaking scaffolds
        if os.path.isfile(result_scaffolds_filename):
            if not os.path.isdir(misc_dir):
                os.makedirs(misc_dir)
            result_broken_scaffolds = os.path.join(misc_dir, "broken_scaffolds.fasta")
            if not os.path.isfile(result_broken_scaffolds) or not options_storage.continue_mode:
                modified, broken_scaffolds = support.break_scaffolds(result_scaffolds_filename,
                    options_storage.THRESHOLD_FOR_BREAKING_SCAFFOLDS)
                if modified:
                    support.write_fasta(result_broken_scaffolds, broken_scaffolds)
                    #log.info(" * Scaffolds broken by " + str(options_storage.THRESHOLD_FOR_BREAKING_SCAFFOLDS) +
                    # " Ns are in " + result_broken_scaffolds)

        ### printing WARNINGS SUMMARY
        if not support.log_warnings(log):
            log.info("\n======= SPAdes pipeline finished.")  # otherwise it finished WITH WARNINGS

        if options_storage.test_mode:
            for result_filename in [result_contigs_filename, result_scaffolds_filename]:
                if os.path.isfile(result_filename):
                    result_fasta = list(support.read_fasta(result_filename))
                    # correctness check: should be one contig of length 1000 bp
                    correct_number = 1
                    correct_length = 1000
                    if not len(result_fasta):
                        support.error("TEST FAILED: %s does not contain contigs!" % result_filename)
                    elif len(result_fasta) > correct_number:
                        support.error("TEST FAILED: %s contains more than %d contig (%d)!" %
                                      (result_filename, correct_number, len(result_fasta)))
                    elif len(result_fasta[0][1]) != correct_length:
                        if len(result_fasta[0][1]) > correct_length:
                            relation = "more"
                        else:
                            relation = "less"
                        support.error("TEST FAILED: %s contains %s than %d bp (%d bp)!" %
                                      (result_filename, relation, correct_length, len(result_fasta[0][1])))
                else:
                    support.error("TEST FAILED: " + result_filename + " does not exist!")
            log.info("\n========= TEST PASSED CORRECTLY.")


        log.info("\nSPAdes log can be found here: " + log_filename)
        log.info("")
        log.info("Thank you for using SPAdes!")
        log.removeHandler(log_handler)

    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            if exc_type == OSError and exc_value.errno == errno.ENOEXEC: # Exec format error
                support.error("It looks like you are using SPAdes binaries for another platform.\n" +
                              support.get_spades_binaries_info_message())
            else:
                log.exception(exc_value)
                support.error("exception caught: %s" % exc_type, log)
    except BaseException: # since python 2.5 system-exiting exceptions (e.g. KeyboardInterrupt) are derived from BaseException
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            support.error("exception caught: %s" % exc_type, log)
Esempio n. 18
0
def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_modules_home, log):
    if not isinstance(cfg.iterative_K, list):
        cfg.iterative_K = [cfg.iterative_K]
    cfg.iterative_K = sorted(cfg.iterative_K)
    used_K = []

    # checking and removing conflicting K-mer directories
    if options_storage.restart_from and (options_storage.restart_k_mers != options_storage.original_k_mers):
        processed_K = []
        for k in range(options_storage.MIN_K, options_storage.MAX_K, 2):
            cur_K_dir = os.path.join(cfg.output_dir, "K%d" % k)
            if os.path.isdir(cur_K_dir) and os.path.isfile(os.path.join(cur_K_dir, "final_contigs.fasta")):
                processed_K.append(k)
        if processed_K:
            RL = get_read_length(cfg.output_dir, processed_K[0], ext_python_modules_home, log)
            needed_K = update_k_mers_in_special_cases(cfg.iterative_K, RL, log, silent=True)
            needed_K = [k for k in needed_K if k < RL]
            original_K = reveal_original_k_mers(RL)

            k_to_delete = []
            for id, k in enumerate(needed_K):
                if len(processed_K) == id:
                    if processed_K[-1] == original_K[-1]:  # the last K in the original run was processed in "last_one" mode
                        k_to_delete = [original_K[-1]]
                    break
                if processed_K[id] != k:
                    k_to_delete = processed_K[id:]
                    break
            if not k_to_delete and (len(processed_K) > len(needed_K)):
                k_to_delete = processed_K[len(needed_K) - 1:]
            if k_to_delete:
                log.info("Restart mode: removing previously processed directories for K=%s "
                         "to avoid conflicts with K specified with --restart-from" % (str(k_to_delete)))
                for k in k_to_delete:
                    shutil.rmtree(os.path.join(cfg.output_dir, "K%d" % k))

    bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")
    if os.path.isdir(bin_reads_dir) and not options_storage.continue_mode:
        shutil.rmtree(bin_reads_dir)
    cfg.tmp_dir = support.get_tmp_dir(prefix="spades_")

    finished_on_stop_after = False
    K = cfg.iterative_K[0]
    if len(cfg.iterative_K) == 1:
        run_iteration(configs_dir, execution_home, cfg, log, K, None, True)
        used_K.append(K)
    else:
        run_iteration(configs_dir, execution_home, cfg, log, K, None, False)
        used_K.append(K)
        if options_storage.stop_after == "k%d" % K:
            finished_on_stop_after = True
        else:
            prev_K = K
            RL = get_read_length(cfg.output_dir, K, ext_python_modules_home, log)
            cfg.iterative_K = update_k_mers_in_special_cases(cfg.iterative_K, RL, log)
            if len(cfg.iterative_K) < 2 or cfg.iterative_K[1] + 1 > RL:
                if cfg.rr_enable:
                    if len(cfg.iterative_K) < 2:
                        log.info("== Rerunning for the first value of K (%d) with Repeat Resolving" %
                                 cfg.iterative_K[0])
                    else:
                        support.warning("Second value of iterative K (%d) exceeded estimated read length (%d). "
                                        "Rerunning for the first value of K (%d) with Repeat Resolving" %
                                        (cfg.iterative_K[1], RL, cfg.iterative_K[0]), log)
                    run_iteration(configs_dir, execution_home, cfg, log, cfg.iterative_K[0], None, True)
                    used_K.append(cfg.iterative_K[0])
                    K = cfg.iterative_K[0]
            else:
                rest_of_iterative_K = cfg.iterative_K
                rest_of_iterative_K.pop(0)
                count = 0
                for K in rest_of_iterative_K:
                    count += 1
                    last_one = count == len(cfg.iterative_K) or (rest_of_iterative_K[count] + 1 > RL)
                    run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one)
                    used_K.append(K)
                    prev_K = K
                    if last_one:
                        break
                    if options_storage.stop_after == "k%d" % K:
                        finished_on_stop_after = True
                        break
                if count < len(cfg.iterative_K) and not finished_on_stop_after:
                    support.warning("Iterations stopped. Value of K (%d) exceeded estimated read length (%d)" %
                                    (cfg.iterative_K[count], RL), log)

    if options_storage.stop_after and options_storage.stop_after.startswith('k'):
        support.finish_here(log)
    latest = os.path.join(cfg.output_dir, "K%d" % K)

    if cfg.correct_scaffolds and not options_storage.run_completed:
        if options_storage.continue_mode and os.path.isfile(os.path.join(cfg.output_dir, "SCC", "corrected_scaffolds.fasta")) and not options_storage.restart_from == "scc":
            log.info("\n===== Skipping %s (already processed). \n" % "scaffold correction")
        else:
            if options_storage.continue_mode:
                support.continue_from_here(log)
            run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, 21)
        latest = os.path.join(os.path.join(cfg.output_dir, "SCC"), "K21")
        if options_storage.stop_after == 'scc':
            support.finish_here(log)

    if cfg.correct_scaffolds:
        correct_scaffolds_fpath = os.path.join(latest, "corrected_scaffolds.fasta")
        if os.path.isfile(correct_scaffolds_fpath):
            shutil.copyfile(correct_scaffolds_fpath, cfg.result_scaffolds)
    elif not finished_on_stop_after:  # interupted by --stop-after, so final K is not processed!
        if os.path.isfile(os.path.join(latest, "before_rr.fasta")):
            result_before_rr_contigs = os.path.join(os.path.dirname(cfg.result_contigs), "before_rr.fasta")
            if not os.path.isfile(result_before_rr_contigs) or not options_storage.continue_mode:
                shutil.copyfile(os.path.join(latest, "before_rr.fasta"), result_before_rr_contigs)
        if options_storage.rna:
            if os.path.isfile(os.path.join(latest, "transcripts.fasta")):
                if not os.path.isfile(cfg.result_transcripts) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "transcripts.fasta"), cfg.result_transcripts)
            if os.path.isfile(os.path.join(latest, "transcripts.paths")):
                if not os.path.isfile(cfg.result_transcripts_paths) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "transcripts.paths"), cfg.result_transcripts_paths)
            for filtering_type in options_storage.filtering_types:
                prefix = filtering_type + "_filtered_"
                result_filtered_transcripts = os.path.join(cfg.output_dir, prefix + options_storage.transcripts_name)
                latest_filtered_transcripts = os.path.join(latest, prefix + "final_paths.fasta")
                if os.path.isfile(latest_filtered_transcripts):
                    if not os.path.isfile(result_filtered_transcripts) or not options_storage.continue_mode:
                        shutil.copyfile(latest_filtered_transcripts, result_filtered_transcripts)
        else:
            if os.path.isfile(os.path.join(latest, "final_contigs.fasta")):
                if not os.path.isfile(cfg.result_contigs) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "final_contigs.fasta"), cfg.result_contigs)
            if os.path.isfile(os.path.join(latest, "first_pe_contigs.fasta")):
                result_first_pe_contigs = os.path.join(os.path.dirname(cfg.result_contigs), "first_pe_contigs.fasta")
                if not os.path.isfile(result_first_pe_contigs) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "first_pe_contigs.fasta"), result_first_pe_contigs)
            if cfg.rr_enable:
                if os.path.isfile(os.path.join(latest, "scaffolds.fasta")):
                    if not os.path.isfile(cfg.result_scaffolds) or not options_storage.continue_mode:
                        shutil.copyfile(os.path.join(latest, "scaffolds.fasta"), cfg.result_scaffolds)
                if os.path.isfile(os.path.join(latest, "scaffolds.paths")):
                    if not os.path.isfile(cfg.result_scaffolds_paths) or not options_storage.continue_mode:
                        shutil.copyfile(os.path.join(latest, "scaffolds.paths"), cfg.result_scaffolds_paths)
            if os.path.isfile(os.path.join(latest, "assembly_graph_with_scaffolds.gfa")):
                if not os.path.isfile(cfg.result_graph_gfa) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "assembly_graph_with_scaffolds.gfa"), cfg.result_graph_gfa)
            if os.path.isfile(os.path.join(latest, "assembly_graph.fastg")):
                if not os.path.isfile(cfg.result_graph) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "assembly_graph.fastg"), cfg.result_graph)
            if os.path.isfile(os.path.join(latest, "final_contigs.paths")):
                if not os.path.isfile(cfg.result_contigs_paths) or not options_storage.continue_mode:
                    shutil.copyfile(os.path.join(latest, "final_contigs.paths"), cfg.result_contigs_paths)


    if cfg.developer_mode:
        # saves
        saves_link = os.path.join(os.path.dirname(cfg.result_contigs), "saves")
        if os.path.lexists(saves_link):  # exists returns False for broken links! lexists return True
            os.remove(saves_link)
        os.symlink(os.path.join(latest, "saves"), saves_link)

    if os.path.isdir(bin_reads_dir):
        shutil.rmtree(bin_reads_dir)
    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)

    return used_K