Example 1
 def Run(self):
     self.__CheckInputExistance()
     command_line = "%s %s %s --limit=%d" % (IgRepConConfig().run_report_supernodes,
                                             self.__params.io.compressed_final_clusters_fa,
                                             self.__params.io.final_stripped_clusters_fa,
                                             self.__params.min_cluster_size)
     support.sys_call(command_line, self._log)
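Note: support.sys_call itself is not shown in these examples. Below is a minimal, hypothetical stand-in inferred only from the call sites: it accepts either a command string or an argv list, an optional logger, and (as in Example 18) an optional working directory, and it fails loudly on a non-zero exit code.

import shlex
import subprocess

def sys_call(command, log=None, cwd=None):
    # Accept both a shell-style command string and a ready argv list.
    args = shlex.split(command) if isinstance(command, str) else list(command)
    if log:
        log.info("Running: " + " ".join(args))
    proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT, cwd=cwd)
    output, _ = proc.communicate()
    if log and output:
        log.info(output.decode("utf-8", "replace"))
    if proc.returncode != 0:
        raise RuntimeError("command finished with exit code %d" % proc.returncode)
    return output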
Example 2
 def Run(self):
     self.__CheckInputExistance()
     command_line = "%s %s %s %s" % (IgRepConConfig().run_pair_reads_merger,
                                     self.__params.left_reads,
                                     self.__params.right_reads,
                                     self.__params.single_reads)
     support.sys_call(command_line, self._log)
Example 3
def compress_dataset_files(dataset_data, ext_python_modules_home, max_threads, log):
    log.info("\n== Compressing corrected reads (with gzip)")
    to_compress = []
    for reads_library in dataset_data:
        for key, value in reads_library.items():
            if key.endswith('reads'):
                compressed_reads_filenames = []
                for reads_file in value:
                    if not os.path.isfile(reads_file):
                        support.error('something went wrong and file with corrected reads (' + reads_file + ') is missing!', log)
                    to_compress.append(reads_file)
                    compressed_reads_filenames.append(reads_file + ".gz")
                reads_library[key] = compressed_reads_filenames
    if len(to_compress):
        pigz_path = support.which('pigz')
        if pigz_path:
            for reads_file in to_compress:
                support.sys_call([pigz_path, '-f', '-7', '-p', str(max_threads), reads_file], log)
        else:
            addsitedir(ext_python_modules_home)
            if sys.version.startswith('2.'):
                from joblib2 import Parallel, delayed
            elif sys.version.startswith('3.'):
                from joblib3 import Parallel, delayed
            n_jobs = min(len(to_compress), max_threads)
            outputs = Parallel(n_jobs=n_jobs)(delayed(support.sys_call)(['gzip', '-f', '-7', reads_file]) for reads_file in to_compress)
            for output in outputs:
                if output:
                    log.info(output)
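An isolated sketch of the joblib fallback used above: compress each file in its own worker process and collect any captured output. It assumes a stock joblib install instead of the bundled joblib2/joblib3 modules; the input file names are placeholders.

import subprocess
from joblib import Parallel, delayed

def gzip_file(reads_file):
    # gzip -f -7 replaces reads_file with reads_file.gz on success
    return subprocess.check_output(["gzip", "-f", "-7", reads_file],
                                   stderr=subprocess.STDOUT)

files = ["reads_1.fastq", "reads_2.fastq"]  # placeholder inputs
outputs = Parallel(n_jobs=2)(delayed(gzip_file)(f) for f in files)
for output in outputs:
    if output:
        print(output)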
Example 4
def run_corrector(configs_dir, execution_home, cfg,
                ext_python_modules_home, log, to_correct, result):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    dir_util.copy_tree(os.path.join(configs_dir, "corrector"), dst_configs, preserve_times=False)
    cfg_file_name = os.path.join(dst_configs, "corrector.info")

    cfg.tmp_dir = support.get_tmp_dir(prefix="corrector_")

    prepare_config_corr(cfg_file_name, cfg, ext_python_modules_home)
    binary_name = "corrector"

    command = [os.path.join(execution_home, binary_name),
               os.path.abspath(cfg_file_name), os.path.abspath(to_correct)]

    log.info("\n== Running contig polishing tool: " + ' '.join(command) + "\n")


    log.info("\n== Dataset description file was created: " + cfg_file_name + "\n")

    support.sys_call(command, log)
    if not os.path.isfile(result):
        support.error("Mismatch correction finished abnormally: " + result + " not found!")
    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
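The config-staging pattern shared by most examples here, shown in isolation: wipe any configs left over from a previous run, then re-copy the templates into the run directory. A minimal sketch with placeholder paths; the originals use distutils.dir_util.copy_tree(..., preserve_times=False), which caches created paths (see the _path_created workaround in Example 24), while plain shutil.copytree fails if the destination exists, hence the rmtree first.

import os
import shutil

def stage_configs(configs_dir, output_dir, name="corrector"):
    dst_configs = os.path.join(output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)  # drop configs from a previous run
    shutil.copytree(os.path.join(configs_dir, name), dst_configs)
    return dst_configs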
Example 5
def run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, K):
    data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"),
                       dst_configs,
                       preserve_times=False)

    log.info("\n== Running scaffold correction \n")
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffodls were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs,
                                                  "construction.info")
        process_cfg.substitute_params(
            construction_cfg_file_name,
            {"read_buffer_size": cfg.read_buffer_size}, log)
    process_cfg.substitute_params(
        os.path.join(dst_configs, "moleculo_mode.info"),
        {"scaffolds_file": scaffolds_file}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, K)
    command = [
        os.path.join(execution_home, "spades-truseq-scfcorrection"),
        cfg_file_name
    ]
    add_configs(command, dst_configs)
    log.info(str(command))
    support.sys_call(command, log)
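A hedged sketch of what process_cfg.substitute_params appears to do at these call sites: rewrite whitespace-separated "key value" entries in an .info config file in place. This is inferred from usage, not the actual SPAdes implementation.

def substitute_params(filename, var_dict, log=None):
    with open(filename) as f:
        lines = f.readlines()
    with open(filename, "w") as f:
        for line in lines:
            fields = line.split()
            if fields and fields[0] in var_dict:
                line = "%s %s\n" % (fields[0], var_dict[fields[0]])
            f.write(line)
    if log:
        log.info("substituted %s in %s" % (sorted(var_dict), filename))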
Example 6
def run_iteration(configs_dir, execution_home, cfg, log, K, use_additional_contigs, last_one):
    data_dir = os.path.join(cfg.output_dir, "K%d" % (K))
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)
    bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")

    dst_configs = os.path.join(data_dir, "configs")
    shutil.copytree(os.path.join(configs_dir, "debruijn"), dst_configs)
    cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    prepare_config_spades(cfg_file_name, cfg, log, use_additional_contigs, K, last_one)
    prev_K = K

    command = os.path.join(execution_home, "spades") + " " +\
               os.path.abspath(cfg_file_name)

    if os.path.isdir(bin_reads_dir):
        if glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
            for cor_filename in glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
                cor_index = cor_filename.rfind("_cor")
                new_bin_filename = cor_filename[:cor_index] + cor_filename[cor_index + 4:]
                shutil.move(cor_filename, new_bin_filename)

    log.info("\n== Running assembler: " + ("K%d" % (K)) + "\n")
    support.sys_call(command, log)
Example 7
def run_scaffold_correction(configs_dir, execution_home, cfg, log, K):
    data_dir = os.path.join(cfg.output_dir, "SCC")
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    log.info("\n== Running scaffold correction \n")
    latest = os.path.join(cfg.output_dir, "K%d" % K)
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffodls were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, scaffolds_file)
    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    log.info(str(command))
    support.sys_call(command, log)
Example 8
def run_igrec(input_file,
              output_dir,
              log=None,
              tau=4,
              min_fillin=0.6,
              loci="all",
              threads=16,
              additional_args="",
              min_sread_size=5,
              remove_tmp=True):
    if log is None:
        log = FakeLog()

    args = {
        "path": path_to_igrec,
        "tau": tau,
        "min_fillin": min_fillin,
        "loci": loci,
        "threads": threads,
        "input_file": input_file,
        "output_dir": output_dir,
        "min_sread_size": min_sread_size,
        "additional_args": additional_args
    }
    timer = Timer()
    support.sys_call(
        "%(path)s/igrec.py --tau=%(tau)d --min-fillin=%(min_fillin)f -t %(threads)d --loci %(loci)s -s %(input_file)s -o %(output_dir)s --min-sread-size %(min_sread_size)d %(additional_args)s"
        % args,
        log=log)
    timer.stamp(output_dir + "/time.txt")
    if remove_tmp:
        rmdir(output_dir + "/vj_finder")
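A usage sketch for run_igrec defined above; the input file, output directory, and argument values are placeholders.

run_igrec("input_reads.fa", "igrec_results", threads=8, loci="IGH")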
Example 9
def run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, K):
    data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running scaffold correction \n")
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffodls were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    process_cfg.substitute_params(os.path.join(dst_configs, "moleculo_mode.info"), {"scaffolds_file": scaffolds_file}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, K)
    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    add_configs(command, dst_configs)
    log.info(str(command))
    support.sys_call(command, log)
Example 10
def run_ig_simulator(output_dir,
                     log=None,
                     chain="HC",
                     num_bases=100,
                     num_mutated=1000,
                     repertoire_size=5000,
                     tcr=False):
    if log is None:
        log = FakeLog()

    assert chain in ["HC", "LC"]

    args = {
        "path": path_to_ig_simulator if not tcr else path_to_ig_simulator_tcr,
        "output_dir": output_dir,
        "chain": chain,
        "num_bases": num_bases,
        "num_mutated": num_mutated,
        "repertoire_size": repertoire_size
    }

    timer = Timer()
    cmd = "%(path)s/ig_simulator.py --chain-type %(chain)s --num-bases %(num_bases)d --num-mutated %(num_mutated)d --repertoire-size %(repertoire_size)d -o %(output_dir)s --skip-drawing" % args
    if tcr:
        vgenes = igrec_dir + "/data/germline/human/TCR/TRBV.fa"
        jgenes = igrec_dir + "/data/germline/human/TCR/TRBJ.fa"
        dgenes = igrec_dir + "/data/germline/human/TCR/TRBD.fa"
        cmd += " --vgenes=" + vgenes + " --jgenes=" + jgenes + " --dgenes=" + dgenes
    support.sys_call(cmd, log=log)
    timer.stamp(output_dir + "/time.txt")
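A usage sketch for run_ig_simulator; the output directory is a placeholder, and chain must be "HC" or "LC" per the assert above.

run_ig_simulator("ig_simulator_out", chain="HC", num_bases=100,
                 num_mutated=1000, repertoire_size=5000)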
Example 11
def simulate_data(input_file, output_dir, log=None, **kwargs):
    import tempfile
    import shutil

    if log is None:
        log = FakeLog()

    mkdir_p(output_dir)

    temp_dir = tempfile.mkdtemp()
    run_igrec(input_file, temp_dir, remove_tmp=False,
              tau=1)  # Run IgReC for VJF output

    input_file = temp_dir + "/vj_finder/cleaned_reads.fa"

    simulated_repertoire_to_rcm(input_file,
                                "%s/final_repertoire.rcm" % output_dir)

    simulated_repertoire_to_final_repertoire(
        input_file, "%s/final_repertoire.fa.gz" % output_dir)

    args = {
        "path": igrec_dir,
        "repertoire": output_dir + "/final_repertoire.fa.gz",
        "rcm": output_dir + "/final_repertoire.rcm"
    }
    support.sys_call(
        "%(path)s/py/ig_compress_equal_clusters.py %(repertoire)s %(repertoire)s -r %(rcm)s"
        % args,
        log=log)

    # TODO factor this stage
    jit_fx_file(input_file, "%s/input_reads.fa.gz" % output_dir, **kwargs)

    shutil.rmtree(temp_dir)
Example 12
def run_corrector(configs_dir, execution_home, cfg,
                ext_python_modules_home, log, to_correct, result):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    dir_util.copy_tree(os.path.join(configs_dir, "corrector"), dst_configs, preserve_times=False)
    cfg_file_name = os.path.join(dst_configs, "corrector.info")

    cfg.tmp_dir = support.get_tmp_dir(prefix="corrector_")

    prepare_config_corr(cfg_file_name, cfg, ext_python_modules_home)
    binary_name = "corrector"

    command = [os.path.join(execution_home, binary_name),
               os.path.abspath(cfg_file_name), os.path.abspath(to_correct)]

    log.info("\n== Running contig polishing tool: " + ' '.join(command) + "\n")


    log.info("\n== Dataset description file was created: " + cfg_file_name + "\n")

    support.sys_call(command, log)
    if not os.path.isfile(result):
        support.error("Mismatch correction finished abnormally: " + result + " not found!")
    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
Example 13
def run_presto(input_file, output_dir, log=None, remove_tmp=True):
    if log is None:
        log = FakeLog()

    mkdir_p(output_dir)

    # gunzip
    input_file_new = "%s/input_reads.fasta" % output_dir
    fastx2fastx(input_file, input_file_new)

    args = {"input_file": input_file_new, "output_dir": output_dir}

    timer = Timer()
    support.sys_call(
        "CollapseSeq.py -s %(input_file)s --outdir %(output_dir)s --outname presto"
        % args,
        log=log)
    timer.stamp(output_dir + "/time.txt")

    presto_output = output_dir + "/presto_collapse-unique.fasta"
    repertoire_fa = output_dir + "/final_repertoire.fa"
    with smart_open(presto_output) as fin, smart_open(repertoire_fa,
                                                      "w") as fout:
        for i, record in enumerate(
                SeqIO.parse(fin, idFormatByFileName(presto_output))):
            id = record.description
            size = parse_presto_id(id)
            record.id = record.description = "cluster___%d___size___%d" % (
                i, size)
            SeqIO.write(record, fout, "fasta")

    if remove_tmp:
        os.remove(input_file_new)
        os.remove(presto_output)
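parse_presto_id is not shown here; a plausible sketch, assuming pRESTO's CollapseSeq annotates duplicate counts as "DUPCOUNT=<n>" in the record description:

import re

def parse_presto_id(description):
    # Hypothetical: pull the duplicate count out of a pRESTO-style header.
    m = re.search(r"DUPCOUNT=(\d+)", description)
    return int(m.group(1)) if m else 1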
Example 14
def run_scaffold_correction(configs_dir, execution_home, cfg, log, K):
    data_dir = os.path.join(cfg.output_dir, "SCC")
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    log.info("\n== Running scaffold correction \n")
    latest = os.path.join(cfg.output_dir, "K%d" % K)
    scaffolds_file = os.path.join(latest, "scaffolds.fasta")
    if not os.path.isfile(scaffolds_file):
        support.error("Scaffodls were not found in " + scaffolds_file, log)
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_scaffold_correction(cfg_file_name, cfg, log, saves_dir, scaffolds_file)
    command = [os.path.join(execution_home, "scaffold_correction"), cfg_file_name]
    log.info(str(command))
    support.sys_call(command, log)
Example 15
def compress_dataset_files(dataset_data, ext_python_modules_home, max_threads, log):
    log.info("\n== Compressing corrected reads (with gzip)")
    to_compress = []
    for reads_library in dataset_data:
        for key, value in reads_library.items():
            if key.endswith('reads'):
                compressed_reads_filenames = []
                for reads_file in value:
                    if not os.path.isfile(reads_file):
                        support.error('something went wrong and file with corrected reads (' + reads_file + ') is missing!', log)
                    to_compress.append(reads_file)
                    compressed_reads_filenames.append(reads_file + ".gz")
                reads_library[key] = compressed_reads_filenames
    if len(to_compress):
        pigz_path = support.which('pigz')
        if pigz_path:
            for reads_file in to_compress:
                support.sys_call([pigz_path, '-f', '-7', '-p', str(max_threads), reads_file], log)
        else:
            addsitedir(ext_python_modules_home)
            if sys.version.startswith('2.'):
                from joblib2 import Parallel, delayed
            elif sys.version.startswith('3.'):
                from joblib3 import Parallel, delayed
            n_jobs = min(len(to_compress), max_threads)
            outputs = Parallel(n_jobs=n_jobs)(delayed(support.sys_call)(['gzip', '-f', '-7', reads_file]) for reads_file in to_compress)
            for output in outputs:
                if output:
                    log.info(output)
Example 16
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if options_storage.continue_mode:
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not (options_storage.restart_from and
            (options_storage.restart_from == ("k%d" % K) or options_storage.restart_from.startswith("k%d:" % K))):
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        if options_storage.restart_from and options_storage.restart_from.find(":") != -1:
            stage = options_storage.restart_from[options_storage.restart_from.find(":") + 1:]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir))
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        shutil.copytree(os.path.join(configs_dir, "debruijn"), dst_configs)
        # removing template configs
        for root, dirs, files in os.walk(dst_configs):
            for cfg_file in files:
                cfg_file = os.path.join(root, cfg_file)
                if cfg_file.endswith('.info.template'):
                    if os.path.isfile(cfg_file.split('.template')[0]):
                        os.remove(cfg_file)
                    else:
                        os.rename(cfg_file, cfg_file.split('.template')[0])

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_spades(cfg_file_name, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one)

    command = [os.path.join(execution_home, "spades"), cfg_file_name]

## this code makes sense for src/debruijn/simplification.cpp: corrected_and_save_reads() function which is not used now
#    bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")
#    if os.path.isdir(bin_reads_dir):
#        if glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
#            for cor_filename in glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
#                cor_index = cor_filename.rfind("_cor")
#                new_bin_filename = cor_filename[:cor_index] + cor_filename[cor_index + 4:]
#                shutil.move(cor_filename, new_bin_filename)
    support.sys_call(command, log)
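For reference, a minimal sketch of the restart-point convention handled above: "k55" restarts the whole K=55 iteration, while "k55:<stage>" restarts from a named stage. The values here are illustrative.

restart_from = "k55:simplification"  # illustrative value
K = 55
if restart_from == "k%d" % K or restart_from.startswith("k%d:" % K):
    stage = restart_from.split(":", 1)[1] if ":" in restart_from else "base"
    print(stage)  # -> simplification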
Example 17
def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg,
               not_used_dataset_data, ext_python_modules_home, log):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    if cfg.iontorrent:
        dir_util.copy_tree(os.path.join(configs_dir, "ionhammer"), dst_configs, preserve_times=False)
        cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg")
    else:
        dir_util.copy_tree(os.path.join(configs_dir, "hammer"), dst_configs, preserve_times=False)
        cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_")
    if cfg.iontorrent:
        prepare_config_ih(cfg_file_name, cfg, ext_python_modules_home)
        binary_name = "ionhammer"
    else:
        prepare_config_bh(cfg_file_name, cfg, log)
        binary_name = "hammer"

    command = [os.path.join(execution_home, binary_name),
               os.path.abspath(cfg_file_name)]

    log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n")
    support.sys_call(command, log)
    if not os.path.isfile(corrected_dataset_yaml_filename):
        support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!")
    corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
    remove_not_corrected_reads(cfg.output_dir)
    is_changed = False
    if cfg.gzip_output:
        is_changed = True
        compress_dataset_files(corrected_dataset_data, ext_python_modules_home, cfg.max_threads, log)
    if not_used_dataset_data:
        is_changed = True
        corrected_dataset_data += not_used_dataset_data
    if is_changed:
        pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'))
    log.info("\n== Dataset description file was created: " + corrected_dataset_yaml_filename + "\n")

    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
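The same dataset round-trip with a stock PyYAML install, assuming the bundled pyyaml2/pyyaml3 modules mirror the upstream library; safe_load avoids executing arbitrary YAML tags, and the filename is a placeholder.

import yaml

with open("corrected.yaml") as f:
    dataset = yaml.safe_load(f)
# ... modify dataset here, e.g. append unused libraries ...
with open("corrected.yaml", "w") as f:
    yaml.safe_dump(dataset, f)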
Example 18
 def Run(self, log):
     log.info("Running %s" % self.cmdl)
     try:
         support.sys_call(self.cmdl, log, self.cwd)
     except:
         log.error("Failed to run '%s':\n%s" % (self.cmdl, sys.exc_info()))
         return -1
     log.info("Returned 0")
     return 0
Example 19
def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg,
               not_used_dataset_data, ext_python_modules_home, log):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml
    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    if cfg.iontorrent:
        dir_util.copy_tree(os.path.join(configs_dir, "ionhammer"), dst_configs, preserve_times=False)
        cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg")
    else:
        dir_util.copy_tree(os.path.join(configs_dir, "hammer"), dst_configs, preserve_times=False)
        cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_")
    if cfg.iontorrent:
        prepare_config_ih(cfg_file_name, cfg, ext_python_modules_home)
        binary_name = "ionhammer"
    else:
        prepare_config_bh(cfg_file_name, cfg, log)
        binary_name = "hammer"

    command = [os.path.join(execution_home, binary_name),
               os.path.abspath(cfg_file_name)]

    log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n")
    support.sys_call(command, log)
    if not os.path.isfile(corrected_dataset_yaml_filename):
        support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!")
    corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
    remove_not_corrected_reads(cfg.output_dir)
    is_changed = False
    if cfg.gzip_output:
        is_changed = True
        compress_dataset_files(corrected_dataset_data, ext_python_modules_home, cfg.max_threads, log)
    if not_used_dataset_data:
        is_changed = True
        corrected_dataset_data += not_used_dataset_data
    if is_changed:
        pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'))
    log.info("\n== Dataset description file was created: " + corrected_dataset_yaml_filename + "\n")

    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
Example 20
 def Run(self):
     self.__CheckInputExistance()
     command_line = IgRepConConfig().run_trie_compressor + " -i " + self.__params.io.cropped_reads + \
                    " -o " + self.__params.io.compressed_reads + " -m " + self.__params.io.map_file
     support.sys_call(command_line, self._log)
     command_line = "%s %s %s --limit=%d" % (
         IgRepConConfig().run_report_supernodes,
         self.__params.io.compressed_reads,
         self.__params.io.supernodes_file, self.__params.min_cluster_size)
     support.sys_call(command_line, self._log)
Example 21
 def Run(self):
     self.__CheckInputExistance()
     command_line = "%s %s %s -T %s -m %s -r %s -R %s" % (IgRepConConfig().run_compress_equal_clusters,
                                                          self.__params.io.uncompressed_final_clusters_fa,
                                                          self.__params.io.compressed_final_clusters_fa,
                                                          self.__params.io.tmp_compressed_clusters_fa,
                                                          self.__params.io.tmp_compressed_clusters_map,
                                                          self.__params.io.uncompressed_final_rcm,
                                                          self.__params.io.compressed_final_rcm)
     support.sys_call(command_line, self._log)
Example 22
def main():
    args = parse_args()

    # create logger
    log = logging.getLogger("Mismatch correction " + args.assembly_type)
    log.setLevel(logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter("%(message)s"))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)

    # moving assembled contigs (scaffolds) to misc dir
    if os.path.isfile(args.corrected):
        shutil.move(args.corrected, args.assembled)

    # TODO: we can check only here that the assembled file exists, and maybe skip processing...
    if not os.path.isfile(args.assembled) or os.path.getsize(
            args.assembled) == 0:
        log.info("\n== Skipping processing of %s (empty file)\n" %
                 args.assembly_type)
    else:
        log.info("\n== Processing of %s\n" % args.assembly_type)
        tmp_dir_for_corrector = os.path.join(args.output_dir,
                                             "mismatch_corrector",
                                             args.assembly_type)

        # correcting
        result_corrected_filename = os.path.join(tmp_dir_for_corrector,
                                                 "corrected_contigs.fasta")

        dst_configs = os.path.join(tmp_dir_for_corrector, "configs")
        cfg_file_name = os.path.join(dst_configs, "corrector.info")

        binary_name = "spades-corrector-core"
        command = [
            os.path.join(args.bin_home, binary_name),
            os.path.abspath(cfg_file_name),
            os.path.abspath(args.assembled)
        ]

        log.info("\n== Running contig polishing tool: " + ' '.join(command) +
                 "\n")
        log.info("\n== Dataset description file was created: " +
                 cfg_file_name + "\n")
        log.info("Run: " + ' '.join(command))

        support.sys_call(command, log)

        if not os.path.isfile(result_corrected_filename):
            log.error(
                "mismatch correction finished abnormally: %s not found!" %
                result_corrected_filename)

        if os.path.isfile(result_corrected_filename):
            shutil.copyfile(result_corrected_filename, args.corrected)
Example 23
def move_dataset_files(dataset_data,
                       dst,
                       ext_python_modules_home,
                       max_threads,
                       log,
                       gzip=False):
    to_compress = []
    for reads_library in dataset_data:
        for key, value in reads_library.items():
            if key.endswith('reads'):
                moved_reads_files = []
                for reads_file in value:
                    dst_filename = os.path.join(dst,
                                                os.path.basename(reads_file))
                    # TODO: fix problem with files with the same basenames in Hammer binary!
                    if not os.path.isfile(reads_file):
                        if (not gzip and os.path.isfile(dst_filename)) or (
                                gzip and os.path.isfile(dst_filename + '.gz')):
                            support.warning(
                                'file with corrected reads (' + reads_file +
                                ') is the same in several libraries', log)
                            if gzip:
                                dst_filename += '.gz'
                        else:
                            support.error(
                                'something went wrong and file with corrected reads ('
                                + reads_file + ') is missing!', log)
                    else:
                        shutil.move(reads_file, dst_filename)
                        if gzip:
                            to_compress.append(dst_filename)
                            dst_filename += '.gz'
                    moved_reads_files.append(dst_filename)
                reads_library[key] = moved_reads_files
    if len(to_compress):
        pigz_path = support.which('pigz')
        if pigz_path:
            for reads_file in to_compress:
                support.sys_call([
                    pigz_path, '-f', '-7', '-p',
                    str(max_threads), reads_file
                ], log)
        else:
            addsitedir(ext_python_modules_home)
            if sys.version.startswith('2.'):
                from joblib2 import Parallel, delayed
            elif sys.version.startswith('3.'):
                from joblib3 import Parallel, delayed
            n_jobs = min(len(to_compress), max_threads)
            outputs = Parallel(n_jobs=n_jobs)(
                delayed(support.sys_call)(['gzip', '-f', '-7', reads_file])
                for reads_file in to_compress)
            for output in outputs:
                if output:
                    log.info(output)
Example 24
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")

    if options_storage.continue_mode:
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not (options_storage.restart_from and
            (options_storage.restart_from == ("k%d" % K) or options_storage.restart_from.startswith("k%d:" % K))):
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        if options_storage.restart_from and options_storage.restart_from.find(":") != -1 \
                and options_storage.restart_from.startswith("k%d:" % K):
            stage = options_storage.restart_from[options_storage.restart_from.find(":") + 1:]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir))
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        dir_util._path_created = {}  # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None
    if "read_buffer_size" in cfg.__dict__:
        #FIXME why here???
        process_cfg.substitute_params(os.path.join(dst_configs, "construction.info"), {"read_buffer_size": cfg.read_buffer_size}, log)
    if "scaffolding_mode" in cfg.__dict__:
        #FIXME why here???
        process_cfg.substitute_params(os.path.join(dst_configs, "pe_params.info"), {"scaffolding_mode": cfg.scaffolding_mode}, log)

    prepare_config_rnaspades(os.path.join(dst_configs, "rna_mode.info"), log)
    prepare_config_construction(os.path.join(dst_configs, "construction.info"), log)
    cfg_fn = os.path.join(dst_configs, "config.info")
    prepare_config_spades(cfg_fn, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home)

    command = [os.path.join(execution_home, "spades-core"), cfg_fn]

    add_configs(command, dst_configs)

    #print("Calling: " + " ".join(command))
    support.sys_call(command, log)
Example 25
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")

    if options_storage.continue_mode:
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not (options_storage.restart_from and
            (options_storage.restart_from == ("k%d" % K) or options_storage.restart_from.startswith("k%d:" % K))):
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        if options_storage.restart_from and options_storage.restart_from.find(":") != -1 \
                and options_storage.restart_from.startswith("k%d:" % K):
            stage = options_storage.restart_from[options_storage.restart_from.find(":") + 1:]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir))
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        dir_util._path_created = {}  # see http://stackoverflow.com/questions/9160227/dir-util-copy-tree-fails-after-shutil-rmtree
        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None
    if "read_buffer_size" in cfg.__dict__:
        #FIXME why here???
        process_cfg.substitute_params(os.path.join(dst_configs, "construction.info"), {"read_buffer_size": cfg.read_buffer_size}, log)
    if "scaffolding_mode" in cfg.__dict__:
        #FIXME why here???
        process_cfg.substitute_params(os.path.join(dst_configs, "pe_params.info"), {"scaffolding_mode": cfg.scaffolding_mode}, log)

    prepare_config_rnaspades(os.path.join(dst_configs, "rna_mode.info"), log)
    prepare_config_construction(os.path.join(dst_configs, "construction.info"), log)
    cfg_fn = os.path.join(dst_configs, "config.info")
    prepare_config_spades(cfg_fn, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home)

    command = [os.path.join(execution_home, "spades-core"), cfg_fn]

    add_configs(command, dst_configs)

    #print("Calling: " + " ".join(command))
    support.sys_call(command, log)
Example 26
def run_mixcr2_alignment_only(input_file,
                              output_dir,
                              log=None,
                              loci="all",
                              enforce_fastq=False,
                              threads=16,
                              remove_tmp=True,
                              species="hsa"):
    if log is None:
        log = FakeLog()

    mkdir_p(output_dir)

    if enforce_fastq and idFormatByFileName(input_file) == "fasta":
        input_file_fq = "%s/input_reads.fq" % output_dir
        fastx2fastx(input_file, input_file_fq)
        input_file = input_file_tmp = input_file_fq
    elif idFormatByFileName(input_file) == "fasta":
        input_file_fasta = "%s/input_reads.fasta" % output_dir
        fastx2fastx(input_file, input_file_fasta)
        input_file = input_file_tmp = input_file_fasta
    else:
        input_file_tmp = None

    path = path_to_mixcr2
    args = {
        "path": path,
        "compress_eq_clusters_cmd":
        path_to_igrec + "/py/ig_compress_equal_clusters.py",
        "mixcr_cmd": "java -jar %s/mixcr.jar" % path,
        "threads": threads,
        "input_file": input_file,
        "output_dir": output_dir,
        "species": species,
        "loci": loci,
        "loci_arg": "chains"
    }

    # support.sys_call("%(mixcr_cmd)s align -t %(threads)d -f -g -r %(output_dir)s/align_report.txt --%(loci_arg)s %(loci)s --noMerge --species %(species)s %(input_file)s %(output_dir)s/mixcr.vdjca" % args,
    #                  log=log)
    timer = Timer()
    support.sys_call(
        "%(mixcr_cmd)s align -p kaligner2 --species %(species)s -t %(threads)d -f -g -r %(output_dir)s/align_report.txt --noMerge --%(loci_arg)s %(loci)s -OreadsLayout=Collinear -OvParameters.geneFeatureToAlign=VTranscript -OallowPartialAlignments=true %(input_file)s %(output_dir)s/mixcr.vdjca"
        % args,
        log=log)
    timer.stamp(output_dir + "/time.txt")

    if remove_tmp:
        if input_file_tmp is not None:
            os.remove(input_file_tmp)

        os.remove(output_dir + "/align_report.txt")
        os.remove(output_dir + "/mixcr.vdjca")
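A usage sketch for run_mixcr2_alignment_only; the paths are placeholders.

run_mixcr2_alignment_only("input_reads.fa", "mixcr_out", threads=8,
                          loci="IGH", species="hsa")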
Example 27
def run_bh(result_filename, configs_dir, execution_home, cfg,
           ext_python_modules_home, log):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    shutil.copytree(os.path.join(configs_dir, "hammer"), dst_configs)
    cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    prepare_config_bh(cfg_file_name, cfg, log)

    command = [
        os.path.join(execution_home, "hammer"),
        os.path.abspath(cfg_file_name)
    ]

    log.info("\n== Running read error correction tool: " + ' '.join(command) +
             "\n")
    support.sys_call(command, log)
    corrected_dataset_yaml_filename = os.path.join(cfg.tmp_dir,
                                                   "corrected.yaml")
    if not os.path.isfile(corrected_dataset_yaml_filename):
        support.error("read error correction finished abnormally: " +
                      corrected_dataset_yaml_filename + " not found!")
    corrected_dataset_data = pyyaml.load(
        open(corrected_dataset_yaml_filename, 'r'))
    if cfg.gzip_output:
        log.info("\n== Compressing corrected reads (with gzip)")
    move_dataset_files(corrected_dataset_data, cfg.output_dir,
                       ext_python_modules_home, cfg.max_threads, log,
                       cfg.gzip_output)
    corrected_dataset_yaml_filename = result_filename
    pyyaml.dump(corrected_dataset_data,
                open(corrected_dataset_yaml_filename, 'w'))
    log.info("\n== Dataset description file created: " +
             corrected_dataset_yaml_filename + "\n")

    shutil.rmtree(cfg.tmp_dir)
Example 28
 def Run(self):
     self.__CheckInputExistance()
     self.__params.vj_finder_output = os.path.join(self.__params.output,
                                                   "vj_finder")
     command_line = IgRepConConfig().run_vj_aligner + " -i " + self.__params.single_reads + \
                    " -o " + self.__params.io.vj_finder_output + \
                    " --db-directory " + IgRepConConfig().path_to_germline + \
                    " -t " + str(self.__params.num_threads) + \
                    " --loci " + self.__params.loci + \
                    " --organism " + self.__params.organism
     if self.__params.no_pseudogenes:
         command_line += " --no-pseudogenes"
     support.sys_call(command_line, self._log)
Example 29
def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    stage = BASE_STAGE
    saves_dir = os.path.join(data_dir, 'saves')
    dst_configs = os.path.join(data_dir, "configs")
    cfg_file_name = os.path.join(dst_configs, "config.info")

    if options_storage.continue_mode:
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")) and not (options_storage.restart_from and
            (options_storage.restart_from == ("k%d" % K) or options_storage.restart_from.startswith("k%d:" % K))):
            log.info("\n== Skipping assembler: " + ("K%d" % K) + " (already processed)")
            return
        if options_storage.restart_from and options_storage.restart_from.find(":") != -1:
            stage = options_storage.restart_from[options_storage.restart_from.find(":") + 1:]
        support.continue_from_here(log)

    if stage != BASE_STAGE:
        if not os.path.isdir(saves_dir):
            support.error("Cannot restart from stage %s: saves were not found (%s)!" % (stage, saves_dir))
    else:
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.makedirs(data_dir)

        dir_util.copy_tree(os.path.join(configs_dir, "debruijn"), dst_configs, preserve_times=False)
        # removing template configs
        for root, dirs, files in os.walk(dst_configs):
            for cfg_file in files:
                cfg_file = os.path.join(root, cfg_file)
                if cfg_file.endswith('.info.template'):
                    if os.path.isfile(cfg_file.split('.template')[0]):
                        os.remove(cfg_file)
                    else:
                        os.rename(cfg_file, cfg_file.split('.template')[0])

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    if prev_K:
        additional_contigs_fname = os.path.join(cfg.output_dir, "K%d" % prev_K, "simplified_contigs.fasta")
        if not os.path.isfile(additional_contigs_fname):
            support.warning("additional contigs for K=%d were not found (%s)!" % (K, additional_contigs_fname), log)
            additional_contigs_fname = None
    else:
        additional_contigs_fname = None
    if "read_buffer_size" in cfg.__dict__:
        construction_cfg_file_name = os.path.join(dst_configs, "construction.info")
        process_cfg.substitute_params(construction_cfg_file_name, {"read_buffer_size": cfg.read_buffer_size}, log)
    prepare_config_spades(cfg_file_name, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home)

    command = [os.path.join(execution_home, "spades"), cfg_file_name]
    support.sys_call(command, log)
Example 30
 def Run(self):
     self.__CheckInputExistance()
     command_line = "%s -i %s -c %s -q %s -o %s" % (
         IgRepConConfig().run_rcm_recoverer, self.__params.io.cropped_reads,
         self.__params.io.map_file,
         self.__params.io.dense_sgraph_decomposition,
         self.__params.io.uncompressed_final_rcm)
     support.sys_call(command_line, self._log)
     command_line = IgRepConConfig().run_consensus_constructor + \
                    " -i " + self.__params.io.cropped_reads + \
                    " -R " + self.__params.io.uncompressed_final_rcm + \
                    " -o " + self.__params.io.uncompressed_final_clusters_fa + \
                    " -H " + " -t " + str(self.__params.num_threads)
     support.sys_call(command_line, self._log)
Example 31
def run_iteration(configs_dir, execution_home, cfg, log, K,
                  use_additional_contigs, last_one):
    data_dir = os.path.join(cfg.output_dir, "K%d" % K)
    if options_storage.continue_mode:
        if os.path.isfile(os.path.join(data_dir, "final_contigs.fasta")):
            log.info("\n== Skipping assembler: " + ("K%d" % K) +
                     " (already processed)")
            return
        else:
            options_storage.continue_mode = False  # continue from here

    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)
    bin_reads_dir = os.path.join(cfg.output_dir, ".bin_reads")

    dst_configs = os.path.join(data_dir, "configs")
    shutil.copytree(os.path.join(configs_dir, "debruijn"), dst_configs)
    cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    prepare_config_spades(cfg_file_name, cfg, log, use_additional_contigs, K,
                          last_one)

    command = [
        os.path.join(execution_home, "spades"),
        os.path.abspath(cfg_file_name)
    ]

    if os.path.isdir(bin_reads_dir):
        if glob.glob(os.path.join(bin_reads_dir, "*_cor*")):
            for cor_filename in glob.glob(os.path.join(bin_reads_dir,
                                                       "*_cor*")):
                cor_index = cor_filename.rfind("_cor")
                new_bin_filename = cor_filename[:cor_index] + cor_filename[
                    cor_index + 4:]
                shutil.move(cor_filename, new_bin_filename)

    log.info("\n== Running assembler: " + ("K%d" % K) + "\n")
    support.sys_call(command, log)
Example 32
def get_igblast_output(args):
    args.input_hash = hash_file(args.input)
    args.igblast_output = args.storage_dir + "/" + args.input_hash + ".blast"

    if args.rerun_igblast or not os.path.exists(args.igblast_output + ".gz"):
        log.info("IgBLAST output will be written to " + args.igblast_output + ".gz")
        fq2fa(args.input, args.tmp_file)
        igblast_time = time.time()
        support.sys_call("bash %(workdir)s/blast.sh %(tmp_file)s %(igblast_output)s 2> /dev/null" % args.__dict__, log)
        igblast_time = time.time() - igblast_time
        os.unlink(args.tmp_file)
        support.sys_call("gzip %s --force" % args.igblast_output, log)
        log.info("IgBLAST time: %fs" % igblast_time)

    blast = ParseIgBlastOutput(args.igblast_output + ".gz", log, smart_open)
    # Normalize blast_blocks
    return [line.hit_table for line in blast.blocks]
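hash_file is not shown here; a plausible sketch of it, hashing the input contents so the cached IgBLAST output under storage_dir can be reused across identical inputs (assumed implementation, not the original):

import hashlib

def hash_file(filename, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(filename, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()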
Example 33
def main():
    log = igrec.CreateLogger()
    parser, params = ParseCommandLineParams(log)
    CheckParamsCorrectness(parser, params, log)
    try:
        if not os.path.exists(params.output):
            os.makedirs(params.output)
        igrec.CreateFileLogger(params, log)
        igrec.PrintCommandLine(log)
        final_dir = InitMakeFiles(params, log)
        # We need freshly compiled version to get actual build info
        if not params.no_compilation:
            support.sys_call(
                "make -C " +
                os.path.join(os.path.dirname(final_dir), "compilation"), log)
        from src.build_info.build_info import BuildInfo
        print "===================Build info==================="
        BuildInfo().Log(log)
        print "================================================"
        support.sys_call("make -C " + final_dir, log)
        PrintOutputFiles(params, log)
        log.info("\nThank you for using BarcodedIgReC!")
    except KeyboardInterrupt:
        log.info("\nBarcodedIgReC was interrupted!")
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
            SupportInfo(log)
            sys.exit(exc_value)
    except BaseException:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
            SupportInfo(log)
            sys.exit(exc_value)

    log.info("Log was written to " + params.log_filename)
Example 34
def run_vjfinder(input_file,
                 output_dir,
                 log=None,
                 loci="all",
                 threads=16,
                 additional_args="",
                 remove_tmp=False):
    if log is None:
        log = FakeLog()

    import os.path
    import os

    args = {
        "path": path_to_igrec,
        "loci": loci,
        "threads": threads,
        "input_file": input_file,
        "output_dir": output_dir,
        "organism": "human",
        "path_to_germline": igrec_dir + "/data/germline",
        "additional_args": additional_args
    }
    args = dict2class(args)

    command_line = args.path + "/build/release/bin/vj_finder" + \
        " -i " + os.path.abspath(args.input_file) + \
        " -o " + os.path.abspath(args.output_dir) + \
        " --db-directory " + os.path.abspath(args.path_to_germline) + \
        " -t " + str(args.threads) + \
        " --loci " + args.loci + \
        " --organism " + args.organism + " " + args.additional_args
    cwd = os.getcwd()
    os.chdir(igrec_dir)
    timer = Timer()
    support.sys_call(command_line, log=log)
    timer.stamp(output_dir + "/time.txt")
    os.chdir(cwd)
    if remove_tmp:
        import os.path
        if os.path.isdir(output_dir):  # isdir, not isfile: rmtree removes a directory tree
            import shutil
            shutil.rmtree(output_dir)
Example 35
def main():
    log = igrec.CreateLogger()
    parser, params = ParseCommandLineParams(log)
    CheckParamsCorrectness(parser, params, log)
    try:
        if not os.path.exists(params.output):
            os.makedirs(params.output)
        igrec.CreateFileLogger(params, log)
        igrec.PrintCommandLine(log)
        final_dir = InitMakeFiles(params, log)
        # We need freshly compiled version to get actual build info
        if not params.no_compilation:
            support.sys_call("make -C " + os.path.join(os.path.dirname(final_dir), "compilation"), log)
        print "===================Build info==================="
        from py import build_info
        build_info.Log(log)
        print "================================================"
        support.sys_call("make -C " + final_dir, log)
        PrintOutputFiles(params, log)
        log.info("\nThank you for using BarcodedIgReC!")
    except KeyboardInterrupt:
        log.info("\nBarcodedIgReC was interrupted!")
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
            SupportInfo(log)
            sys.exit(exc_value)
    except BaseException:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
            SupportInfo(log)
            sys.exit(exc_value)

    log.info("Log was written to " + params.log_filename)
Example 36
def run_bh(result_filename, configs_dir, execution_home, cfg, ext_python_modules_home, log):
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    shutil.copytree(os.path.join(configs_dir, "hammer"), dst_configs)
    cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    prepare_config_bh(cfg_file_name, cfg, log)

    command = [os.path.join(execution_home, "hammer"),
               os.path.abspath(cfg_file_name)]

    log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n")
    support.sys_call(command, log)
    corrected_dataset_yaml_filename = os.path.join(cfg.tmp_dir, "corrected.yaml")
    if not os.path.isfile(corrected_dataset_yaml_filename):
        support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!")
    corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
    if cfg.gzip_output:
        log.info("\n== Compressing corrected reads (with gzip)")
    move_dataset_files(corrected_dataset_data, cfg.output_dir, ext_python_modules_home, cfg.max_threads, log, cfg.gzip_output)
    corrected_dataset_yaml_filename = result_filename
    pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'))
    log.info("\n== Dataset description file created: " + corrected_dataset_yaml_filename + "\n")

    shutil.rmtree(cfg.tmp_dir)
Example n. 37
0
def run_igrec_old(input_file,
                  output_dir,
                  log=None,
                  tau=3,
                  threads=16,
                  additional_args="",
                  remove_tmp=True):
    if log is None:
        log = FakeLog()

    output_dir = os.path.abspath(output_dir)
    input_file = os.path.abspath(input_file)
    args = {
        "path": path_to_igrec_old,
        "tau": tau,
        "threads": threads,
        "input_file": input_file,
        "output_dir": output_dir,
        "additional_args": additional_args
    }
    timer = Timer()
    cwd = os.getcwd()
    os.chdir(path_to_igrec_old)
    support.sys_call(
        "%(path)s/ig_repertoire_constructor.py --tau=%(tau)d -t %(threads)d -s %(input_file)s -o %(output_dir)s %(additional_args)s"
        % args,
        log=log)
    os.chdir(cwd)
    timer.stamp(output_dir + "/time.txt")

    # Rename output
    os.rename(output_dir + "/constructed_repertoire.clusters.fa",
              output_dir + "/final_repertoire.fa")
    os.rename(output_dir + "/constructed_repertoire.rcm",
              output_dir + "/final_repertoire.rcm")

    if remove_tmp:
        rmdir(output_dir + "/configs")
        rmdir(output_dir + "/saves")
        rmdir(output_dir + "/temp_files")
        rmdir(output_dir + "/hamming_graphs_tau_%d" % tau)
Example n. 38
0
    def Run(self):
        self.__CheckInputExistance()
        if not self.__params.no_alignment:
            self.__params.io.vj_finder_output = os.path.join(self.__params.output, "vj_finder")
            command_line = os.path.abspath(IgRepConConfig().run_vj_aligner) + \
                " -i " + os.path.abspath(self.__params.single_reads) + \
                " -o " + os.path.abspath(self.__params.io.vj_finder_output) + \
                " --db-directory " + os.path.abspath(IgRepConConfig().path_to_germline) + \
                " -t " + str(self.__params.num_threads) + \
                " --loci " + self.__params.loci + \
                " --organism " + self.__params.organism
            if self.__params.no_pseudogenes:
                command_line += " --pseudogenes=off"
            else:
                command_line += " --pseudogenes=on"
            cwd = os.getcwd()
            os.chdir(home_directory)
            support.sys_call(command_line, self._log)
            os.chdir(cwd)
        else:
            self._log.info("VJ Finder stage skipped")
            self.__params.io.cropped_reads = self.__params.single_reads
Example n. 39
0
def RunTool(params, log):
    try:
        igs_command_line = ig_simulator_bin + " " + \
                           params.output_config_file
        support.sys_call(igs_command_line, log)
        log.info("\nThank you for using " + tool_name + "!\n")
    except KeyboardInterrupt:
        log.info("\n" + tool_name + " was interrupted!")
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
    except BaseException:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
Example n. 40
0
def move_dataset_files(dataset_data, dst, ext_python_modules_home, max_threads, log, gzip=False):
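    # Move each library's corrected read files into dst; tolerate files shared between
    # libraries (already moved) and optionally schedule the moved files for compression.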
    to_compress = []
    for reads_library in dataset_data:
        for key, value in reads_library.items():
            if key.endswith('reads'):
                moved_reads_files = []
                for reads_file in value:
                    dst_filename = os.path.join(dst, os.path.basename(reads_file))
                    # TODO: fix problem with files with the same basenames in Hammer binary!
                    if not os.path.isfile(reads_file):
                        if (not gzip and os.path.isfile(dst_filename)) or (gzip and os.path.isfile(dst_filename + '.gz')):
                            support.warning('file with corrected reads (' + reads_file + ') is the same in several libraries', log)
                            if gzip:
                                dst_filename += '.gz'
                        else:
                            support.error('something went wrong and file with corrected reads (' + reads_file + ') is missing!', log)
                    else:
                        shutil.move(reads_file, dst_filename)
                        if gzip:
                            to_compress.append(dst_filename)
                            dst_filename += '.gz'
                    moved_reads_files.append(dst_filename)
                reads_library[key] = moved_reads_files
    if len(to_compress):
        pigz_path = support.which('pigz')
        if pigz_path:
            for reads_file in to_compress:
                support.sys_call([pigz_path, '-f', '-7', '-p', str(max_threads), reads_file], log)
        else:
            addsitedir(ext_python_modules_home)
            if sys.version.startswith('2.'):
                from joblib2 import Parallel, delayed
            elif sys.version.startswith('3.'):
                from joblib3 import Parallel, delayed
            n_jobs = min(len(to_compress), max_threads)
            outputs = Parallel(n_jobs=n_jobs)(delayed(support.sys_call)(['gzip', '-f', '-7', reads_file]) for reads_file in to_compress)
            for output in outputs:
                if output:
                    log.info(output)
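Every example in this collection funnels through support.sys_call. A minimal sketch of such a wrapper, assuming it accepts either a command string or an argument list, streams the child's output into the logger, and fails loudly on a non-zero exit code:

import shlex
import subprocess

def sys_call_sketch(command, log=None):
    # Accept either a shell-style string (as in the IgReC examples) or a list (as in the SPAdes examples).
    if isinstance(command, str):
        command = shlex.split(command)
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output, _ = proc.communicate()
    output = output.decode(errors="replace")
    if log is not None:
        log.info(output)
    if proc.returncode != 0:
        raise RuntimeError("command failed: %s" % " ".join(command))
    return output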
Example n. 41
0
def run_bh(configs_dir, execution_home, cfg, ext_python_modules_home, log):
    addsitedir(ext_python_modules_home)
    import pyyaml

    dst_configs = os.path.join(cfg.output_dir, "configs")
    if os.path.exists(dst_configs):
        shutil.rmtree(dst_configs)
    shutil.copytree(os.path.join(configs_dir, "hammer"), dst_configs)
    cfg_file_name = os.path.join(dst_configs, "config.info")
    # removing template configs
    for root, dirs, files in os.walk(dst_configs):
        for cfg_file in files:
            cfg_file = os.path.join(root, cfg_file)
            if cfg_file.endswith('.info.template'):
                if os.path.isfile(cfg_file.split('.template')[0]):
                    os.remove(cfg_file)
                else:
                    os.rename(cfg_file, cfg_file.split('.template')[0])

    prepare_config_bh(cfg_file_name, cfg, log)

    command = os.path.join(execution_home, "hammer") + " " +\
               os.path.abspath(cfg_file_name)

    log.info("\n== Running read error correction tool: " + command + "\n")
    support.sys_call(command, log)
    corrected_dataset_yaml_filename = os.path.join(cfg.tmp_dir, "corrected.yaml")
    corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
    if cfg.gzip_output:
        log.info("\n== Compressing corrected reads (with gzip)")
    support.move_dataset_files(corrected_dataset_data, cfg.output_dir, log, cfg.gzip_output)
    corrected_dataset_yaml_filename = os.path.join(cfg.output_dir, "corrected.yaml")
    pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'))
    log.info("\n== Dataset description file created: " + corrected_dataset_yaml_filename + "\n")

    shutil.rmtree(cfg.tmp_dir)
    return corrected_dataset_yaml_filename
Example n. 42
0
    def Run(self):
        self.__CheckInputExistance()
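        # Step 1: compress the cropped reads into unique sequences, recording the read-to-compressed-read map (-m).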
        command_line = IgRepConConfig().run_trie_compressor + " -i " + self.__params.io.cropped_reads + \
                       " -o " + self.__params.io.compressed_reads + " -m " + self.__params.io.map_file + " -Toff"
        support.sys_call(command_line, self._log)

        command_line = IgRepConConfig().run_triecmp_to_repertoire + " -i " + self.__params.io.cropped_reads + \
                       " -c " + self.__params.io.compressed_reads + " -m " + self.__params.io.map_file + \
                       " -r " + self.__params.io.supernode_repertoire + " -R " + self.__params.io.supernode_rcm
        support.sys_call(command_line, self._log)
        command_line = "%s %s %s --limit=%d" % (IgRepConConfig().run_report_supernodes,
                                                self.__params.io.supernode_repertoire,
                                                self.__params.io.supernodes_file,
                                                self.__params.min_cluster_size)
        support.sys_call(command_line, self._log)

        if not self.__params.equal_compression:
            command_line = IgRepConConfig().run_fake_trie_compressor + " -i " + self.__params.io.cropped_reads + \
                        " -o " + self.__params.io.compressed_reads + " -m " + self.__params.io.map_file
            support.sys_call(command_line, self._log)
Example n. 43
0
def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg,
               dataset_data, ext_python_modules_home, only_compressing_is_needed, log):
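    # Long-read libraries are set aside before error correction and merged back into the
    # dataset description afterwards; the remaining libraries go through (ion)hammer.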
    addsitedir(ext_python_modules_home)
    if sys.version.startswith('2.'):
        import pyyaml2 as pyyaml
    elif sys.version.startswith('3.'):
        import pyyaml3 as pyyaml

    # not all reads need processing
    if support.get_lib_ids_by_type(dataset_data, options_storage.LONG_READS_TYPES):
        not_used_dataset_data = support.get_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES)
        to_correct_dataset_data = support.rm_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES)
        to_correct_dataset_yaml_filename = os.path.join(cfg.output_dir, "to_correct.yaml")
        pyyaml.dump(to_correct_dataset_data, open(to_correct_dataset_yaml_filename, 'w'), default_flow_style=False, default_style='"', width=100500)
        cfg.dataset_yaml_filename = to_correct_dataset_yaml_filename
    else:
        not_used_dataset_data = None

    if not only_compressing_is_needed:
        dst_configs = os.path.join(cfg.output_dir, "configs")
        if os.path.exists(dst_configs):
            shutil.rmtree(dst_configs)
        if cfg.iontorrent:
            dir_util.copy_tree(os.path.join(configs_dir, "ionhammer"), dst_configs, preserve_times=False)
            cfg_file_name = os.path.join(dst_configs, "ionhammer.cfg")
        else:
            dir_util.copy_tree(os.path.join(configs_dir, "hammer"), dst_configs, preserve_times=False)
            cfg_file_name = os.path.join(dst_configs, "config.info")

        cfg.tmp_dir = support.get_tmp_dir(prefix="hammer_")
        if cfg.iontorrent:
            prepare_config_ih(cfg_file_name, cfg, ext_python_modules_home)
            binary_name = "ionhammer"
        else:
            prepare_config_bh(cfg_file_name, cfg, log)
            binary_name = "hammer"

        command = [os.path.join(execution_home, binary_name),
                   os.path.abspath(cfg_file_name)]

        log.info("\n== Running read error correction tool: " + ' '.join(command) + "\n")
        support.sys_call(command, log)
        if not os.path.isfile(corrected_dataset_yaml_filename):
            support.error("read error correction finished abnormally: " + corrected_dataset_yaml_filename + " not found!")
    else:
        log.info("\n===== Skipping %s (already processed). \n" % "read error correction tool")
        support.continue_from_here(log)

    corrected_dataset_data = pyyaml.load(open(corrected_dataset_yaml_filename, 'r'))
    remove_not_corrected_reads(cfg.output_dir)
    is_changed = False
    if cfg.gzip_output:
        is_changed = True
        compress_dataset_files(corrected_dataset_data, ext_python_modules_home, cfg.max_threads, log)
    if not_used_dataset_data:
        is_changed = True
        corrected_dataset_data += not_used_dataset_data
    if is_changed:
        pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'), default_flow_style=False, default_style='"', width=100500)
    log.info("\n== Dataset description file was created: " + corrected_dataset_yaml_filename + "\n")

    if os.path.isdir(cfg.tmp_dir):
        shutil.rmtree(cfg.tmp_dir)
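A note on the YAML round-trips above: the bundled pyyaml2/pyyaml3 modules accept pyyaml.load(stream) with no extra arguments, but a stock PyYAML installation warns on that call and expects an explicit loader. A rough equivalent with standard PyYAML (assumed here as a stand-in for the bundled modules):

import yaml  # stock PyYAML, standing in for the bundled pyyaml2/pyyaml3

with open("corrected.yaml") as f:
    # safe_load restricts parsing to plain data types, which is all the dataset files contain.
    corrected_dataset_data = yaml.safe_load(f)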
def main(argv, external_logger = ""):
    from argparse import ArgumentParser
    parser = ArgumentParser(description="== DSF: an algorithm for corrupted cliques search ==",
                            epilog="""
                            In case you have troubles running DSF, you can write to [email protected].
                            Please provide us with dense_subgraph_finder.log file from the output directory.
                            """,
                            add_help=False)
    req_args = parser.add_argument_group("Input")
    input_args = req_args.add_mutually_exclusive_group(required=True)
    input_args.add_argument("-g", "--graph",
                            type=str,
                            default="",
                            dest="graph",
                            help="Input graph in GRAPH format")
    input_args.add_argument("--test",
                            action="store_const",
                            const=os.path.join(home_directory, "test_dataset/dsf/test.graph"),
                            dest="graph",
                            help="Running test dataset")

    out_args = parser.add_argument_group("Output")
    out_args.add_argument("-o", "--output",
                            type=str,
                            default=os.path.join(home_directory, "dsf_test"),
                            help="Output directory")

    optional_args = parser.add_argument_group("Optional arguments")
    optional_args.add_argument("-t", "--threads",
                               type=int,
                               default=16,
                               dest="num_threads",
                               help="Threads number [default: %(default)d]")
    optional_args.add_argument("-f", '--min-fillin',
                               type=float,
                               default=0.6,
                               dest="min_fillin",
                               help='Minimum fill-in of dense subgraphs [default: %(default)f]')
    optional_args.add_argument("-n", "--min-snode-size",
                               type=int,
                               default=5,
                               dest="min_snode_size",
                               help="Minimum vertex weight that prevents its gluing with other heavy vertex "
                                    "[default: %(default)d]")
    optional_args.add_argument("-s", "--min-size",
                               type=int,
                               default=5,
                               dest="min_graph_size",
                               help="Minimum size of graph where dense subgraphs will be computed "
                                    "[default: %(default)d]")
    optional_args.add_argument("--create-triv-dec",
                               action="store_const",
                               const=True,
                               dest="create_trivial_decomposition",
                               help='Creating decomposition according to connected components [default: False]')
    optional_args.add_argument("--save-aux-files",
                               action="store_const",
                               const=True,
                               dest="save_aux_files",
                               help="Saving auxiliary files: subgraphs in GRAPH format and their decompositions "
                                    "[default: False]")
    optional_args.add_argument("--clean-output-dir",
                               default=True,
                               dest="clean_output_dir",
                               action="store_true",
                               help="Clean output directory on start [default]")
    optional_args.add_argument("--no-clean-output-dir",
                               default=True,
                               dest="clean_output_dir",
                               action="store_false",
                               help="Do not clean output directory on start")
    optional_args.add_argument("-h", "--help",
                               action="help",
                               help="Help message and exit")

    parser.set_defaults(config_dir="configs",
                        config_file="config.info")

    # prepare log
    log = logging.getLogger('dense_subgraph_finder')
    log.setLevel(logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter('%(message)s'))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)
    if external_logger != "":
        external_log_handler = logging.FileHandler(external_logger, mode = "a")
        log.addHandler(external_log_handler)

    args = [arg for arg in argv if ntpath.basename(arg) != 'dense_subgraph_finder.py']
    params = parser.parse_args(args)

    CheckParamsCorrectness(params, log, parser)
    SetOutputParams(params, params.output)

    PrepareOutputDir(params, log)

    # log file
    params.log_filename = os.path.join(params.output, "dense_subgraph_finder.log")
    if os.path.exists(params.log_filename):
        log.info("Removing %s" % params.log_filename)
        os.remove(params.log_filename)
    log_handler = logging.FileHandler(params.log_filename, mode='a')
    log.addHandler(log_handler)

    # print command line
    command_line = "Command_line: "
    if argv[0] != "dense_subgraph_finder.py":
        command_line += "dense_subgraph_finder.py "
    command_line += " ".join(argv)
    log.info(command_line + "\n")
    PrintParams(params, log)
    log.info("Log will be written to " + params.log_filename + "\n")

    PrepareConfigs(params, log)

    # run dense subgraph finder
    try:
        dsf_command_line = init.PathToBins.run_dense_sgraph_finder + " " + params.config_file
        support.sys_call(dsf_command_line, log)
        Cleanup(params, log)
        log.info("\nThank you for using Dense Subgraph Finder!\n")
    except KeyboardInterrupt:
        log.info("\nDense subgraph finder was interrupted!")
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
            supportInfo(log)
    except BaseException:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
            supportInfo(log)

    log.info("Log was written to " + params.log_filename)
Example n. 45
0
def main(argv):
    from argparse import ArgumentParser
    parser = ArgumentParser(description="== " + tool_name + ": a tool for diversity analysis of full-length immunosequencing reads ==",
                            epilog="In case you have troubles running " + tool_name + ", you can write to [email protected]."
                            "Please provide us with diversity_analyzer.log file from the output directory.",
                            add_help=False)
    req_args = parser.add_argument_group("Required params")
    input_args = req_args.add_mutually_exclusive_group(required=True)
    input_args.add_argument("-i", "--input",
                            type=str,
                            default="",
                            dest="input_reads",
                            help="Input reads in FASTQ/FATSA format")
    input_args.add_argument("--test",
                            action="store_const",
                            const=test_reads,
                            dest="input_reads",
                            help="Running test dataset")

    out_args = parser.add_argument_group("Output")
    out_args.add_argument("-o", "--output",
                          type=str,
                          dest="output_dir",
                          default="", #os.path.join(home_directory, "cdr_test"),
                          help="Output directory")

    optional_args = parser.add_argument_group("Optional arguments")
    optional_args.add_argument("-t", "--threads",
                               type=int,
                               default=16,
                               dest="num_threads",
                               help="Threads number [default: %(default)d]")
    optional_args.add_argument("-d", '--domain',
                               type=str,
                               default="imgt",
                               dest="domain_system",
                               help='Domain system for CDR search: imgt OR kabat [default: %(default)s]')

    vj_finder_args = parser.add_argument_group("VJ alignment params")
    vj_finder_args.add_argument("-l", "--loci",
                                type=str,
                                default="all",
                                dest="loci",
                                help="Loci: IGH, IGK, IGL, IG (all BCRs), TRA, TRB, TRG, TRD, TR (all TCRs) or all. "
                                     "[default: %(default)s]")
    vj_finder_args.add_argument("--org",
                                type=str,
                                default="human",
                                dest="organism",
                                help="Organism: human, mouse, rat, rabbit, rhesus-monkey [default: %(default)s]")

    optional_args.add_argument('--skip-plots',
                               action='store_const',
                               const=True,
                               dest="skip_plots",
                               help="Skip drawing plots")

    optional_args.add_argument("-h", "--help",
                               action="help",
                               help="Help message and exit")

    # prepare log
    log = logging.getLogger('diversity_analyzer')
    log.setLevel(logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter('%(message)s'))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)

    params = parser.parse_args()

    CheckBinariesExistance(params, log)
    CheckParamsCorrectness(params, log)
    SetOutputParams(params, log)

    PrepareOutputDir(params)

    # log file
    params.log_filename = os.path.join(params.output_dir, "diversity_analyzer.log")
    if os.path.exists(params.log_filename):
        os.remove(params.log_filename)
    log_handler = logging.FileHandler(params.log_filename, mode='a')
    log.addHandler(log_handler)

    # print command line
    command_line = "Command_line: "
    command_line += " ".join(argv)
    log.info(command_line + "\n")
    PrintParams(params, log)
    log.info("Log will be written to " + params.log_filename + "\n")

    PrepareConfigs(params, log)
    try:
        cdr_command_line = run_cdr_labeler + " " + params.cdr_labeler_config_file
        support.sys_call(cdr_command_line, log)
        if not params.skip_plots:
            log.info("\n==== Visualization of diversity statistics ====")
            visualize_vj_stats.main(["", os.path.join(params.output_dir, "cdr_details.txt"),
                                 os.path.join(params.output_dir, "shm_details.txt"),
                                 params.output_dir, log])
            log.info("\n==== Annotation report creation ====")
            html_report_writer.main(os.path.join(params.output_dir, "cdr_details.txt"),
                                os.path.join(params.output_dir, "shm_details.txt"),
                                os.path.join(params.output_dir, "plots"),
                                os.path.join(params.output_dir, "annotation_report.html"), log)
        Cleanup(params, log)
        log.info("\nThank you for using " + tool_name + "!\n")
    except KeyboardInterrupt:
        log.info("\n" + tool_name + " was interrupted!")
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
            # supportInfo(log)
    except BaseException:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            log.info("\nERROR: Exception caught.")
            # supportInfo(log)

    log.info("Log was written to " + params.log_filename)
Example n. 46
0
def main(ds_args_list, general_args_list, spades_home, bin_home):
    log = logging.getLogger('dipspades')
    log.setLevel(logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter('%(message)s'))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)

    support.check_binaries(bin_home, log)
    ds_args = parse_arguments(ds_args_list, log)

    if not os.path.exists(ds_args.output_dir):
        os.makedirs(ds_args.output_dir)
    log_filename = os.path.join(ds_args.output_dir, "dipspades.log")
    if os.path.exists(log_filename):
        os.remove(log_filename)
    log_handler = logging.FileHandler(log_filename, mode='a')
    log.addHandler(log_handler)

    params_filename = os.path.join(ds_args.output_dir, "params.txt")
    params_handler = logging.FileHandler(params_filename, mode='a')
    log.addHandler(params_handler)

    log.info("\n")
    log.info("General command line: " + " ".join(general_args_list) + "\n")
    log.info("dipSPAdes command line: " + " ".join(ds_args_list) + "\n")
    print_ds_args(ds_args, log)
    log.removeHandler(params_handler)

    log.info("\n======= dipSPAdes started. Log can be found here: " + log_filename + "\n")
    write_haplocontigs_in_file(ds_args.haplocontigs, ds_args.haplocontigs_fnames)

    config_fname = prepare_configs(os.path.join(spades_home, "configs", "dipspades"), ds_args, log)
    ds_args.tmp_dir = support.get_tmp_dir(prefix="dipspades_", base_dir=ds_args.tmp_dir)
    prepare_config(config_fname, ds_args, log)

    try:
        log.info("===== Assembling started.\n")
        binary_path = os.path.join(bin_home, "dipspades")
        command = [binary_path, config_fname]
        support.sys_call(command, log)
        log.info("\n===== Assembling finished.\n")
        print_ds_output(ds_args.output_dir, log)
        if os.path.isdir(ds_args.tmp_dir):
            shutil.rmtree(ds_args.tmp_dir)
        log.info("\n======= dipSPAdes finished.\n")
        log.info("dipSPAdes log can be found here: " + log_filename + "\n")
        log.info("Thank you for using dipSPAdes!")
        log.removeHandler(log_handler)
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            if exc_type == OSError and exc_value.errno == errno.ENOEXEC: # Exec format error
                support.error("It looks like you are using SPAdes binaries for another platform.\n" +
                              support.get_spades_binaries_info_message(), dipspades=True)
            else:
                log.exception(exc_value)
                support.error("exception caught: %s" % exc_type, log)
    except BaseException: # since python 2.5 system-exiting exceptions (e.g. KeyboardInterrupt) are derived from BaseException
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
            log.exception(exc_value)
            support.error("exception caught: %s" % exc_type, log, dipspades=True)