Ejemplo n.º 1
0
def run_presto(input_file, output_dir, log=None, remove_tmp=True):
    """Run pRESTO's ``CollapseSeq.py`` and write an IgReC-style repertoire.

    The input is first converted with ``fastx2fastx`` into
    ``<output_dir>/input_reads.fasta`` and ``CollapseSeq.py`` is run on that
    copy.  Each record of ``<output_dir>/presto_collapse-unique.fasta`` is
    then renamed to ``cluster___<index>___size___<size>`` (size comes from
    ``parse_presto_id`` applied to the record description) and written to
    ``<output_dir>/final_repertoire.fa``.  ``Timer.stamp`` is called with
    ``<output_dir>/time.txt`` right after the ``CollapseSeq.py`` call.

    Args:
        input_file: path of the reads handed to ``fastx2fastx``.
        output_dir: directory receiving every output; passed to ``mkdir_p``.
        log: logger forwarded to ``support.sys_call``; a ``FakeLog`` is used
            when it is ``None``.
        remove_tmp: when true, delete the converted input and the raw
            ``CollapseSeq.py`` output once the repertoire has been written.
    """
    log = FakeLog() if log is None else log

    mkdir_p(output_dir)

    # The original author labelled this conversion step "gunzip".
    converted_reads = "%s/input_reads.fasta" % output_dir
    fastx2fastx(input_file, converted_reads)

    collapse_args = {"input_file": converted_reads, "output_dir": output_dir}
    collapse_cmd = "CollapseSeq.py -s %(input_file)s --outdir %(output_dir)s --outname presto"

    timer = Timer()
    support.sys_call(collapse_cmd % collapse_args, log=log)
    timer.stamp(output_dir + "/time.txt")

    collapsed_fasta = output_dir + "/presto_collapse-unique.fasta"
    final_fasta = output_dir + "/final_repertoire.fa"
    with smart_open(collapsed_fasta) as reader:
        with smart_open(final_fasta, "w") as writer:
            records = SeqIO.parse(reader, idFormatByFileName(collapsed_fasta))
            for index, record in enumerate(records):
                cluster_size = parse_presto_id(record.description)
                new_name = "cluster___%d___size___%d" % (index, cluster_size)
                record.id = record.description = new_name
                SeqIO.write(record, writer, "fasta")

    if remove_tmp:
        for leftover in (converted_reads, collapsed_fasta):
            os.remove(leftover)
Ejemplo n.º 2
0
def simulate_data(input_file, output_dir, log=None, **kwargs):
    """Build a simulated dataset in ``output_dir`` from ``input_file``.

    ``run_igrec`` is executed into a scratch directory (with ``tau=1`` and
    ``remove_tmp=False``) so that its ``vj_finder/cleaned_reads.fa`` output
    can be used.  From those cleaned reads the function writes:

    * ``final_repertoire.rcm`` via ``simulated_repertoire_to_rcm``;
    * ``final_repertoire.fa.gz`` via
      ``simulated_repertoire_to_final_repertoire``, afterwards rewritten in
      place by ``ig_compress_equal_clusters.py``;
    * ``input_reads.fa.gz`` via ``jit_fx_file``.

    Args:
        input_file: reads passed to ``run_igrec``.
        output_dir: destination directory; passed to ``mkdir_p``.
        log: logger forwarded to ``support.sys_call``; a ``FakeLog`` is used
            when it is ``None``.
        **kwargs: forwarded unchanged to ``jit_fx_file``.
    """
    import shutil
    import tempfile

    if log is None:
        log = FakeLog()

    mkdir_p(output_dir)

    temp_dir = tempfile.mkdtemp()
    try:
        # Run IgReC only for its VJ Finder output; intermediate files must be
        # kept (remove_tmp=False) because cleaned_reads.fa is read below.
        run_igrec(input_file, temp_dir, remove_tmp=False, tau=1)

        cleaned_reads = temp_dir + "/vj_finder/cleaned_reads.fa"
        rcm_file = "%s/final_repertoire.rcm" % output_dir
        repertoire_file = "%s/final_repertoire.fa.gz" % output_dir

        simulated_repertoire_to_rcm(cleaned_reads, rcm_file)
        simulated_repertoire_to_final_repertoire(cleaned_reads,
                                                 repertoire_file)

        args = {
            "path": igrec_dir,
            "repertoire": repertoire_file,
            "rcm": rcm_file,
        }
        # The repertoire is both input and output of the compression script.
        support.sys_call(
            "%(path)s/py/ig_compress_equal_clusters.py %(repertoire)s %(repertoire)s -r %(rcm)s"
            % args,
            log=log)

        # TODO factor this stage
        jit_fx_file(cleaned_reads, "%s/input_reads.fa.gz" % output_dir,
                    **kwargs)
    finally:
        # mkdtemp() leaves deletion to the caller; clean up even when one of
        # the steps above raises so failed runs do not leak scratch dirs.
        shutil.rmtree(temp_dir)
Ejemplo n.º 3
0
def simulate_data_from_dir(input_dir, output_dir, log=None, **kwargs):
    """Produce ``<output_dir>/input_reads.fa.gz`` from an existing directory.

    ``jit_fx_file`` is applied to ``<input_dir>/error_free_reads.fa.gz`` and
    its result goes to ``<output_dir>/input_reads.fa.gz``.

    Args:
        input_dir: directory containing ``error_free_reads.fa.gz``.
        output_dir: destination directory; passed to ``mkdir_p``.
        log: replaced by a ``FakeLog`` when ``None``; not otherwise used here.
        **kwargs: forwarded unchanged to ``jit_fx_file``.
    """
    log = FakeLog() if log is None else log

    mkdir_p(output_dir)

    source_reads = input_dir + "/error_free_reads.fa.gz"
    target_reads = "%s/input_reads.fa.gz" % output_dir
    jit_fx_file(source_reads, target_reads, **kwargs)
Ejemplo n.º 4
0
def run_mixcr2_alignment_only(input_file,
                              output_dir,
                              log=None,
                              loci="all",
                              enforce_fastq=False,
                              threads=16,
                              remove_tmp=True,
                              species="hsa"):
    """Run only the ``align`` step of the MiXCR jar under ``path_to_mixcr2``.

    When ``idFormatByFileName`` reports the input as ``"fasta"`` it is first
    converted with ``fastx2fastx`` into ``<output_dir>/input_reads.fq``
    (``enforce_fastq`` true) or ``<output_dir>/input_reads.fasta``
    (otherwise); any other format is passed to MiXCR untouched.
    ``Timer.stamp`` is called with ``<output_dir>/time.txt`` right after the
    alignment command.

    Args:
        input_file: reads to align.
        output_dir: working/output directory; passed to ``mkdir_p``.
        log: logger forwarded to ``support.sys_call``; a ``FakeLog`` is used
            when it is ``None``.
        loci: value placed after ``--chains`` on the command line.
        enforce_fastq: convert FASTA input to ``.fq`` instead of ``.fasta``.
        threads: value of MiXCR's ``-t`` option.
        remove_tmp: when true, delete the converted input (if any),
            ``align_report.txt`` and ``mixcr.vdjca`` afterwards.
        species: value of MiXCR's ``--species`` option.
    """
    log = FakeLog() if log is None else log

    mkdir_p(output_dir)

    # Only FASTA input is converted; the target extension depends on
    # enforce_fastq.
    converted_input = None
    if idFormatByFileName(input_file) == "fasta":
        if enforce_fastq:
            converted_input = "%s/input_reads.fq" % output_dir
        else:
            converted_input = "%s/input_reads.fasta" % output_dir
        fastx2fastx(input_file, converted_input)
        input_file = converted_input

    mixcr_dir = path_to_mixcr2
    args = {
        "path": mixcr_dir,
        "compress_eq_clusters_cmd":
        path_to_igrec + "/py/ig_compress_equal_clusters.py",
        "mixcr_cmd": "java -jar %s/mixcr.jar" % mixcr_dir,
        "threads": threads,
        "input_file": input_file,
        "output_dir": output_dir,
        "species": species,
        "loci": loci,
        "loci_arg": "chains"
    }

    align_cmd = "%(mixcr_cmd)s align -p kaligner2 --species %(species)s -t %(threads)d -f -g -r %(output_dir)s/align_report.txt --noMerge --%(loci_arg)s %(loci)s -OreadsLayout=Collinear -OvParameters.geneFeatureToAlign=VTranscript -OallowPartialAlignments=true %(input_file)s %(output_dir)s/mixcr.vdjca"

    timer = Timer()
    support.sys_call(align_cmd % args, log=log)
    timer.stamp(output_dir + "/time.txt")

    if not remove_tmp:
        return

    if converted_input is not None:
        os.remove(converted_input)

    for suffix in ("/align_report.txt", "/mixcr.vdjca"):
        os.remove(output_dir + suffix)
Ejemplo n.º 5
0
    def ref_free_plots(rep, name, dir):
        """Draw the reference-free plots for ``rep`` and add its stats to the report.

        Uses ``args`` and ``report`` from the enclosing scope.  Plots are
        produced only when ``args.figure_format`` is set; bad clusters are
        exported only when ``args.export_bad_clusters`` is set.  ``rep.report``
        is always called, under the section name ``"<name>_stats"``.

        Args:
            rep: repertoire object offering the plotting/report methods used below.
            name: label inserted into every output file name.
            dir: output directory; passed to ``mkdir_p`` before anything is written.
        """
        figure_format = args.figure_format
        if figure_format:
            mkdir_p(dir)

            errors_out = dir + "/%s_distribution_of_errors_in_reads" % name
            rep.plot_distribution_of_errors_in_reads(out=errors_out,
                                                     format=figure_format)

            scatter_out = dir + "/%s_max_error_scatter" % name
            rep.plot_estimation_of_max_error_distribution(out=scatter_out,
                                                          format=figure_format)

            # Page mode uses a fixed y-limit; otherwise 0 is handed through.
            ymax = 0.041 if args.page_mode else 0

            # Profiles of the five largest clusters; file names are 1-based.
            for rank in range(1, 6):
                cluster = rep.largest(rank - 1)

                discordance_out = dir + "/%s_cluster_discordance_profile_largest_%d" % (
                    name, rank)
                cluster.plot_profile(out=discordance_out,
                                     format=figure_format,
                                     ymax=ymax)

                error_out = dir + "/%s_cluster_error_profile_largest_%d" % (
                    name, rank)
                cluster.plot_profile(out=error_out,
                                     discordance=False,
                                     format=figure_format,
                                     ymax=ymax)

            # The same two profiles for the repertoire as a whole.
            whole_discordance_out = dir + "/%s_cluster_discordance_profile" % name
            rep.plot_profile(out=whole_discordance_out,
                             format=figure_format,
                             ymax=ymax)

            whole_error_out = dir + "/%s_cluster_error_profile" % name
            rep.plot_profile(out=whole_error_out,
                             discordance=False,
                             format=figure_format,
                             ymax=ymax)

        if args.export_bad_clusters:
            mkdir_p(dir)
            bad_clusters_out = dir + "/bad_%s_clusters/" % name
            rep.export_bad_clusters(out=bad_clusters_out)

        rep.report(report, "%s_stats" % name)
Ejemplo n.º 6
0
def run_mixcr2(input_file,
               output_dir,
               log=None,
               loci="all",
               enforce_fastq=False,
               threads=16,
               remove_tmp=True,
               species="hsa",
               region_from="FR1Begin",
               region_to="FR4Begin"):
    """Run the MiXCR jar under ``path_to_mixcr2`` and convert its output.

    Pipeline, all inside ``output_dir``:

    1. FASTA input (per ``idFormatByFileName``) is converted with
       ``fastx2fastx`` to ``input_reads.fq`` (``enforce_fastq`` true) or
       ``input_reads.fasta``; other formats are used as-is.
    2. ``mixcr align`` -> ``mixcr.vdjca``.
    3. ``mixcr assemble`` with assembling features
       ``{region_from:region_to}`` -> ``mixcr.clns`` (+ ``index_file``).
    4. ``mixcr exportClones`` -> ``mixcr.txt``; ``Timer.stamp`` is then
       called with ``time.txt``.
    5. ``mixcr exportClones`` with the extended feature list ->
       ``features.txt``.
    6. ``convert_mixcr2_output_to_igrec`` -> ``mixcr_uncompressed.fa`` and
       ``mixcr_uncompressed.rcm``.
    7. ``ig_compress_equal_clusters.py`` -> ``final_repertoire.fa`` and
       ``final_repertoire.rcm``.

    Args:
        input_file: reads to process.
        output_dir: working/output directory; passed to ``mkdir_p``.
        log: logger forwarded to every ``support.sys_call``; a ``FakeLog``
            is used when it is ``None``.
        loci: value placed after ``--chains`` on the align command line.
        enforce_fastq: convert FASTA input to ``.fq`` instead of ``.fasta``.
        threads: value of MiXCR's ``-t`` option.
        remove_tmp: when true, delete all intermediate files at the end.
        species: value of MiXCR's ``--species`` option.
        region_from: first anchor of ``-OassemblingFeatures``.
        region_to: second anchor of ``-OassemblingFeatures``.
    """
    if log is None:
        log = FakeLog()

    mkdir_p(output_dir)

    # Only FASTA input is converted; remember the copy so it can be removed.
    input_file_tmp = None
    if idFormatByFileName(input_file) == "fasta":
        if enforce_fastq:
            input_file_tmp = "%s/input_reads.fq" % output_dir
        else:
            input_file_tmp = "%s/input_reads.fasta" % output_dir
        fastx2fastx(input_file, input_file_tmp)
        input_file = input_file_tmp

    path = path_to_mixcr2
    args = {
        "path": path,
        "compress_eq_clusters_cmd":
        path_to_igrec + "/py/ig_compress_equal_clusters.py",
        "mixcr_cmd": "java -jar %s/mixcr.jar" % path,
        "threads": threads,
        "input_file": input_file,
        "output_dir": output_dir,
        "species": species,
        "loci": loci,
        "from": region_from,
        "to": region_to,
        "loci_arg": "chains"
    }

    # Timed section: align + assemble + first exportClones.
    timer = Timer()
    support.sys_call(
        "%(mixcr_cmd)s align -p kaligner2 --species %(species)s -t %(threads)d -f -g -r %(output_dir)s/align_report.txt --noMerge --%(loci_arg)s %(loci)s -OreadsLayout=Collinear -OvParameters.geneFeatureToAlign=VTranscript -OallowPartialAlignments=true %(input_file)s %(output_dir)s/mixcr.vdjca"
        % args,
        log=log)
    support.sys_call(
        "%(mixcr_cmd)s assemble -t %(threads)d -f -r %(output_dir)s/assemble_report.txt --index %(output_dir)s/index_file -OassemblingFeatures=\"{%(from)s:%(to)s}\" %(output_dir)s/mixcr.vdjca %(output_dir)s/mixcr.clns"
        % args,
        log=log)
    args[
        "small_features"] = "-sequence -count -readIds %(output_dir)s/index_file" % args
    support.sys_call(
        "%(mixcr_cmd)s exportClones %(small_features)s -f --no-spaces %(output_dir)s/mixcr.clns %(output_dir)s/mixcr.txt"
        % args,
        log=log)
    timer.stamp(output_dir + "/time.txt")

    # Extended export; runs after the timer stamp, so it is not timed.
    args[
        "features"] = "-count -sequence -nFeature CDR3 -vHit -jHit -vAlignment -jAlignment -aaFeature CDR3 -readIds %(output_dir)s/index_file" % args
    support.sys_call(
        "%(mixcr_cmd)s exportClones %(features)s -f --no-spaces %(output_dir)s/mixcr.clns %(output_dir)s/features.txt"
        % args,
        log=log)

    convert_mixcr2_output_to_igrec(
        "%(output_dir)s/mixcr.txt" % args,
        "%(output_dir)s/mixcr_uncompressed.fa" % args, input_file,
        "%(output_dir)s/mixcr_uncompressed.rcm" % args)
    # Pass the logger here as well: every other external call in this
    # function forwards `log`, and this one previously did not.
    support.sys_call(
        "%(compress_eq_clusters_cmd)s %(output_dir)s/mixcr_uncompressed.fa %(output_dir)s/final_repertoire.fa -r %(output_dir)s/mixcr_uncompressed.rcm -R %(output_dir)s/final_repertoire.rcm"
        % args,
        log=log)

    if remove_tmp:
        if input_file_tmp is not None:
            os.remove(input_file_tmp)

        intermediates = (
            "/align_report.txt",
            "/assemble_report.txt",
            "/mixcr.clns",
            "/mixcr.txt",
            "/features.txt",
            "/mixcr.vdjca",
            "/mixcr_uncompressed.fa",
            "/mixcr_uncompressed.rcm",
            "/index_file",
        )
        for suffix in intermediates:
            os.remove(output_dir + suffix)
Ejemplo n.º 7
0
def run_mixcr(input_file,
              output_dir,
              log=None,
              loci="all",
              enforce_fastq=False,
              threads=16,
              remove_tmp=True,
              species="hsa",
              version=1):
    """Run MiXCR (jar chosen by ``version``) and convert its output.

    ``version == 1`` uses the jar under ``path_to_mixcr`` and the ``--loci``
    option; any other value uses ``path_to_mixcr2`` and ``--chains``.

    Pipeline, all inside ``output_dir``:

    1. FASTA input (per ``idFormatByFileName``) is converted with
       ``fastx2fastx`` to ``input_reads.fq`` (``enforce_fastq`` true) or
       ``input_reads.fasta``; other formats are used as-is.
    2. ``mixcr align`` -> ``mixcr.vdjca``.
    3. ``mixcr assemble`` (features ``{FR1Begin:FR4Begin}``) ->
       ``mixcr.clns``.
    4. ``mixcr exportClones`` -> ``mixcr.txt``; ``Timer.stamp`` is then
       called with ``time.txt``.
    5. ``mixcr exportClones`` with the extended feature list ->
       ``features.txt``.
    6. ``convert_mixcr_output_to_igrec`` -> ``mixcr_uncompressed.fa``.
    7. ``ig_compress_equal_clusters.py`` -> ``final_repertoire.fa``.

    Args:
        input_file: reads to process.
        output_dir: working/output directory; passed to ``mkdir_p``.
        log: logger forwarded to every ``support.sys_call``; a ``FakeLog``
            is used when it is ``None``.
        loci: value placed after ``--loci`` / ``--chains``.
        enforce_fastq: convert FASTA input to ``.fq`` instead of ``.fasta``.
        threads: value of MiXCR's ``-t`` option.
        remove_tmp: when true, delete the converted input (if any),
            ``mixcr.clns``, ``mixcr.txt``, ``mixcr.vdjca`` and
            ``mixcr_uncompressed.fa`` at the end.
        species: value of MiXCR's ``--species`` option.
        version: selects the jar location and the loci option name.
    """
    if log is None:
        log = FakeLog()

    mkdir_p(output_dir)

    # Only FASTA input is converted; remember the copy so it can be removed.
    input_file_tmp = None
    if idFormatByFileName(input_file) == "fasta":
        if enforce_fastq:
            input_file_tmp = "%s/input_reads.fq" % output_dir
        else:
            input_file_tmp = "%s/input_reads.fasta" % output_dir
        fastx2fastx(input_file, input_file_tmp)
        input_file = input_file_tmp

    path = path_to_mixcr if version == 1 else path_to_mixcr2
    args = {
        "path": path,
        "compress_eq_clusters_cmd":
        path_to_igrec + "/py/ig_compress_equal_clusters.py",
        "mixcr_cmd": "java -jar %s/mixcr.jar" % path,
        "threads": threads,
        "input_file": input_file,
        "output_dir": output_dir,
        "species": species,
        "loci": loci,
        "loci_arg": "loci" if version == 1 else "chains"
    }

    # Timed section: align + assemble + first exportClones.
    timer = Timer()
    support.sys_call(
        "%(mixcr_cmd)s align -t %(threads)d -f -g -r %(output_dir)s/align_report.txt --%(loci_arg)s %(loci)s --noMerge -OvParameters.geneFeatureToAlign=VTranscript --species %(species)s %(input_file)s %(output_dir)s/mixcr.vdjca"
        % args,
        log=log)
    support.sys_call(
        "%(mixcr_cmd)s assemble -t %(threads)d -f -r %(output_dir)s/assemble_report.txt -OassemblingFeatures=\"{FR1Begin:FR4Begin}\" %(output_dir)s/mixcr.vdjca %(output_dir)s/mixcr.clns"
        % args,
        log=log)
    support.sys_call(
        "%(mixcr_cmd)s exportClones -sequence -count -f --no-spaces %(output_dir)s/mixcr.clns %(output_dir)s/mixcr.txt"
        % args,
        log=log)
    timer.stamp(output_dir + "/time.txt")

    # Extended export; runs after the timer stamp, so it is not timed.
    args[
        "features"] = "-count -sequence -nFeature CDR3 -vHit -jHit -vAlignment -jAlignment -aaFeature CDR3"
    support.sys_call(
        "%(mixcr_cmd)s exportClones %(features)s -f --no-spaces %(output_dir)s/mixcr.clns %(output_dir)s/features.txt"
        % args,
        log=log)
    convert_mixcr_output_to_igrec(
        "%(output_dir)s/mixcr.txt" % args,
        "%(output_dir)s/mixcr_uncompressed.fa" % args)
    # Pass the logger here as well: every other external call in this
    # function forwards `log`, and this one previously did not.
    support.sys_call(
        "%(compress_eq_clusters_cmd)s %(output_dir)s/mixcr_uncompressed.fa %(output_dir)s/final_repertoire.fa"
        % args,
        log=log)

    if remove_tmp:
        if input_file_tmp is not None:
            os.remove(input_file_tmp)

        for suffix in ("/mixcr.clns", "/mixcr.txt", "/mixcr.vdjca",
                       "/mixcr_uncompressed.fa"):
            os.remove(output_dir + suffix)
Ejemplo n.º 8
0
def main(args):
    """Run the repertoire quality analyses selected by ``args`` and emit a report.

    Each stage is gated on which inputs and flags are set on ``args``:

    1. With ``args.reconstruct``: derive a missing RCM file from reads +
       repertoire (``reconstruct_rcm``), or a missing repertoire from reads +
       RCM (``run_consensus_constructor``), for both the constructed and the
       reference side. Derived paths are written back onto ``args``.
    2. Reference-free plots/statistics for the constructed and the reference
       repertoire (``ref_free_plots``).
    3. Constructed-vs-reference comparison (``RepertoireMatch``) and plots.
    4. Partition comparison (``RcmVsRcm``) and plots, for all clusters and
       for a pruned copy.
    5. ``splittering`` when ``args.experimental`` is set.

    Finally the report is logged and, if requested, written through
    ``report.toText`` / ``report.toJson``.

    ``log`` is not defined inside this function; it is looked up as a global.

    Args:
        args: parsed command-line namespace; mutated in stage 1.
    """
    # Stage 1: fill in whichever of the RCM / repertoire files is missing.
    if args.reconstruct:
        if args.initial_reads and args.constructed_repertoire and not args.constructed_rcm:
            log.info("Try to reconstruct repertoire RCM file...")
            rcm = reconstruct_rcm(args.initial_reads,
                                  args.constructed_repertoire,
                                  threads=args.threads)
            args.constructed_rcm = args.output_dir + "/constructed.rcm"
            write_rcm(rcm, args.constructed_rcm)

        if args.initial_reads and not args.constructed_repertoire and args.constructed_rcm:
            log.info("Try to reconstruct repertoire sequence file...")
            args.constructed_repertoire = args.output_dir + "/constructed.fa.gz"
            run_consensus_constructor(rcm_file=args.constructed_rcm,
                                      initial_reads=args.initial_reads,
                                      output_file=args.constructed_repertoire,
                                      threads=args.threads)

        if args.initial_reads and args.reference_repertoire and not args.reference_rcm and args.rcm_based:
            log.info("Try to reconstruct reference RCM file...")
            rcm = reconstruct_rcm(args.initial_reads,
                                  args.reference_repertoire,
                                  threads=args.threads)
            args.reference_rcm = args.output_dir + "/reference.rcm"
            write_rcm(rcm, args.reference_rcm)

        if args.initial_reads and not args.reference_repertoire and args.reference_rcm and args.rcm_based:
            log.info(
                "Try to reconstruct reference repertoire sequence file...")
            args.reference_repertoire = args.output_dir + "/reference.fa.gz"
            run_consensus_constructor(rcm_file=args.reference_rcm,
                                      initial_reads=args.initial_reads,
                                      output_file=args.reference_repertoire,
                                      threads=args.threads)

    # NOTE(review): `rep` is bound only when args.rcm_based is set, but it is
    # read below under args.reference_free and under args.experimental; if
    # those flags can be set without rcm_based this raises NameError - confirm
    # against the argument parser.
    if args.initial_reads and args.constructed_repertoire and args.constructed_rcm and args.rcm_based:
        rep = Repertoire(args.constructed_rcm, args.initial_reads,
                         args.constructed_repertoire)

    report = Report()
    report.min_size = args.reference_size_cutoff

    def ref_free_plots(rep, name, dir):
        """Plot reference-free diagnostics for ``rep`` and add its stats to ``report``."""
        if args.figure_format:
            mkdir_p(dir)

            rep.plot_distribution_of_errors_in_reads(
                out=dir + "/%s_distribution_of_errors_in_reads" % name,
                format=args.figure_format)
            rep.plot_estimation_of_max_error_distribution(
                out=dir + "/%s_max_error_scatter" % name,
                format=args.figure_format)
            if args.page_mode:
                ymax = 0.041
            else:
                ymax = 0

            # Profiles of the five largest clusters (file names are 1-based).
            for i in range(5):
                cluster = rep.largest(i)
                cluster.plot_profile(
                    out=dir + "/%s_cluster_discordance_profile_largest_%d" %
                    (name, i + 1),
                    format=args.figure_format,
                    ymax=ymax)
                cluster.plot_profile(out=dir +
                                     "/%s_cluster_error_profile_largest_%d" %
                                     (name, i + 1),
                                     discordance=False,
                                     format=args.figure_format,
                                     ymax=ymax)

            rep.plot_profile(out=dir +
                             "/%s_cluster_discordance_profile" % name,
                             format=args.figure_format,
                             ymax=ymax)
            rep.plot_profile(out=dir + "/%s_cluster_error_profile" % name,
                             discordance=False,
                             format=args.figure_format,
                             ymax=ymax)

        if args.export_bad_clusters:
            mkdir_p(dir)
            rep.export_bad_clusters(out=dir + "/bad_%s_clusters/" % name)
        rep.report(report, "%s_stats" % name)

    # Stage 2: reference-free analysis of the constructed and reference sides.
    if args.initial_reads and args.constructed_repertoire and args.constructed_rcm and args.reference_free:
        ref_free_plots(rep, "constructed", args.reference_free_dir)

    if args.initial_reads and args.reference_repertoire and args.reference_rcm and args.reference_free:
        rep_ideal = Repertoire(args.reference_rcm, args.initial_reads,
                               args.reference_repertoire)
        ref_free_plots(rep_ideal, "reference", args.reference_free_dir)

    # Stage 3: sequence-level comparison of constructed vs reference repertoire.
    if args.constructed_repertoire and args.reference_repertoire:
        res = RepertoireMatch(
            args.constructed_repertoire,
            args.reference_repertoire,
            tmp_file=None,
            max_tau=args.tau,
            reference_trash_cutoff=args.reference_trash_cutoff,
            reference_trust_cutoff=args.reference_trust_cutoff,
            log=log,
            threads=args.threads)

        res.report(report)

        if args.figure_format:
            mkdir_p(args.reference_based_dir)

            for size in [1, 3, 5, 10]:
                res.plot_sensitivity_precision(
                    what="ref2cons",
                    out=args.reference_based_dir +
                    "/reference_to_constructed_distance_distribution_size_%d" %
                    size,
                    size=size,
                    differential=True,
                    format=args.figure_format)

                res.plot_sensitivity_precision(
                    what="cons2ref",
                    out=args.reference_based_dir +
                    "/constructed_to_reference_distance_distribution_size_%d" %
                    size,
                    size=size,
                    differential=True,
                    format=args.figure_format)

                # NOTE(review): this call does not use `size` and targets the
                # same output on every iteration - possibly meant to sit
                # outside the loop; confirm.
                res.plot_octoplot(out=args.reference_based_dir +
                                  "/distance_distribution",
                                  format=args.figure_format)

            res.plot_min_cluster_size_choose(out=args.reference_based_dir +
                                             "/sensitivity_precision",
                                             format=args.figure_format)

            res.plot_error_pos_dist(out=args.reference_based_dir +
                                    "/error_position_distribution",
                                    format=args.figure_format)

            res.plot_reference_vs_constructed_size(
                out=args.reference_based_dir +
                "/cluster_abundances_scatterplot",
                format=args.figure_format,
                marginals=False)

            if args.experimental:
                res.plot_reference_vs_constructed_size(
                    out=args.reference_based_dir +
                    "/cluster_abundances_scatterplot_hexes",
                    points=False,
                    format=args.figure_format,
                    marginals=False)

            res.plot_multiplicity_distributions(out=args.reference_based_dir +
                                                "/abundance_distributions_log",
                                                ylog=True,
                                                format=args.figure_format)

            res.plot_multiplicity_distributions(out=args.reference_based_dir +
                                                "/abundance_distributions",
                                                ylog=False,
                                                format=args.figure_format)

    # Stage 4: partition-level comparison of the two RCM files.
    if args.constructed_rcm and args.reference_rcm and args.partition_based:
        rcm2rcm = RcmVsRcm(args.constructed_rcm, args.reference_rcm)

        rcm2rcm.report(report, "rcm_stats_all_clusters")

        size = args.reference_size_cutoff

        # Pruned copy used for every "*_large" plot and the second report.
        rcm2rcm_large = rcm2rcm.prune_copy(size, 1)

        rcm2rcm_large.report(report)

        if args.figure_format:
            mkdir_p(args.reference_based_dir)
            if args.page_mode:
                ymax = 17000
            else:
                ymax = 0

            rcm2rcm.plot_purity_distribution(
                out=args.reference_based_dir +
                "/constructed_purity_distribution",
                format=args.figure_format,
                ymax=ymax)
            rcm2rcm.plot_discordance_distribution(
                out=args.reference_based_dir +
                "/constructed_discordance_distribution",
                format=args.figure_format,
                ymax=ymax)
            if args.experimental:
                rcm2rcm.plot_purity_distribution(
                    out=args.reference_based_dir +
                    "/constructed_purity_distribution_ylog",
                    format=args.figure_format,
                    ylog=True,
                    ymax=ymax)
                rcm2rcm.plot_discordance_distribution(
                    out=args.reference_based_dir +
                    "/constructed_discordance_distribution_ylog",
                    format=args.figure_format,
                    ylog=True,
                    ymax=ymax)

            rcm2rcm.plot_purity_distribution(out=args.reference_based_dir +
                                             "/reference_purity_distribution",
                                             format=args.figure_format,
                                             constructed=False,
                                             ymax=ymax)
            rcm2rcm.plot_discordance_distribution(
                out=args.reference_based_dir +
                "/reference_discordance_distribution",
                format=args.figure_format,
                constructed=False,
                ymax=ymax)
            if args.experimental:
                rcm2rcm.plot_purity_distribution(
                    out=args.reference_based_dir +
                    "/reference_purity_distribution_ylog",
                    format=args.figure_format,
                    constructed=False,
                    ylog=True,
                    ymax=ymax)
                rcm2rcm.plot_discordance_distribution(
                    out=args.reference_based_dir +
                    "/reference_discordance_distribution_ylog",
                    format=args.figure_format,
                    constructed=False,
                    ylog=True,
                    ymax=ymax)

            rcm2rcm_large.plot_purity_distribution(
                out=args.reference_based_dir +
                "/constructed_purity_distribution_large",
                format=args.figure_format,
                ymax=ymax)
            rcm2rcm_large.plot_discordance_distribution(
                out=args.reference_based_dir +
                "/constructed_discordance_distribution_large",
                format=args.figure_format,
                ymax=ymax)
            if args.experimental:
                rcm2rcm_large.plot_purity_distribution(
                    out=args.reference_based_dir +
                    "/constructed_purity_distribution_large_ylog",
                    format=args.figure_format,
                    ylog=True,
                    ymax=ymax)
                rcm2rcm_large.plot_discordance_distribution(
                    out=args.reference_based_dir +
                    "/constructed_discordance_distribution_large_ylog",
                    format=args.figure_format,
                    ylog=True,
                    ymax=ymax)

            rcm2rcm_large.plot_purity_distribution(
                out=args.reference_based_dir +
                "/reference_purity_distribution_large",
                format=args.figure_format,
                constructed=False,
                ymax=ymax)
            rcm2rcm_large.plot_discordance_distribution(
                out=args.reference_based_dir +
                "/reference_discordance_distribution_large",
                format=args.figure_format,
                constructed=False,
                ymax=ymax)
            if args.experimental:
                rcm2rcm_large.plot_discordance_distribution(
                    out=args.reference_based_dir +
                    "/reference_discordance_distribution_large_ylog",
                    format=args.figure_format,
                    constructed=False,
                    ylog=True,
                    ymax=ymax)
                rcm2rcm_large.plot_purity_distribution(
                    out=args.reference_based_dir +
                    "/reference_purity_distribution_large_ylog",
                    format=args.figure_format,
                    constructed=False,
                    ylog=True,
                    ymax=ymax)

            if args.experimental:
                rcm2rcm.plot_majority_secondary(
                    out=args.reference_based_dir +
                    "/constructed_majority_secondary",
                    format=args.figure_format)
                rcm2rcm.plot_size_nomajority(out=args.reference_based_dir +
                                             "/constructed_size_nomajority",
                                             format=args.figure_format)
                rcm2rcm.plot_majority_secondary(
                    out=args.reference_based_dir +
                    "/reference_majority_secondary",
                    format=args.figure_format,
                    constructed=False)
                rcm2rcm.plot_size_nomajority(out=args.reference_based_dir +
                                             "/reference_size_nomajority",
                                             format=args.figure_format,
                                             constructed=False)
                rcm2rcm_large.plot_majority_secondary(
                    out=args.reference_based_dir +
                    "/constructed_majority_secondary_large",
                    format=args.figure_format)
                rcm2rcm_large.plot_size_nomajority(
                    out=args.reference_based_dir +
                    "/constructed_size_nomajority_large",
                    format=args.figure_format)
                rcm2rcm_large.plot_majority_secondary(
                    out=args.reference_based_dir +
                    "/reference_majority_secondary_large",
                    format=args.figure_format,
                    constructed=False)
                rcm2rcm_large.plot_size_nomajority(
                    out=args.reference_based_dir +
                    "/reference_size_nomajority_large",
                    format=args.figure_format,
                    constructed=False)

    # NOTE(review): `rcm2rcm` is bound only under args.partition_based and
    # `rep` only under args.initial_reads + args.rcm_based; neither is part of
    # this condition, so this call can hit an unbound name - confirm.
    if args.constructed_rcm and args.reference_rcm and args.constructed_repertoire and args.reference_repertoire and args.experimental:
        splittering(rcm2rcm, rep, args, report)

    log.info(report)

    if args.text:
        report.toText(args.text)

    if args.json:
        report.toJson(args.json)
Ejemplo n.º 9
0
    if args.text:
        report.toText(args.text)

    if args.json:
        report.toJson(args.json)


def SupportInfo(log):
    """Log the support-contact hint and the request for ``igquast.log``.

    Args:
        log: object with an ``info(message)`` method; it is called twice.
    """
    messages = (
        "\nIn case you have troubles running IgQUAST, "
        "you can write to [email protected].",
        "Please provide us igquast.log file from the output directory.",
    )
    for message in messages:
        log.info(message)


if __name__ == "__main__":
    args = parse_command_line()
    mkdir_p(args.output_dir)
    log = CreateLogger("IgQUAST")
    if args.log:
        AttachFileLogger(log, args.log)

    try:
        log.info("Command line: %s" % " ".join(sys.argv))
        main(args)
        log.info("\nThank you for using IgQUAST!")
    except (KeyboardInterrupt):
        log.info("\nIgQUAST has been interrupted!")
    except Exception:
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type == SystemExit:
            sys.exit(exc_value)
        else:
Ejemplo n.º 10
0
            param['organism'] = item['Content']
        elif name == 'Input.app-session-name':
            param['AppSessionName'] = item['Content']
        elif name == 'Input.min-cluster-size':
            param["min-cluster-size"] = safe_cast(item['Content'], int, 5)
        elif name == 'Input.min-fillin':
            param["min-fillin"] = safe_cast(item['Content'], float, 0.6)
        elif name == 'Input.additionalfiles':
            param["additionalfiles"] = "save" in item['Items']
        elif name == 'Input.pseudogenes':
            param["pseudogenes"] = "pseudogenes" in item['Items']
        elif name == 'Input.Projects':
            projectID = item['Items'][0]['Id']

    param['outdir'] = '/data/output/appresults/%s/%s' % (projectID, "IgReC")
    mkdir_p(param['outdir'])

    igrec_path = os.path.join(igrec_dir, "igrec.py")

    if param['readtype'] == "merged":
        command = '%s -s "%s" ' % (igrec_path, param["reads"])
    elif param['readtype'] == "paired":
        command = '%s -1 "%s" -2 "%s" ' % (igrec_path, param["reads"][0], param["reads"][1])
    else:
        sys.exit(1)

    command += ' --tau=%(tau)d --loci=%(loci)s -o "%(outdir)s" --threads=%(threads)d --min-cluster-size=%(min-cluster-size)d --min-fillin=%(min-fillin)f' % param
    if not param["pseudogenes"]:
        command += " --no-pseudogenes"
    if param["additionalfiles"]:
        command += " --debug"