def buildExpectedGenomeCoverage(infiles, outfile):
    """Build the expected coverage over the genomes in the sample.

    The expectation is derived from read depth and read length; the
    actual computation is submitted to the cluster via P.submit.
    """
    target_module = "PipelineMetagenomeBenchmark"
    target_function = "buildExpectedCoverageOverGenomes"
    P.submit(target_module,
             target_function,
             infiles=infiles,
             outfiles=outfile)
def buildCoverageOverGenomes(infiles, outfile):
    """Create a file with the coverage over each simulated genome.

    Delegates the work to PipelineMetagenomeBenchmark via P.submit.
    """
    target_module = "PipelineMetagenomeBenchmark"
    target_function = "buildCoverageOverGenomes"
    P.submit(target_module,
             target_function,
             infiles=infiles,
             outfiles=outfile)
def filterContigsByCoverage(infiles, outfile):
    """Filter contigs by their average base coverage.

    The filtering itself runs remotely through P.submit.
    """
    target_module = "PipelineMetagenomeBenchmark"
    target_function = "filterByCoverage"
    P.submit(target_module,
             target_function,
             infiles=infiles,
             outfiles=outfile)
def findNPeaksForPooledPseudoreplicates(infiles, outfile):
    """Count peaks passing the pooled-consistency IDR threshold.

    Submits IDR.findNPeaks to the cluster with the threshold taken
    from the pipeline configuration.
    """
    idr_thresh = PARAMS["idr_options_pooled_consistency_threshold"]
    # IDR.__file__ may end in ".py" (source) or ".pyc" (bytecode)
    # depending on how the module was loaded; P.snip raises ValueError
    # when the suffix does not match, so try both (same pattern as
    # elsewhere in this file).
    try:
        module = P.snip(IDR.__file__, ".py")
    except ValueError:
        module = P.snip(IDR.__file__, ".pyc")
    P.submit(module,
             "findNPeaks",
             params=[str(idr_thresh), ],
             infiles=infiles,
             outfiles=outfile)
def findNPeaksForIndividualReplicates(infiles, outfile):
    """Count peaks passing the inter-replicate IDR threshold.

    Submits IDR.findNPeaks to the cluster with the threshold taken
    from the pipeline configuration.
    """
    idr_thresh = PARAMS["idr_options_inter_replicate_threshold"]
    # IDR.__file__ may end in ".py" (source) or ".pyc" (bytecode)
    # depending on how the module was loaded; P.snip raises ValueError
    # when the suffix does not match, so try both (same pattern as
    # elsewhere in this file).
    try:
        module = P.snip(IDR.__file__, ".py")
    except ValueError:
        module = P.snip(IDR.__file__, ".pyc")
    P.submit(module,
             "findNPeaks",
             params=[str(idr_thresh), ],
             infiles=infiles,
             outfiles=outfile)
def findNPeaksForPooledPseudoreplicates(infiles, outfile):
    """Count peaks passing the pooled-consistency IDR threshold.

    Submits IDR.findNPeaks to the cluster with the threshold taken
    from the pipeline configuration.
    """
    idr_thresh = PARAMS["idr_options_pooled_consistency_threshold"]
    # Original snipped only ".pyc", which fails with ValueError when
    # the IDR module is loaded from source (the common case). Try the
    # ".py" suffix first and fall back, as elsewhere in this file.
    try:
        module = P.snip(IDR.__file__, ".py")
    except ValueError:
        module = P.snip(IDR.__file__, ".pyc")
    P.submit(module,
             "findNPeaks",
             params=[str(idr_thresh), ],
             infiles=infiles,
             outfiles=outfile)
def findNPeaksForIndividualReplicates(infiles, outfile):
    """Count peaks passing the inter-replicate IDR threshold.

    Submits IDR.findNPeaks to the cluster with the threshold taken
    from the pipeline configuration.
    """
    idr_thresh = PARAMS["idr_options_inter_replicate_threshold"]
    # Original snipped only ".pyc", which fails with ValueError when
    # the IDR module is loaded from source (the common case). Try the
    # ".py" suffix first and fall back, as elsewhere in this file.
    try:
        module = P.snip(IDR.__file__, ".py")
    except ValueError:
        module = P.snip(IDR.__file__, ".pyc")
    P.submit(module,
             "findNPeaks",
             params=[str(idr_thresh), ],
             infiles=infiles,
             outfiles=outfile)
def findNPeaksForPseudoreplicates(infiles, outfile):
    """Count peaks passing the self-consistency IDR threshold.

    Submits IDR.findNPeaks to the cluster; the threshold comes from
    the pipeline configuration.
    """
    threshold = PARAMS["idr_options_self_consistency_threshold"]
    # The IDR module path may carry a ".py" or ".pyc" suffix depending
    # on how it was loaded; strip whichever is present.
    try:
        idr_module = P.snip(IDR.__file__, ".py")
    except ValueError:
        idr_module = P.snip(IDR.__file__, ".pyc")
    P.submit(idr_module,
             "findNPeaks",
             params=[str(threshold), ],
             infiles=infiles,
             outfiles=outfile)
def splitPooledBamfiles(infile, sentinel):
    """Split the pooled bamfile (derived from the sentinel name) in two.

    Submits IDR.splitBam with '2' as the split parameter, then touches
    the sentinel file to mark completion.
    """
    bam_path = P.snip(infile, ".sentinel") + ".bam"
    out_stub = P.snip(sentinel, ".sentinel")
    n_splits = '2'
    # The IDR module path may carry a ".py" or ".pyc" suffix depending
    # on how it was loaded; strip whichever is present.
    try:
        idr_module = P.snip(IDR.__file__, ".py")
    except ValueError:
        idr_module = P.snip(IDR.__file__, ".pyc")
    P.submit(idr_module, "splitBam", n_splits, bam_path, out_stub)
    P.touch(sentinel)
def splitPooledBamfiles(infile, sentinal):
    """Split the pooled bamfile (derived from the sentinel name) in two.

    Submits IDR.splitBam with '2' as the split parameter, then touches
    the sentinel file to mark completion.

    Note: the "sentinal"/".sentinal" spelling is historical and kept
    because both the parameter name and the file extension are part of
    this task's interface.
    """
    infile = P.snip(infile, ".sentinal") + ".bam"
    outfile = P.snip(sentinal, ".sentinal")
    params = '2'
    # Original snipped only ".py", which fails with ValueError when the
    # IDR module is loaded from bytecode; try both suffixes, matching
    # the pattern used elsewhere in this file.
    try:
        module = P.snip(IDR.__file__, ".py")
    except ValueError:
        module = P.snip(IDR.__file__, ".pyc")
    P.submit(module, "splitBam", params, infile, outfile)
    P.touch(sentinal)
def splitPooledBamfiles(infile, sentinal):
    """Split the pooled bamfile (derived from the sentinel name) in two.

    Submits IDR.splitBam with '2' as the split parameter, then touches
    the sentinel file to mark completion.

    Note: the "sentinal"/".sentinal" spelling is historical and kept
    because both the parameter name and the file extension are part of
    this task's interface.
    """
    infile = P.snip(infile, ".sentinal") + ".bam"
    outfile = P.snip(sentinal, ".sentinal")
    params = '2'
    # Original snipped only ".pyc", which fails with ValueError when
    # the IDR module is loaded from source (the common case); try both
    # suffixes, matching the pattern used elsewhere in this file.
    try:
        module = P.snip(IDR.__file__, ".py")
    except ValueError:
        module = P.snip(IDR.__file__, ".pyc")
    P.submit(module, "splitBam", params, infile, outfile)
    P.touch(sentinal)
def findNPeaksForPseudoreplicates(infiles, outfile):
    """Count peaks passing the self-consistency IDR threshold.

    Submits IDR.findNPeaks to the cluster; the threshold comes from
    the pipeline configuration.
    """
    threshold = PARAMS["idr_options_self_consistency_threshold"]
    # The IDR module path may carry a ".py" or ".pyc" suffix depending
    # on how it was loaded; strip whichever is present.
    try:
        idr_module = P.snip(IDR.__file__, ".py")
    except ValueError:
        idr_module = P.snip(IDR.__file__, ".pyc")
    P.submit(idr_module,
             "findNPeaks",
             params=[str(threshold), ],
             infiles=infiles,
             outfiles=outfile)
def splitBamfiles(infile, sentinel):
    """For all tracks, split the filtered bamfile in two using pysam.

    The bam path is derived from the sentinel name; on completion the
    sentinel file is touched to mark the task done.
    """
    bam_path = P.snip(infile, ".sentinel") + ".bam"
    out_stub = P.snip(sentinel, ".sentinel")
    n_splits = '2'
    # The IDR module path may carry a ".py" or ".pyc" suffix depending
    # on how it was loaded; strip whichever is present.
    try:
        idr_module = P.snip(IDR.__file__, ".py")
    except ValueError:
        idr_module = P.snip(IDR.__file__, ".pyc")
    P.submit(idr_module, "splitBam", n_splits, bam_path, out_stub)
    P.touch(sentinel)
def splitBamfiles(infile, sentinal):
    """For all tracks, split the filtered bamfile in two using pysam.

    The bam path is derived from the sentinel name; on completion the
    sentinel file is touched to mark the task done.

    Note: the "sentinal"/".sentinal" spelling is historical and kept
    because both the parameter name and the file extension are part of
    this task's interface.
    """
    infile = P.snip(infile, ".sentinal") + ".bam"
    outfile = P.snip(sentinal, ".sentinal")
    params = '2'
    # Original snipped only ".pyc", which fails with ValueError when
    # the IDR module is loaded from source (the common case); try both
    # suffixes, matching the pattern used elsewhere in this file.
    try:
        module = P.snip(IDR.__file__, ".py")
    except ValueError:
        module = P.snip(IDR.__file__, ".pyc")
    P.submit(module, "splitBam", params, infile, outfile)
    P.touch(sentinal)
def buildChimerasBasedOnReads(infile, outfile):
    '''Score each contig with a chimericity score.

    This is an alternative to counting a contig as a chimera whenever
    it aligns to more than one genome: a contig is likely to align to
    multiple genomes with high identity if the sample contains very
    similar genomes (true of our simulation, which contains subspecies
    of the same species, e.g. B. fragilis subspecies).

    The chimericity score is the ratio of "good" to "bad" alignments,
    where an alignment is "good" if it comes from the species from
    which the majority of that contig's alignments derive.
    '''
    # PEP8 keyword-argument spacing, consistent with the other
    # P.submit calls in this file.
    P.submit("CGATPipelines.PipelineMetagenomeBenchmark",
             "buildChimerasBasedOnReads",
             infiles=infile,
             outfiles=outfile)
def buildChimerasBasedOnReads(infile, outfile):
    '''Score each contig with a chimericity score.

    This is an alternative to counting a contig as a chimera whenever
    it aligns to more than one genome: a contig is likely to align to
    multiple genomes with high identity if the sample contains very
    similar genomes (true of our simulation, which contains subspecies
    of the same species, e.g. B. fragilis subspecies).

    The chimericity score is the ratio of "good" to "bad" alignments,
    where an alignment is "good" if it comes from the species from
    which the majority of that contig's alignments derive.
    '''
    target_module = "CGATPipelines.PipelineMetagenomeBenchmark"
    target_function = "buildChimerasBasedOnReads"
    P.submit(target_module,
             target_function,
             infiles=infile,
             outfiles=outfile)
def buildCoverageOverContigs(infiles, outfile):
    ''' build histograms of the coverage over each of the contigs '''
    # First input is the bam; remaining infiles (presumably the
    # alignment-stats load target) are only used via the derived
    # tablename below — TODO confirm against the pipeline decorator.
    bam = infiles[0]
    # genomecoveragebed does not like some of the
    # output from bwa. bwa outputs some reads
    # that map off the end of contigs
    # as having a leftmost position of 0. This is
    # not ideal. Need to use temporary bam
    # files with only mapped reads - this is
    # nasty and needs changing
    tempdir = P.getTempDir(".")
    tempname = P.getTempFilename(tempdir) + ".bam"
    # Remote job writes the position-filtered bam to tempname.
    P.submit("CGATPipelines.PipelineMetagenomeAssembly",
             "filterBamOnPos",
             infiles = bam,
             outfiles = tempname)
    # tablename where alignment stats live: "<dir-without-.dir>_<bam-basename>_alignment_stats"
    tablename = os.path.dirname(
        bam)[:-len(".dir")] + "_" + P.snip(os.path.basename(bam), ".bam") + "_alignment_stats"
    # hack to convert to table - add .load
    tablename = P.toTable(tablename + ".load")
    # connect to database
    dbh = connect()
    cc = dbh.cursor()
    # get number of reads aligned from bam2stats; used to scale
    # coverage to reads-per-million when coverage_scale is configured
    if PARAMS.get("coverage_scale"):
        scale_factor = cc.execute("""SELECT counts FROM %s WHERE category == 'reads_mapped'""" % tablename).fetchone()[0]
        scale_factor = 1 / (float(scale_factor) / 1000000)
        # %(scale_factor)f is deliberately NOT expanded here: P.run()
        # interpolates local variables into the statement at execution.
        scale_options = "-scale %(scale_factor)f"
    else:
        scale_options = ""
    # Per-base depth over the temporary bam, gzipped; the tempdir is
    # removed in the same shell command once the output is written.
    statement = '''genomeCoverageBed -ibam %(tempname)s %(scale_options)s -d | gzip > %(outfile)s; rm -rf %(tempdir)s'''
    P.run()
def filterContigsByCoverage(infiles, outfile):
    '''Filter contigs by their average base coverage.

    The filtering itself is submitted to the cluster via P.submit.
    '''
    # PEP8 keyword-argument spacing, consistent with the other
    # P.submit calls in this file.
    P.submit("PipelineMetagenomeBenchmark",
             "filterByCoverage",
             infiles=infiles,
             outfiles=outfile)