Exemple #1
0
def buildIdbaStats(infile, outfile):
    '''
    build summary statistics for an idba assembly.

    According to the original notes the statistics are:
    N50
    Number of scaffolds
    Total scaffold length

    The work is delegated to PipelineMetagenomeAssembly.contig_to_stats,
    which receives infile, outfile and the pipeline-wide PARAMS.
    '''
    compute_stats = PipelineMetagenomeAssembly.contig_to_stats
    compute_stats(infile, outfile, PARAMS)
Exemple #2
0
def filterContigs(infile, outfile):
    '''
    filter contigs using the "filter" option from the .ini file.

    When the option is unset (or otherwise false) a length of 0 is
    passed on, so a new outfile is still produced. The original author
    flagged this as not space efficient and something that SHOULD BE
    CHANGED.
    '''
    # any false value of the option falls back to 0
    length = PARAMS["filter"] or 0
    PipelineMetagenomeAssembly.filterContigs(infile, outfile, length)
Exemple #3
0
def runSpades(infile, outfile):
    '''
    run spades on each track

    Builds a statement with PipelineMetagenomeAssembly.Spades().build(infile)
    and then calls P.run().

    NOTE(review): P.run() is called without arguments, so it presumably
    picks up `statement`, `job_options` (and possibly `infile`/`outfile`)
    from this function's local scope -- confirm before renaming or
    removing any of these locals.
    '''
    # presumably a scheduler option requesting 30G of free memory -- TODO confirm
    job_options = " -l mem_free=30G"
    statement = PipelineMetagenomeAssembly.Spades().build(infile)
    P.run()
Exemple #4
0
def runSoapdenovo(infile, outfile):
    '''
    run soapdenovo

    Builds a statement with
    PipelineMetagenomeAssembly.SoapDenovo2().build(infile) and then calls
    P.run().

    NOTE(review): P.run() is called without arguments, so it presumably
    picks up `statement`, `job_options` (and possibly `infile`/`outfile`)
    from this function's local scope -- confirm before renaming or
    removing any of these locals.
    '''
    # presumably a scheduler option requesting 30G of free memory -- TODO confirm
    job_options = "-l mem_free=30G"
    statement = PipelineMetagenomeAssembly.SoapDenovo2().build(infile)
    P.run()
Exemple #5
0
def runIdba(infile, outfile):
    '''
    run idba on each track

    Builds a statement with PipelineMetagenomeAssembly.Idba().build(infile)
    and then calls P.run().

    NOTE(review): P.run() is called without arguments, so it presumably
    picks up `statement`, `job_options` (and possibly `infile`/`outfile`)
    from this function's local scope -- confirm before renaming or
    removing any of these locals.
    '''
    # presumably a scheduler option requesting 30G of free memory -- TODO confirm
    job_options = " -l mem_free=30G"
    statement = PipelineMetagenomeAssembly.Idba().build(infile)
    P.run()
Exemple #6
0
def runMetavelvet(infile, outfile):
    '''
    run meta-velvet on each track

    Builds a statement with
    PipelineMetagenomeAssembly.Metavelvet().build(infile, PARAMS) and then
    calls P.run().

    NOTE(review): P.run() is called without arguments, so it presumably
    picks up `statement`, `job_options` (and possibly `infile`/`outfile`)
    from this function's local scope -- confirm before renaming or
    removing any of these locals.
    '''
    # presumably a scheduler option requesting 30G of free memory -- TODO confirm
    job_options = " -l mem_free=30G"
    # unlike a plain build(infile), this builder is also given PARAMS
    statement = PipelineMetagenomeAssembly.Metavelvet().build(infile, PARAMS)
    P.run()
Exemple #7
0
def poolReadsAcrossConditions(infiles, outfile):
    '''
    pool reads across conditions

    Builds a statement with
    PipelineMetagenomeAssembly.pool_reads(infiles, outfile) and then calls
    P.run().

    NOTE(review): P.run() is called without arguments, so it presumably
    picks up `statement` (and possibly `infiles`/`outfile`) from this
    function's local scope -- confirm before renaming these locals.
    '''
    statement = PipelineMetagenomeAssembly.pool_reads(infiles,
                                                      outfile)
    P.run()
def buildMetaphlanRelativeAbundance(infile, outfile):
    '''
    metaphlan is a program used in metagenomics. It assigns
    reads to clades based on specific genetic markers via
    blastn searching

    Checks that the database configured in PARAMS["metaphlan_db"] exists
    for the aligner named in PARAMS["metaphlan_executable"] ("bowtie2" or
    "blast"), builds the statement with
    PipelineMetagenomeAssembly.Metaphlan().build(infile, method="rel_ab")
    and calls P.run().

    Raises AssertionError if the expected database file is missing.

    NOTE(review): P.run() is called without arguments, so it presumably
    picks up `statement`, `to_cluster`, `method` (and possibly
    `infile`/`outfile`) from this function's local scope -- confirm before
    renaming or removing any of these locals. If the executable is neither
    "bowtie2" nor "blast", `method` is never assigned.
    '''
    to_cluster = True
    # at present the pipeline will take a set of files
    # and compute the abundances of different taxonomic groups
    # based on ALL reads i.e. paired data are combined into
    # a single file for analysis
    if PARAMS["metaphlan_executable"] == "bowtie2":
        # parenthesise the path: "%" binds tighter than "+", so without
        # the parentheses the suffix ends up after the whole message
        assert os.path.exists(PARAMS["metaphlan_db"] + ".1.bt2"), (
            "missing file %s: Are you sure you have the correct "
            "database for bowtie2?" % (PARAMS["metaphlan_db"] + ".1.bt2"))
        method = "--bowtie2db"
    elif PARAMS["metaphlan_executable"] == "blast":
        # NOTE(review): the suffix has no leading dot, unlike the bowtie2
        # branch -- confirm how metaphlan_db is expected to be written
        # for blast databases.
        assert os.path.exists(PARAMS["metaphlan_db"] + "nin"), (
            "missing file %s: Are you sure you have the correct "
            "database for blast?" % (PARAMS["metaphlan_db"] + "nin"))
        method = "--blastdb"

    statement = PipelineMetagenomeAssembly.Metaphlan().build(infile,
                                                             method="rel_ab")
    P.run()
Exemple #9
0
def runRay(infile, outfile):
    '''
    run Ray on each track

    Builds a statement with PipelineMetagenomeAssembly.Ray().build(infile)
    and then calls P.run().

    NOTE(review): P.run() is called without arguments, so it presumably
    picks up `statement`, `job_options` (and possibly `infile`/`outfile`)
    from this function's local scope -- confirm before renaming or
    removing any of these locals.
    '''
    # presumably scheduler options: exclude the listed hosts, request an
    # "mpi" parallel environment with 10 slots and the all.q queue -- TODO
    # confirm. The backslash continuations sit inside the string literal,
    # so the indentation of the following lines is part of the value.
    job_options = " -l h=!andromeda,h=!cgatgpu1,h=!cgatsmp1,h=!gandalf,h=!saruman \
                    -pe mpi 10 \
                    -q all.q "
    statement = PipelineMetagenomeAssembly.Ray().build(infile)
    P.run()
Exemple #10
0
def pool_out(infiles):
    '''
    return the outfile name for the pooled reads.

    The name is "pooled_reads.dir/agg-agg-agg.<format>", where <format>
    is what PipelineMetagenomeAssembly.PairedData().getFormat reports for
    the first input file. Only the format of the first file influences
    the name; the pairedness of the input does not.

    Returns an empty string when there are no input files (empty
    sequence or None).
    '''
    # AH: patch required when importing pipeline
    if not infiles:
        return ""

    first = infiles[0]
    # NOTE(review): the result of checkPairs is not used for the name;
    # the call is retained in case it validates the input -- confirm and
    # drop it if it has no such effect.
    PipelineMetagenomeAssembly.PairedData().checkPairs(first)
    suffix = PipelineMetagenomeAssembly.PairedData().getFormat(first)
    return "pooled_reads.dir/agg-agg-agg.%s" % suffix
Exemple #11
0
def buildContigLengths(infile, outfile):
    '''
    output the length of every contig in each of the assemblies.

    The work is delegated to
    PipelineMetagenomeAssembly.build_scaffold_lengths, which receives
    infile, outfile and the pipeline-wide PARAMS.
    '''
    write_lengths = PipelineMetagenomeAssembly.build_scaffold_lengths
    write_lengths(infile, outfile, PARAMS)
Exemple #12
0
def buildSoapdenovoConfig(infile, outfile):
    '''
    build the soapdenovo configuration for each track.

    Creates a PipelineMetagenomeAssembly.SoapDenovo2 object and calls its
    config method with infile, outfile and the pipeline-wide PARAMS.
    '''
    assembler = PipelineMetagenomeAssembly.SoapDenovo2()
    assembler.config(infile, outfile, PARAMS)