###################################################
###################################################
###################################################
# Pipeline configuration
###################################################

# load options from the config file
import CGAT.Pipeline as P
# Candidate configuration files handed to P.getParameters: a pipeline.ini
# in a directory named after this script, then the parent directory's
# copy, then the one in the current working directory.
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini",
])

PARAMS = P.PARAMS

# Parameters of pipeline_annotations.py, looked up in the directory named
# by PARAMS["annotations_dir"].  on_error_raise is only true when this
# module is executed as a script rather than imported.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
    on_error_raise=__name__ == "__main__",
)

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

Sample = PipelineTracks.AutoSample

# Collect one track per *.bam file in the working directory.  The pattern
# is a raw string so that "\S" reaches the regex engine untouched instead
# of being parsed as an (invalid) string escape.
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bam"), r"(\S+).bam")

# group by experiment (assume that last field is a replicate identifier)
EXPERIMENTS = PipelineTracks.Aggregate(TRACKS, labels=("condition", "tissue"))
###################################################
###################################################
###################################################
## Pipeline configuration
###################################################

# load options from the config file
import CGAT.Pipeline as P
# Candidate configuration files handed to P.getParameters: an .ini file
# named after this script (".py" suffix stripped), then ../pipeline.ini,
# then pipeline.ini in the current working directory.
P.getParameters([
    "%s.ini" % __file__[:-len(".py")],
    "../pipeline.ini",
    "pipeline.ini",
])

PARAMS = P.PARAMS

# Parameters of pipeline_annotations.py, looked up in the directory named
# by PARAMS["annotations_dir"].
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
)

###################################################################
###################################################################
## Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

# define some tracks if needed
# One track per *.ini file in the working directory.  The pattern is a raw
# string so that "\S" is not parsed as an (invalid) string escape.
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.ini"), r"(\S+).ini")


###################################################################
###################################################################
###################################################################
Esempio n. 3
0
import CGAT.GTF as GTF

# load options from the config file
import CGAT.Pipeline as P
# Candidate configuration files handed to P.getParameters: a pipeline.ini
# in a directory named after this script, then the parent directory's
# copy, then the one in the current working directory.
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini",
])

PARAMS = P.PARAMS

USECLUSTER = True

# Parameters of the annotation pipeline, looked up in
# PARAMS["annotations_dir"]; on_error_raise is only true when this module
# is executed as a script rather than imported.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
    on_error_raise=__name__ == "__main__",
)

# Parameters of the ancestral repeats pipeline, looked up in
# PARAMS["ancestral_repeats_dir"].
PARAMS_ANCESTRAL_REPEATS = P.peekParameters(
    PARAMS["ancestral_repeats_dir"],
    "pipeline_ancestral_repeats.py",
)

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

# collect tracks from the *.gtf.gz files in the working directory
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
Esempio n. 4
0
import logging as L
from ruffus import *
import CGATPipelines.PipelineMapping as PipelineMapping

# NOTE(review): not referenced in the visible code; presumably switches
# cluster job submission on - confirm against the rest of the pipeline.
USECLUSTER = True

###################################################
###################################################
###################################################
## Pipeline configuration
###################################################
import CGAT.Pipeline as P
# Candidate configuration files handed to P.getParameters: an .ini file
# named after this script (".py" suffix stripped), then ../pipeline.ini,
# then pipeline.ini in the current working directory.
P.getParameters([
    "%s.ini" % __file__[:-len(".py")],
    "../pipeline.ini",
    "pipeline.ini",
])

PARAMS = P.PARAMS

# Parameters of pipeline_annotations.py, looked up in the directory named
# by PARAMS["annotations_dir"].
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
)


###################################################################
###################################################################
###################################################################
## TRIM READS
@follows(mkdir("trim"))
@transform("*.gz", regex(r"(\S+).gz"), r"trim/\1.gz")
def trimReads(infile, outfile):
    '''trim reads with FastX'''
    to_cluster = True

    tmpdir_fastq = P.getTempDir()
    track = P.snip(os.path.basename(infile), ".gz")
    statement = """gunzip < %(infile)s | python %%(scriptsdir)s/fastq2fastq.py 
import PipelineGeneset as PGeneset
import PipelineAnnotator as PAnnotator

# load options from the config file
import CGAT.Pipeline as P
# Candidate configuration files handed to P.getParameters: a pipeline.ini
# in a directory named after this script, then the parent directory's
# copy, then the one in the current working directory.
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini",
])

PARAMS = P.PARAMS

USECLUSTER = True

# Parameters of the annotation pipeline, looked up in
# PARAMS["annotations_dir"].
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
)

# Parameters of the ancestral repeats pipeline, looked up in
# PARAMS["ancestral_repeats_dir"].
PARAMS_ANCESTRAL_REPEATS = P.peekParameters(
    PARAMS["ancestral_repeats_dir"],
    "pipeline_ancestral_repeats.py",
)

###################################################################
###################################################################
## Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

# Collect one track per *.gtf.gz file in the working directory, passing
# the file names listed in `exclude` to loadFromDirectory.  The pattern is
# a raw string so that "\S" is not parsed as an (invalid) string escape.
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
    r"(\S+).gtf.gz",
    exclude=("repeats.gtf.gz", "introns.gtf.gz", "merged.gtf.gz"))
Esempio n. 6
0
###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
import CGAT.Pipeline as P
# Candidate configuration files handed to P.getParameters: a pipeline.ini
# in a directory named after this script, then the parent directory's
# copy, then the one in the current working directory.  An empty
# "annotations_dir" is supplied as a default.
P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ],
    defaults={"annotations_dir": ""},
)

PARAMS = P.PARAMS

# Parameters of pipeline_annotations.py, looked up in the directory named
# by PARAMS["annotations_dir"]; on_error_raise is only true when this
# module is executed as a script rather than imported.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
    on_error_raise=__name__ == "__main__",
)

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# load all tracks - exclude input/control tracks
Sample = PipelineTracks.Sample

# One track per *.bed.gz file in the working directory.  The pattern is a
# raw string so that "\S" is not parsed as an (invalid) string escape.
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bed.gz"), r"(\S+).bed.gz")

# file name corresponding to each track
TRACKS_BEDFILES = ["%s.bed.gz" % x for x in TRACKS]
Esempio n. 7
0

###################################################################
###################################################################
## parameterization

# chipseq_* settings looked up in P
EXPORTDIR = P["chipseq_exportdir"]
DATADIR = P["chipseq_datadir"]
DATABASE = P["chipseq_backend"]

###################################################################
# cf. pipeline_chipseq.py
# This should be automatically gleaned from pipeline_chipseq.py
###################################################################
import CGAT.Pipeline as Pipeline
# Parameters of pipeline_chipseq.py, looked up in the current directory.
PARAMS_PIPELINE = Pipeline.peekParameters(
    ".",
    "pipeline_chipseq.py",
)

import CGATPipelines.PipelineTracks as PipelineTracks

Sample = PipelineTracks.Sample3

# file suffixes to look for within DATADIR
suffixes = ["export.txt.gz",
            "sra",
            "fastq.gz",
            "fastq.1.gz",
            "csfasta.gz"]

# Build one track collection per suffix (skipping any path that contains
# "input") and add them all onto an empty collection.  The pattern is built
# from a raw string so that "\S" is not parsed as an (invalid) string
# escape.  sum() iterates the list directly, so no itertools.chain wrapper
# is needed around it.
TRACKS = sum(
    [PipelineTracks.Tracks(Sample).loadFromDirectory(
        [x for x in glob.glob("%s/*.%s" % (DATADIR, s)) if "input" not in x],
        r"%s/(\S+).%s" % (DATADIR, s))
     for s in suffixes],
    PipelineTracks.Tracks(Sample))
Esempio n. 8
0
from AnnotationReport import *

import CGAT.Pipeline as P
import CGATCore.IOTools as IOTools

# Parameters of pipeline_annotations.py, looked up in the current
# directory and requested with the "annotations_" prefix.  on_error_raise
# is only true when this module is executed as a script.
PARAMS = P.peekParameters(
    ".",
    "pipeline_annotations.py",
    on_error_raise=__name__ == "__main__",
    prefix="annotations_",
    update_interface=True,
)


class AnnotationStatus(Status):
    '''status information for annotations.


    '''
    tracks = [
        x for x, y in list(PARAMS.items())
        if str(y).endswith((".bed.gz", ".gtf.gz", ".gff.gz", ".tsv.gz",
                            ".tsv"))
    ]

    slices = ('AnnotationIsPresent', )

    def testAnnotationIsPresent(self, track):
        '''
        PASS: File exists and is not empty

        FAIL: File exists and is empty (no data except comments)
Esempio n. 9
0
from GeneSetsReport import *

import CGAT.Pipeline as P
import CGAT.IOTools as IOTools

# Parameters of pipeline_annotations.py, looked up in the current
# directory and requested with the "annotations_" prefix.  on_error_raise
# is only true when this module is executed as a script.
PARAMS = P.peekParameters(".",
                          "pipeline_annotations.py",
                          on_error_raise=__name__ == "__main__",
                          prefix="annotations_",
                          update_interface=True)


class HypergeometricStatus(Status):
    '''status information for annotations.
    '''
    pattern = 'hypergeometric_(\S+)_summary$'

    slices = ('SignificantResults',)

    def testSignificantResults(self, track):
        '''
        PASS: Genes have been found in foreground and significant results
              exist.

        WARN: Genes have been found in foreground, but no significant results
              exist.

        FAIL: No genes in foreground sets.

        The value indicates the number of significant results.
Esempio n. 10
0
import PipelineGeneset as PGeneset
import PipelineAnnotator as PAnnotator

# load options from the config file
import CGAT.Pipeline as P
# Candidate configuration files handed to P.getParameters: a pipeline.ini
# in a directory named after this script, then the parent directory's
# copy, then the one in the current working directory.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"])

PARAMS = P.PARAMS

USECLUSTER = True

# Parameters of the annotation pipeline, looked up in
# PARAMS["annotations_dir"].
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
)

# Parameters of the ancestral repeats pipeline, looked up in
# PARAMS["ancestral_repeats_dir"].
PARAMS_ANCESTRAL_REPEATS = P.peekParameters(
    PARAMS["ancestral_repeats_dir"],
    "pipeline_ancestral_repeats.py",
)

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

# collect tracks from the *.gtf.gz files in the working directory
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
    "(\S+).gtf.gz",
Esempio n. 11
0
import PipelineGeneset as PGeneset

# load options from the config file
import CGAT.Pipeline as P
# Candidate configuration files handed to P.getParameters: a pipeline.ini
# in a directory named after this script, then the parent directory's
# copy, then the one in the current working directory.
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini",
])

PARAMS = P.PARAMS

USECLUSTER = True

# Parameters of the annotation pipeline, looked up in
# PARAMS["annotations_dir"]; on_error_raise is only true when this module
# is executed as a script rather than imported.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
    on_error_raise=__name__ == "__main__",
)

# Parameters of the ancestral repeats pipeline, looked up in
# PARAMS["ancestral_repeats_dir"].
PARAMS_ANCESTRAL_REPEATS = P.peekParameters(
    PARAMS["ancestral_repeats_dir"],
    "pipeline_ancestral_repeats.py",
)

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

# Collect one track per *.gtf.gz file in the working directory, passing
# the file names listed in `exclude` to loadFromDirectory.  The pattern is
# a raw string so that "\S" is not parsed as an (invalid) string escape.
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
    r"(\S+).gtf.gz",
    exclude=("repeats.gtf.gz", "introns.gtf.gz", "merged.gtf.gz"))