"*.csfasta.F3.gz", ) SEQUENCEFILES = tuple([os.path.join(DATADIR, suffix_name) for suffix_name in SEQUENCESUFFIXES]) SEQUENCEFILES_REGEX = regex( r"(\S+)-(\S+)-(\S+).(?P<suffix>fastq.1.gz|fastq.gz|sra)") Sample = PipelineTracks.AutoSample Sample.attributes = ('tissue', 'condition', 'replicate') TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory( [y for x in SEQUENCESUFFIXES for y in glob.glob(x)], "(\S+).(fastq.1.gz|fastq.gz|sra)") EXPERIMENTS = PipelineTracks.Aggregate(TRACKS, labels=("tissue", "condition")) CONDITIONS = PipelineTracks.Aggregate(TRACKS, labels=("condition", )) REPLICATES = PipelineTracks.Aggregate(TRACKS, labels=("replicate", )) ######################################################################### # summarise read 3' ######################################################################### @follows(mkdir("sequence_characteristics.dir")) @transform(SEQUENCEFILES, SEQUENCEFILES_REGEX, r"sequence_characteristics.dir/\1-\2-\3.\g<suffix>_start.tsv") def summariseReadStart(infile, outfile): # this only works for fastq files. Fails with .sra files # this function and the next section should be replaced with a call to
"pipeline_annotations.py") ################################################################### ################################################################### # Helper functions mapping tracks to conditions, etc ################################################################### import CGATPipelines.PipelineTracks as PipelineTracks Sample = PipelineTracks.AutoSample # collect sra nd fastq.gz tracks TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory( glob.glob("*.bam"), "(\S+).bam") # group by experiment (assume that last field is a replicate identifier) EXPERIMENTS = PipelineTracks.Aggregate(TRACKS, labels=("condition", "tissue")) GENESETS = PipelineTracks.Tracks(Sample).loadFromDirectory( glob.glob("*.gtf.gz"), "(\S+).gtf.gz") ################################################################### ################################################################### ################################################################### def connect(): '''connect to database. This method also attaches to helper databases. '''