], defaults={'annotations_dir': ""}) PARAMS = P.PARAMS PARAMS_ANNOTATIONS = P.peek_parameters(PARAMS["annotations_dir"], "genesets") ################################################################### ################################################################### ################################################################### # Helper functions mapping tracks to conditions, etc ################################################################### # load all tracks - exclude input/control tracks Sample = tracks.Sample TRACKS = tracks.Tracks(Sample).loadFromDirectory(glob.glob("*.bed.gz"), "(\S+).bed.gz") TRACKS_BEDFILES = ["%s.bed.gz" % x for x in TRACKS] def getAssociatedBAMFiles(track): '''return a list of BAM files associated with a track. By default, this method searches for ``track.bam`` file in the current directory and returns an offset of 0. Associations can be defined in the .yml file in the section [bams]. For example, the following snippet associates track track1 with the bamfiles :file:`track1.bam` and :file:`track2.bam`:: [bams]
###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# load all tracks - exclude input/control tracks

# Determine the location of the input files (reads). Falls back to the
# current directory when no 'input' parameter is configured.
DATADIR = PARAMS.get('input', '.')
if not os.path.exists(DATADIR):
    # The path is interpolated so the error names the missing directory
    # (the message previously contained a literal, unformatted '%s').
    raise OSError('data directory %s does not exist' % (DATADIR,))

Sample = PipelineTracks.Sample

# Build the track collection from every *.bed.gz file in DATADIR.
# The pattern is a raw string so that '\S' reaches the regex engine
# without relying on Python's handling of unknown string escapes.
# NOTE(review): the capture group presumably yields the track name -
# confirm against PipelineTracks.Tracks.loadFromDirectory.
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob(os.path.join(DATADIR, "*.bed.gz")),
    r"(\S+).bed.gz")

# File name of the bed file for each track. The '%s' template is joined
# to DATADIR first and formatted afterwards.
# NOTE(review): a literal '%' inside DATADIR would break the formatting
# step - confirm whether such paths need to be supported.
BEDFILES = [os.path.join(DATADIR, "%s.bed.gz") % x for x in TRACKS]


# create an indicator target
@transform(BEDFILES, suffix(".gz"), ".gz")
def BedFiles(infile, outfile):
    '''Indicator target for the input bed files.

    The suffix substitution maps ``.gz`` onto ``.gz``, so the output name
    equals the input name; the task body does nothing.
    '''
    pass


BAMFILES = glob.glob(os.path.join(DATADIR, "*.bam"))
dbh = sqlite3.connect(PARAMS["database_name"]) statement = '''ATTACH DATABASE '%s' as annotations''' % ( PARAMS["annotations_database"]) cc = dbh.cursor() cc.execute(statement) cc.close() return dbh class MySample(tracks.Sample): attributes = tuple(PARAMS["attributes"].split(",")) TRACKS = tracks.Tracks(MySample).loadFromDirectory(glob.glob("*.bam"), "(\S+).bam") Sample = tracks.AutoSample DESIGNS = tracks.Tracks(Sample).loadFromDirectory(glob.glob("*.design.tsv"), "(\S+).design.tsv") ################################################################### ################################################################### ################################################################### # DEXSeq workflow ################################################################### @mkdir("results.dir") @files(PARAMS["annotations_interface_geneset_all_gtf"], "geneset_flat.gff") def buildGff(infile, outfile):
# Resolve the directory holding the input data from the 'input' parameter:
#   missing or 0 -> current directory
#   1            -> "data.dir"
#   anything else is used as given
try:
    # Single lookup; the value is reused by the branches below.
    _input_setting = PARAMS["input"]
except KeyError:
    DATADIR = "."
else:
    if _input_setting == 0:
        DATADIR = "."
    elif _input_setting == 1:
        DATADIR = "data.dir"
    else:
        DATADIR = _input_setting  # not recommended practice.

Sample = tracks.AutoSample

# Collect BAM tracks and design files from the current directory.
# The patterns are raw strings so that '\S' reaches the regex engine
# without relying on Python's handling of unknown string escapes.
BAM_TRACKS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bam"), r"(\S+).bam")
DESIGNS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("design*.tsv"), r"design(\S+).tsv")

# do not use - legacy methods
# here only to stop ruffus erroring. Remove once pipeline scrum is
# complete and old code has been removed
GENESETS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"), r"(\S+).gtf.gz")
TRACKS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bam"), r"(\S+).bam")

# group by experiment (assume that last field is a replicate identifier)
EXPERIMENTS = tracks.Aggregate(BAM_TRACKS, labels=("condition", "tissue"))

###############################################################################