def __call__(self, track, slice=None):
    """Return expression values of the genes of interest, indexed by factor.

    Expression values (``TPM``) are read from ``sailfish_genes`` joined
    to ``samples``; experimental factors are read from ``samples``
    joined to ``factors``, leaving out the ``genome`` factor.  Both
    tables are merged on ``sample_name`` and reduced to the genes
    listed under ``genes_of_interest`` in the parameters peeked from
    ``pipeline_rnaseqqc.py`` in the current directory.

    ``track`` and ``slice`` are part of the call signature but are not
    used by this method.

    Returns the filtered data frame with ``TPM`` cast to float and
    ``factor`` as its index; the previous index is kept as a column
    by ``reset_index()``.
    """
    exp_statement = """ SELECT TPM, gene_id, sample_name FROM sailfish_genes AS A JOIN samples AS B ON A.sample_id = B.id"""
    exp_df = self.getDataFrame(exp_statement)

    factors_statement = ''' SELECT factor, factor_value, sample_name FROM samples AS A JOIN factors AS B ON A.id = B.sample_id WHERE factor != 'genome' '''
    factors_df = self.getDataFrame(factors_statement)

    merged_df = pd.merge(exp_df, factors_df, on="sample_name")

    genes = Pipeline.asList(
        Pipeline.peekParameters(
            ".", "pipeline_rnaseqqc.py")['genes_of_interest'])

    # Take an explicit copy of the selection.  Assigning a column on
    # the result of a boolean-mask selection is chained assignment:
    # pandas emits SettingWithCopyWarning and the write may land on a
    # temporary object instead of the frame that is returned.
    interest_df = merged_df[merged_df['gene_id'].isin(genes)].copy()
    interest_df['TPM'] = interest_df['TPM'].astype(float)

    return interest_df.reset_index().set_index("factor")
# load options from the config file
PARAMS = P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"])

# add configuration values from associated pipelines
#
# 1. pipeline_annotations: any parameters will be added with the
#    prefix "annotations_". The interface will be updated with
#    "annotations_dir" to point to the absolute path names.
PARAMS.update(P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
    on_error_raise=__name__ == "__main__",
    prefix="annotations_",
    update_interface=True))

# Gene set tracks: built from the *.gtf.gz files in the working directory.
# The patterns below are raw strings so that "\S" is not parsed by Python
# as an (invalid) string escape; the string values themselves are unchanged.
GENESETS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"), r"(\S+).gtf.gz")

# Sample tracks: built from the *.bam files in the working directory.
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample3)
TRACKS = TRACKS.loadFromDirectory(glob.glob("*.bam"), r"(\S+).bam")

# Aggregates of the sample tracks.
# NOTE(review): ("replicate") is a plain string, not a one-element tuple -
# confirm that Aggregate accepts this, or whether ("replicate",) was meant.
REPLICATE = PipelineTracks.Aggregate(TRACKS, labels=("replicate"))
CONDITION = PipelineTracks.Aggregate(TRACKS, labels=("condition", "tissue"))

# if necessary, update the PARAMS dictionary in any modules file.
import pysam # load options from the config file PARAMS = P.getParameters( ["%s/pipeline.ini" % os.path.splitext(__file__)[0], "../pipeline.ini", "pipeline.ini"]) # add configuration values from associated pipelines # # 1. pipeline_annotations: any parameters will be added with the # prefix "annotations_". The interface will be updated with # "annotations_dir" to point to the absolute path names. PARAMS.update(P.peekParameters( PARAMS["annotations_dir"], "pipeline_annotations.py", on_error_raise=__name__ == "__main__", prefix="annotations_", update_interface=True)) # define some tracks if needed TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory( glob.glob("*.ini"), "(\S+).ini") # --------------------------< utility functions >---------------------------- # def connect(): '''Connect to database. Use this method to connect to additional databases. Returns an sqlite3 database handle.
import CGAT.Stats as Stats import pysam import CGATPipelines.PipelineTracks as PipelineTracks ################################################################### ################################################################### ################################################################### # Pipeline configuration import CGATPipelines.Pipeline as P P.getParameters(["%s/pipeline.ini" % os.path.splitext(__file__)[0], "../pipeline.ini", "pipeline.ini"]) PARAMS = P.PARAMS PARAMS_ANNOTATIONS = P.peekParameters( PARAMS["annotations_dir"], "pipeline_annotations.py") SEPARATOR = "|" ################################################################### ################################################################### ################################################################### # Helper functions mapping tracks to conditions, etc class TracksVCF (PipelineTracks.Tracks): def load(self, filename, exclude=None): '''load tracks from a vcf file.''' tracks = []
P.getParameters( ["%s/pipeline.ini" % os.path.splitext(__file__)[0], "../pipeline.ini", "pipeline.ini"], defaults={"annotations_dir": "", "genesets_abinitio_coding": "pruned.gtf.gz", "genesets_abinitio_lncrna": "pruned.gtf.gz", "genesets_reference": "reference.gtf.gz", "genesets_refcoding": "refcoding.gtf.gz", "genesets_previous": ""}) PARAMS = P.PARAMS PARAMS.update(P.peekParameters( PARAMS["annotations_dir"], "pipeline_annotations.py", prefix="annotations_", update_interface=True)) PREVIOUS = P.asList(PARAMS["genesets_previous"]) def connect(): '''connect to database. This method also attaches to helper databases. ''' dbh = sqlite3.connect(PARAMS["database_name"]) statement = '''ATTACH DATABASE '%s' as annotations''' % ( PARAMS["annotations_database"])
###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
# Load options from the config files; 'paired_end' defaults to False
# when no config file sets it.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"],
    defaults={'paired_end': False})

PARAMS = P.PARAMS

# Add the parameters of pipeline_annotations.py under the
# "annotations_" prefix.
PARAMS.update(
    P.peekParameters(PARAMS["annotations_dir"],
                     "pipeline_annotations.py",
                     prefix="annotations_",
                     update_interface=True))

# Hand the same parameter dictionary to the helper modules.
PipelinePeakcalling.PARAMS = PARAMS
PipelineMotifs.PARAMS = PARAMS

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# load all tracks - exclude input/control tracks

# determine the location of the input files (reads).
DATADIR = PARAMS.get('input', '.')
if not os.path.exists(DATADIR):
    # Interpolate the offending path; the message previously contained
    # a literal, unfilled "%s".
    raise OSError('data directory %s does not exists' % DATADIR)
import CGATPipelines.Pipeline as Pipeline
import CGATPipelines.PipelineTracks as PipelineTracks

###################################################################
###################################################################
# parameterization
# Each setting prefers the "calling_"-prefixed option and falls back
# to the general report option, then to a hard-coded default.
EXPORTDIR = P.get('calling_exportdir', P.get('exportdir', 'export'))
DATADIR = P.get('calling_datadir', P.get('datadir', '.'))
DATABASE = P.get('calling_backend',
                 P.get('report_sql_backend', 'sqlite:///./csvdb'))

###################################################################
# cf. pipeline_chipseq.py
# This should be automatically gleaned from pipeline_chipseq.py
###################################################################
PARAMS_PIPELINE = Pipeline.peekParameters(".", "pipeline_chipseq.py")

Sample = PipelineTracks.Sample3

suffixes = ["export.txt.gz", "sra", "fastq.gz", "fastq.1.gz", "csfasta.gz"]

# One Tracks collection per suffix, built from the files in DATADIR
# whose path does not contain "input"; the collections are then summed
# onto an empty Tracks(Sample).
# The pattern is a raw string so that "\S" is not parsed by Python as
# an (invalid) string escape; the resulting string value is unchanged.
TRACKS = sum(
    itertools.chain([
        PipelineTracks.Tracks(Sample).loadFromDirectory(
            [x for x in glob.glob("%s/*.%s" % (DATADIR, s))
             if "input" not in x],
            r"%s/(\S+).%s" % (DATADIR, s))
        for s in suffixes]),
    PipelineTracks.Tracks(Sample))

Sample.setDefault("asTable")

ALL = PipelineTracks.Aggregate(TRACKS)
from CGATReport.Utils import PARAMS as P
import CGATPipelines.Pipeline as Pipeline
import CGATPipelines.PipelineTracks as PipelineTracks

###################################################################
###################################################################
# parameterization
# Report options: the "calling_"-specific key wins, then the generic
# report key, then the literal default.
EXPORTDIR = P.get('calling_exportdir', P.get('exportdir', 'export'))
DATADIR = P.get('calling_datadir', P.get('datadir', '.'))
DATABASE = P.get('calling_backend',
                 P.get('report_sql_backend', 'sqlite:///./csvdb'))

###################################################################
# cf. pipeline_chipseq.py
# This should be automatically gleaned from pipeline_chipseq.py
###################################################################
PARAMS_PIPELINE = Pipeline.peekParameters(".", "pipeline_chipseq.py")

Sample = PipelineTracks.Sample3

suffixes = ["export.txt.gz", "sra", "fastq.gz", "fastq.1.gz", "csfasta.gz"]

# For every suffix, load the matching files from DATADIR (paths that
# contain "input" are left out) and add the per-suffix collections onto
# an empty Tracks(Sample).
# The pattern is written as a raw string: "\S" is not a valid Python
# string escape, and the raw form yields exactly the same characters.
TRACKS = sum(
    itertools.chain([
        PipelineTracks.Tracks(Sample).loadFromDirectory(
            [x for x in glob.glob("%s/*.%s" % (DATADIR, s))
             if "input" not in x],
            r"%s/(\S+).%s" % (DATADIR, s))
        for s in suffixes]),
    PipelineTracks.Tracks(Sample))
import CGAT.Database as Database
import CGAT.GTF as GTF
import CGATPipelines.PipelineTracks as PipelineTracks

# load options from the config file
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini"
])
PARAMS = P.PARAMS

USECLUSTER = True

# link up with annotations
PARAMS_ANNOTATIONS = P.peekParameters(PARAMS["annotations_dir"],
                                      "pipeline_annotations.py")

# link up with ancestral repeats
PARAMS_ANCESTRAL_REPEATS = P.peekParameters(PARAMS["ancestral_repeats_dir"],
                                            "pipeline_ancestral_repeats.py")

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# Collect tracks from the *.gtf.gz files in the working directory,
# excluding repeats.gtf.gz, introns.gtf.gz and merged.gtf.gz.
# The pattern is a raw string so that "\S" is not parsed by Python as
# an (invalid) string escape; the string value is unchanged.
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
    r"(\S+).gtf.gz",
    exclude=("repeats.gtf.gz", "introns.gtf.gz", "merged.gtf.gz"))
import CGAT.GTF as GTF
import CGATPipelines.PipelineTracks as PipelineTracks

# load options from the config file
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"])
PARAMS = P.PARAMS

USECLUSTER = True

# link up with annotations
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py")

# link up with ancestral repeats
PARAMS_ANCESTRAL_REPEATS = P.peekParameters(
    PARAMS["ancestral_repeats_dir"],
    "pipeline_ancestral_repeats.py")

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# Collect one track per *.gtf.gz file in the working directory; the
# files named in ``exclude`` are passed to loadFromDirectory to be
# left out.
# Raw string: "\S" is not a valid Python string escape, and the raw
# form produces the same characters without a warning.
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
    r"(\S+).gtf.gz",
    exclude=("repeats.gtf.gz", "introns.gtf.gz", "merged.gtf.gz"))
###################################################################
###################################################################
# Load options and annotations
###################################################################

# Read the pipeline options from the configuration files.
PARAMS = P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ]
)

# Rebind PARAMS to the dictionary held by the Pipeline module, then add
# the parameters peeked from pipeline_genesets.py under the
# "annotations_" prefix.
PARAMS = P.PARAMS
PARAMS.update(
    P.peekParameters(
        PARAMS["annotations_dir"],
        "pipeline_genesets.py",
        prefix="annotations_",
        update_interface=True,
        restrict_interface=True,
    )
)

# The DEXSeq R directory contains important python helper functions.
# The R snippet asks system.file() for the "python_scripts" directory
# of the DEXSeq package; the literal is kept exactly as found.
PYTHONSCRIPTSDIR = R(
    ''' f = function(){ pythonScriptsDir = system.file("python_scripts", package="DEXSeq") } f()'''
).tostring()

###################################################################
###################################################################
###################################################################
# Utility functions
# load options from the config file P.getParameters([ "%s/pipeline.ini" % os.path.splitext(__file__)[0], "../pipeline.ini", "pipeline.ini" ]) PARAMS = P.PARAMS # Add parameters from the gtf_subset pipeline, but # only the interface section. All PARAMS options # will have the prefix `annotations_` PARAMS.update( P.peekParameters(PARAMS["gtf_dir"], "pipeline_genesets.py", prefix="annotations_", update_interface=True, restrict_interface=True)) # ----------------------------------------------- # Utility functions def connect(): '''utility function to connect to database. Use this method to connect to the pipeline database. Additional databases can be attached here as well. Returns an sqlite3 database handle. '''
# load options from the config file P.getParameters( ["%s/pipeline.ini" % os.path.splitext(__file__)[0], "../pipeline.ini", "pipeline.ini"]) PARAMS = P.PARAMS # Add parameters from the gtf_subset pipeline, but # only the interface section. All PARAMS options # will have the prefix `annotations_` PARAMS.update(P.peekParameters( PARAMS["gtf_dir"], "pipeline_genesets.py", prefix="annotations_", update_interface=True, restrict_interface=True)) # ----------------------------------------------- # Utility functions def connect(): '''utility function to connect to database. Use this method to connect to the pipeline database. Additional databases can be attached here as well. Returns an sqlite3 database handle. '''