def loadPicardRNAseqMetrics(infiles, outfile):
    '''Concatenate per-sample picardRNAseqMetrics files and load them
    into a single database table, indexed by sample_id.'''
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename="(.*).picardRNAseqMetrics",
        cat="sample_id",
        options='-i "sample_id"')
def merge_and_load_region_metagenes(infiles, outfile):
    '''Concatenate per-region metagene tables and load them into one table.

    The filename regex captures source, condition, replicate and geneset
    from each input path; these become indexed columns in the loaded table.
    '''
    P.concatenate_and_load(
        infiles, outfile,
        # raw string: "\." in a plain string is an invalid escape sequence
        # (SyntaxWarning on Python >= 3.12); the regex itself is unchanged
        regex_filename=r".+/(.+)-(.+)-(.+)\.vs\.(.+).tsv.gz",
        cat="source,condition,replicate,geneset",
        options=" -i source -i condition -i replicate -i geneset")
def loadpicardAlignmentSummary(infiles, outfile):
    '''Load all picardAlignmentStats files into one database table,
    indexed by sample_id.'''
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename="(.*).picardAlignmentStats",
        cat="sample_id",
        options='-i "sample_id"')
def load_quasar(infiles, outfile):
    '''Concatenate QuASAR pileup results and load them into the database,
    replacing the file headers with a fixed column list.'''
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename="quasar_out.dir/(.+).pileup.bed.quasarin.tsv",
        options="-i track -i annotations.rsID -i annotations.chr"
                " -i annotations.pos0 --replace-header",
        header="track,rsID,chr,pos,betas,beta_ses,pval")
def mergeAllQuants(infiles, outfile):
    '''Concatenate per-sample salmon .sf quantification files and load
    them into one indexed database table.'''
    # NOTE(review): presumably picked up by the cluster submission layer
    # via frame locals — confirm against P.run's conventions
    job_memory = "6G"
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename="quantification.dir/(.+).sf",
        options="-i transcript_id -i Name -i Length -i EffectiveLength"
                " -i TPM -i NumReads -i track -i source")
def loadTranscriptClassification(infiles, outfile):
    '''Load transcript classification tables (.class.gz) into one
    database table with indexes on the id columns.'''
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename=".+/(.+).class.gz",
        options="-i transcript_id -i gene_id -i match_gene_id"
                " -i match_transcript_id -i source",
        job_memory="64G")
def loadDuplicationMetrics(infiles, outfile):
    '''Import duplication metrics into the project database, one row set
    per sample (sample name taken from the "-count" directory).'''
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename="(.*)-count/.*.txt",
        has_titles=True,
        options="",
        cat="sample")
def loadRawQcMetricsPerBarcode(infiles, outfile):
    '''Load the total UMI and barcode-rank tables into a sqlite database,
    categorised by sample (from the "-count" directory name).'''
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename="(.*)-count/.*.txt",
        has_titles=True,
        options="",
        cat="sample")
def loadCellrangerCountMetrics(infiles, outfile):
    '''Load the cellranger per-run summary statistics into the csvdb,
    one category per sample (from the "-count" directory name).'''
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename="(.*)-count/.*.txt",
        has_titles=True,
        options="",
        cat="sample")
def loadSalmon(infiles, outfile):
    '''Load per-sample salmon quant.sf tables into a single database
    table, mapping each input .log path to its sibling quant.sf file.'''
    quant_files = [logfile.replace(".log", "/quant.sf") for logfile in infiles]
    P.concatenate_and_load(
        quant_files, outfile,
        regex_filename=".*/(.*)/quant.sf",
        cat="sample_id",
        options="-i Name",
        job_memory=PARAMS["sql_himem"])
def mergeAllQuants(infiles, outfile):
    '''Concatenate aggregated salmon quantifications into one database
    table, then generate mapping_rates.txt if it does not already exist.

    NOTE(review): a function of the same name appears earlier in this
    view — if both live in one module the later definition shadows the
    first; confirm they belong to different pipeline files.
    '''
    # NOTE(review): presumably picked up by the cluster submission layer
    # via frame locals — confirm against P.run's conventions
    job_threads = 3
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename="quantification.dir/(.*)_agg-agg-agg.sf",
        options="-i Name -i Length -i EffectiveLength"
                " -i TPM -i NumReads -i track -i source",
        job_memory="64G")
    # Only compute mapping rates once; the original's redundant
    # "else: pass" branch has been removed.
    if not os.path.isfile("mapping_rates.txt"):
        statement = '''
        bash /shared/sudlab1/General/projects/UTRONs/MyFiles/scripts/mapping_rates_script.sh
        '''
        P.run(statement)
def loadUtronIDs(infiles, outfile):
    '''Load UTRON id tables into the database, categorised by track.

    The all_utrons table keeps only track/transcript_id; every other
    target additionally records the matched reference transcript id.
    '''
    # NOTE(review): presumably picked up by the cluster submission layer
    # via frame locals — confirm against P.run's conventions
    job_threads = 3
    header = "track,transcript_id"
    options = "-i track -i transcript_id"
    if outfile != "all_utrons_ids.load":  # idiomatic form of "not x == y"
        header += ",match_transcript_id"
        options += " -i match_transcript_id"
    P.concatenate_and_load(
        infiles, outfile,
        # raw string: "\." in a plain string is an invalid escape sequence
        # (SyntaxWarning on Python >= 3.12); the regex itself is unchanged
        regex_filename=r".+/(.+)\..+\.ids.gz",
        has_titles=False,
        cat="track",
        header=header,
        options=options,
        job_memory="64G")
def loadSeedTables(infiles, outfile):
    '''Concatenate seed-motif tables (meme/dreme/memechip .tsv files)
    and load them into one database table.'''
    P.concatenate_and_load(
        infiles, outfile,
        # raw string: "\." in a plain string is an invalid escape sequence
        # (SyntaxWarning on Python >= 3.12); the regex itself is unchanged
        regex_filename=r".+/(.+)\.(?:meme|dreme|memechip).tsv")
def mergeAndLoadTrackComparisons(infiles, outfile):
    '''Load pairwise tomtom track-comparison tables into one database
    table, indexed on both track columns.'''
    P.concatenate_and_load(
        infiles, outfile,
        regex_filename=".+/(.+)_to_(.+).tomtom",
        cat="track1,track2",
        options="-i track1 -i track2")