def loadTranscriptClassification(infiles, outfile):
    '''Concatenate the ``*.class.gz`` files in *infiles* and load them
    through ``P.concatenate_and_load``, writing to *outfile*.

    The filename regex captures the part of each file name that sits
    between the last ``/`` and ``.class.gz``.  One ``-i <column>`` flag is
    passed per listed column (presumably index requests for the loader --
    confirm against ``P.concatenate_and_load``).
    '''
    indexed_columns = (
        "transcript_id",
        "gene_id",
        "match_gene_id",
        "match_transcript_id",
        "source",
    )
    # Yields "-i transcript_id -i gene_id ... -i source".
    load_options = " ".join("-i " + column for column in indexed_columns)

    P.concatenate_and_load(
        infiles,
        outfile,
        regex_filename=".+/(.+).class.gz",
        options=load_options,
        job_memory="64G",
    )
def loadCramQuality(infiles, outfile):
    '''Gather every ``.quality`` file from *infiles* and load them into a
    single summarized table via ``P.concatenate_and_load``.

    *infiles* is an iterable of filename collections; names that do not
    end in ``.quality`` are ignored.  The inputs are declared as having no
    title row, so the column header is supplied explicitly.
    '''
    quality_files = []
    for group in infiles:
        # Keep only the quality reports from each group of filenames.
        quality_files.extend(
            name for name in group if name.endswith(".quality"))

    P.concatenate_and_load(
        quality_files,
        outfile,
        regex_filename="validate.cram.dir/(.*).quality",
        cat="track",
        has_titles=False,
        header="cramID,number_reads,cram_quality_score",
    )
def mergeAllQuants(infiles, outfile):
    '''Concatenate the ``*_agg-agg-agg.sf`` quantification files in
    *infiles* and load them via ``P.concatenate_and_load``; afterwards run
    the mapping-rates shell script unless ``mapping_rates.txt`` already
    exists in the current working directory.
    '''
    # NOTE(review): job_threads is not referenced explicitly below;
    # presumably P.run picks it up from this function's locals -- confirm
    # before renaming or removing it.
    job_threads = 3

    indexed_columns = (
        "Name",
        "Length",
        "EffectiveLength",
        "TPM",
        "NumReads",
        "track",
        "source",
    )
    P.concatenate_and_load(
        infiles,
        outfile,
        regex_filename="quantification.dir/(.*)_agg-agg-agg.sf",
        options=" ".join("-i " + column for column in indexed_columns),
        job_memory="64G",
    )

    # Nothing more to do when the mapping-rates summary is already there.
    if os.path.isfile("mapping_rates.txt"):
        return

    statement = ''' bash /shared/sudlab1/General/projects/UTRONs/MyFiles/scripts/mapping_rates_script.sh '''
    P.run(statement)
def loadUtronIDs(infiles, outfile):
    '''Concatenate the ``*.ids.gz`` files in *infiles* and load them via
    ``P.concatenate_and_load``, writing to *outfile*.

    The input files are declared as having no title row, so the header is
    supplied here.  Every table gets ``track`` and ``transcript_id``
    columns; every target except ``all_utrons_ids.load`` additionally gets
    a ``match_transcript_id`` column, with a matching ``-i`` flag.
    '''
    # NOTE(review): job_threads is not referenced anywhere in this
    # function; presumably it is meant to be read by the pipeline
    # framework -- confirm before removing.
    job_threads = 3

    header = "track,transcript_id"
    options = "-i track -i transcript_id"
    if outfile != "all_utrons_ids.load":
        header += ",match_transcript_id"
        options += " -i match_transcript_id"

    P.concatenate_and_load(
        infiles,
        outfile,
        # Raw string: "\." is not a valid escape in a normal string literal
        # and triggers a warning on current Python versions.  The pattern
        # text itself is unchanged.
        regex_filename=r".+/(.+)\..+\.ids.gz",
        has_titles=False,
        cat="track",
        header=header,
        options=options,
        job_memory="64G",
    )
def loadSeedTables(infiles, outfile):
    '''Concatenate the meme / dreme / memechip ``.tsv`` files in *infiles*
    and load them via ``P.concatenate_and_load``, writing to *outfile*.

    The filename regex captures the part of each file name between the
    last ``/`` and the ``.meme``, ``.dreme`` or ``.memechip`` suffix.
    '''
    P.concatenate_and_load(
        infiles,
        outfile,
        # Raw string: "\." is not a valid escape in a normal string literal
        # and triggers a warning on current Python versions.  The pattern
        # text itself is unchanged.
        regex_filename=r".+/(.+)\.(?:meme|dreme|memechip).tsv",
    )
def mergeAndLoadTrackComparisons(infiles, outfile):
    '''Concatenate the ``<a>_to_<b>.tomtom`` files in *infiles* and load
    them via ``P.concatenate_and_load``, writing to *outfile*.

    The filename regex has two capture groups (the parts either side of
    ``_to_``); they are paired with the two names given in ``cat``,
    ``track1`` and ``track2``, each of which also gets an ``-i`` flag.
    '''
    filename_pattern = ".+/(.+)_to_(.+).tomtom"
    track_columns = ("track1", "track2")

    P.concatenate_and_load(
        infiles,
        outfile,
        regex_filename=filename_pattern,
        cat=",".join(track_columns),
        options=" ".join("-i " + column for column in track_columns),
    )