コード例 #1
0
def mergeAndLoadTrackComparisons(infiles, outfile):
    """Concatenate per-pair tomtom comparison tables into one database
    table, with both compared tracks captured from the filename."""
    # The two regex groups become the track1/track2 columns; index both.
    load_kwargs = dict(
        regex_filename=(".+/(.+)_to_(.+).tomtom"),
        cat="track1,track2",
        options="-i track1 -i track2",
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #2
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadFractionReadsSpliced(infiles, outfile):
    """Load the per-sample fractions of spliced reads into a single
    database table."""
    # sample_id is parsed from the file path and indexed for lookups.
    filename_pattern = ".*/.*/(.*).fraction.spliced"
    P.concatenateAndLoad(
        infiles, outfile,
        cat="sample_id",
        regex_filename=filename_pattern,
        options='-i "sample_id"')
コード例 #3
0
def loadGeneProfiles(infiles, outfile):
    """Concatenate gene profile tables and load them into one database
    table.

    The filename captures factor, condition, replicate and interval,
    which become (indexed) columns of the loaded table.
    """
    # FIX: use a raw string -- "\-" and "\." are invalid escape
    # sequences in a normal string literal (DeprecationWarning since
    # Python 3.6).  The raw string has the identical runtime value.
    P.concatenateAndLoad(infiles,
                         outfile,
                         regex_filename=r'.+/(.+)\-(.+)\-(.+)\.(.+).tsv.gz',
                         cat="factor,condition,rep,interval",
                         options="-i factor -i condition -i rep -i interval")
コード例 #4
0
def loadDistances(infiles, outfile):
    """Concatenate pairwise distance tables and load them into a
    single database table.

    The two compared tracks are captured from each filename into the
    ``File1`` and ``File2`` columns.
    """
    P.concatenateAndLoad(infiles,
                         outfile,
                         regex_filename=".+/(.+)_vs_(.+).tsv.gz",
                         cat="File1,File2",
                         # BUG FIX: was "-i Track, -i File2".  "Track" is not
                         # a column produced here (cat creates File1/File2)
                         # and the stray comma corrupted the option string;
                         # index the two columns that actually exist.
                         options="-i File1 -i File2")
コード例 #5
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadAlignmentSummaryMetrics(infiles, outfile):
    """Load the alignment summary metrics for all samples into a
    single database table."""
    # One block of rows per sample; sample_id comes from the path.
    P.concatenateAndLoad(
        infiles, outfile,
        cat="sample_id",
        regex_filename=".*/.*/(.*).alignment.summary.metrics",
        options='-i "sample_id"')
コード例 #6
0
def loadCounts(infiles, outfile):
    """Merge the feature-counts tables into one database table,
    indexed on track and gene_id."""
    indexing = "-i track,gene_id"
    P.concatenateAndLoad(
        infiles, outfile,
        regex_filename="(.+).tsv.gz",
        options=indexing)
コード例 #7
0
def loadClusterContextStats(infiles, outfile):
    """Concatenate the per-track cluster context statistics and load
    them into a single database table."""
    pattern = "clusters.dir/(.+)(?:sig_bases|clusters).context_stats.tsv.gz"
    P.concatenateAndLoad(infiles, outfile, regex_filename=pattern)
コード例 #8
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadCollectRnaSeqMetrics(infiles, outfile):
    """Load the RNA-seq metrics for all samples into one database
    table."""
    # sample_id is captured from the file path and indexed.
    P.concatenateAndLoad(
        infiles, outfile,
        cat="sample_id",
        regex_filename=".*/.*/(.*).rnaseq.metrics",
        options='-i "sample_id"')
コード例 #9
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadThreePrimeBias(infiles, outfile):
    """Load the three-prime bias metrics for all samples into one
    database table."""
    load_kwargs = dict(
        regex_filename=".*/.*/(.*).three.prime.bias",
        cat="sample_id",
        options='-i "sample_id"',
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #10
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadCopyNumber(infiles, outfile):
    """Load the copy-number estimates into the database, indexed on
    gene_id."""
    # Potentially large table: run the load as a high-memory job.
    P.concatenateAndLoad(
        infiles, outfile,
        regex_filename=".*/(.*).copynumber",
        options='-i "gene_id"',
        job_memory=PARAMS["sql_himem"])
コード例 #11
0
def loadReproducibility(infiles, outfile):
    """Concatenate the per-experiment reproducibility tables and load
    them into one database table."""
    # Experiment name is captured from the aggregated filename.
    P.concatenateAndLoad(
        infiles, outfile,
        cat="Experiment",
        regex_filename=".+/(.+)-agg.reproducibility.tsv.gz",
        options="-i Track -i fold -i level")
コード例 #12
0
def loadDedupedUMIStats(infiles, outfile):
    """Load the deduplicated UMI statistics for every track into one
    database table."""
    filename_pattern = ".+/(.+).umi_stats.tsv.gz"
    P.concatenateAndLoad(
        infiles, outfile,
        cat="track",
        regex_filename=filename_pattern,
        options="-i track -i UMI")
コード例 #13
0
def load_dedup_kmers(infiles, outfile):
    """Concatenate per-replicate k-mer tables and load them into one
    database table.

    factor, tag, replicate and k (the k-mer size, e.g. "6mers") are
    captured from each filename and become indexed columns.
    """
    P.concatenateAndLoad(
        infiles,
        outfile,
        # FIX: raw string -- "\." is an invalid escape sequence in a
        # normal string literal; the runtime value is unchanged.
        regex_filename=r"kmers.dir/(.+)-(.+)-(.+)\.([0-9]+mers).tsv.gz",
        cat="factor,tag,replicate,k",
        # BUG FIX: the last cat column is named "k", not "kmer", so
        # "-i kmer" indexed a non-existent column.
        options="-i factor -i tag -i replicate -i k")
コード例 #14
0
def loadCrosslinkedBasesCount(infiles, outfile):
    """Load per-track crosslinked-base counts into a single table.

    The input files have no header row, so the column names are
    supplied explicitly.
    """
    P.concatenateAndLoad(
        infiles, outfile,
        cat="track",
        has_titles=False,
        header="track,count",
        regex_filename=".+/(.+).count_bases")
コード例 #15
0
def loadAlignmentSummaryMetrics(infiles, outfile):
    """Load the alignment summary metrics for every sample into a
    single database table."""
    load_kwargs = dict(
        regex_filename=".*/.*/(.*).alignment.summary.metrics",
        cat="sample_id",
        options='-i "sample_id"',
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #16
0
def loadThreePrimeBias(infiles, outfile):
    """Load the three-prime bias metrics for every sample into one
    database table."""
    # sample_id is parsed from the file path and indexed.
    filename_pattern = ".*/.*/(.*).three.prime.bias"
    P.concatenateAndLoad(
        infiles, outfile,
        cat="sample_id",
        regex_filename=filename_pattern,
        options='-i "sample_id"')
コード例 #17
0
def loadCollectRnaSeqMetrics(infiles, outfile):
    """Load the RNA-seq metrics for every sample into one database
    table."""
    load_kwargs = dict(
        regex_filename=".*/.*/(.*).rnaseq.metrics",
        cat="sample_id",
        options='-i "sample_id"',
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #18
0
def loadCopyNumber(infiles, outfile):
    """Load the copy-number estimates into the database."""
    # Indexed on gene_id; run as a high-memory job as the table can be
    # large.
    load_kwargs = dict(
        regex_filename=".*/(.*).copynumber",
        options='-i "gene_id"',
        job_memory=PARAMS["sql_himem"],
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #19
0
def loadFractionReadsSpliced(infiles, outfile):
    """Load the fractions of spliced reads per sample into a single
    database table."""
    load_kwargs = dict(
        regex_filename=".*/.*/(.*).fraction.spliced",
        cat="sample_id",
        options='-i "sample_id"',
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #20
0
def loadTranscriptClassification(infiles, outfile):
    """Concatenate transcript classification tables and load them into
    one database table, indexing the id/source columns.

    --quick is passed through to the loader (presumably skips type
    guessing -- confirm against the P.concatenateAndLoad docs).
    """
    P.concatenateAndLoad(infiles,
                         outfile,
                         regex_filename=".+/(.+).class.gz",
                         # BUG FIX: the implicitly-concatenated string parts
                         # had no separating spaces, producing
                         # "-i gene_id-i match_gene_id ... -i source" and so
                         # mangling the -i options.  Add trailing spaces.
                         options="-i transcript_id -i gene_id "
                         "-i match_gene_id -i match_transcript_id "
                         "-i source --quick")
コード例 #21
0
def merge_mismatch_counts(infiles, outfile):
    """Load the results of mismatch counting into the database."""
    # tissue (CB or FC) and replicate are captured from the filename.
    pattern = "mismatches.dir/(CB|FC)-(.+).tsv.gz"
    P.concatenateAndLoad(
        infiles, outfile,
        cat="tissue,replicate",
        regex_filename=pattern,
        options="-i tissue -i replicate -i gene_id")
コード例 #22
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadSpikeVsGenome(infiles, outfile):
    """Load the numbers of reads uniquely mapping to the genome and to
    spike-ins (and the spike-in fraction) into one database table."""
    load_kwargs = dict(
        regex_filename=".*/.*/(.*).uniq.mapped.reads",
        cat="sample_id",
        options='-i "sample_id"',
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #23
0
ファイル: pipeline_iCLIP.py プロジェクト: shulp2211/UMIpipe
def load_sig_exon_counts(infiles, outfile):
    """Load the significant exon counts per deduplication method and
    track into a single table; inputs are headerless."""
    load_kwargs = dict(
        regex_filename="dedup_(.+).dir/(.+).exon_sig_count.tsv.gz",
        cat="method,track",
        has_titles=False,
        header="method,track,name,count",
        options="-i method -i track",
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #24
0
def loadNspliced(infiles, outfile):
    """Load the per-track spliced-read counts (headerless input files)
    into a single database table."""
    P.concatenateAndLoad(infiles,
                         outfile,
                         cat="track",
                         has_titles=False,
                         header="track,nspliced",
                         regex_filename=".+/(.+).nspliced.txt")
コード例 #25
0
def loadSpikeVsGenome(infiles, outfile):
    """Load the numbers of reads uniquely mapping to genome and
    spike-ins, and the spike-in fraction, into one database table."""
    # sample_id comes from the file path; index it for joins.
    filename_pattern = ".*/.*/(.*).uniq.mapped.reads"
    P.concatenateAndLoad(
        infiles, outfile,
        cat="sample_id",
        regex_filename=filename_pattern,
        options='-i "sample_id"')
コード例 #26
0
ファイル: pipeline_iCLIP.py プロジェクト: shulp2211/UMIpipe
def load_node_counts(infiles, outfile):
    """Load the per-cluster node-count distributions into one table.

    Only produced when the method was "cluster": the result is the
    same irrespective of the network method used.
    """
    headerless = dict(has_titles=False, header="track,category,count")
    P.concatenateAndLoad(infiles, outfile,
                         regex_filename=".+/(.+)_nodes.tsv",
                         **headerless)
コード例 #27
0
def loadClusterCounts(infiles, outfile):
    """Load per-sample/replicate cluster counts (headerless input)
    into a single database table."""
    pattern = ".+/(.+).(R[0-9]+|union|reproducible).cluster_count"
    P.concatenateAndLoad(infiles,
                         outfile,
                         cat="sample,replicate",
                         has_titles=False,
                         header="sample,replicate,count",
                         regex_filename=pattern)
コード例 #28
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadFeatureCounts(infiles, outfile):
    """Concatenate the per-track feature counts (headerless input)
    into one table indexed on gene_id."""
    # Large concatenated table: request the high-memory SQL job slot.
    load_kwargs = dict(
        regex_filename=".*/(.*).counts.gz",
        has_titles=False,
        cat="track",
        header="track,gene_id,counts",
        options='-i "gene_id"',
        job_memory=PARAMS["sql_himem"],
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #29
0
def mergeAllQuants(infiles, outfile):
    """Merge all salmon-style quantification files (*.sf) into a
    single database table, indexing the main columns."""
    # NOTE(review): job_memory looks like it is picked up from the
    # caller's locals by the pipeline machinery -- keep the name.
    job_memory = "6G"
    P.concatenateAndLoad(
        infiles,
        outfile,
        regex_filename="quantification.dir/(.*-.*-.*)_agg-agg-agg.sf",
        # BUG FIX: the implicitly-concatenated string parts had no
        # separating spaces, producing "-i EffectiveLength-i TPM" and
        # "-i track-i source", which mangles the -i options.
        options="-i Name -i Length -i EffectiveLength "
        "-i TPM -i NumReads -i track "
        "-i source --quick")
コード例 #30
0
def loadFeatureCounts(infiles, outfile):
    """Load the per-track feature counts into one database table.

    Input files carry no header row; columns are named explicitly and
    gene_id is indexed.  Runs as a high-memory job.
    """
    P.concatenateAndLoad(
        infiles, outfile,
        cat="track",
        has_titles=False,
        header="track,gene_id,counts",
        regex_filename=".*/(.*).counts.gz",
        options='-i "gene_id"',
        job_memory=PARAMS["sql_himem"])
コード例 #31
0
def loadTranscriptProfiles(infiles, outfile):
    """Concatenate and load the transcript profiles.

    The sample name is retained as the "track" column, captured from
    the filename by *regex*.
    """
    # FIX: raw string for the "\S" escape -- invalid escape sequence
    # in a normal string literal (DeprecationWarning since Python 3.6);
    # the runtime value is unchanged.
    regex = (r"transcriptprofiles.dir/(\S+).transcriptprofile.gz."
             "geneprofileabsolutedistancefromthreeprimeend.matrix.tsv.gz")

    # The actual matrix files carry this suffix on top of the
    # per-sample profile name.
    suffix = ".geneprofileabsolutedistancefromthreeprimeend.matrix.tsv.gz"
    infiles = [x + suffix for x in infiles]

    P.concatenateAndLoad(infiles, outfile, regex_filename=regex)
コード例 #32
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadEstimateLibraryComplexity(infiles, outfile):
    '''load the complexity metrics to a single table in the db'''

    # Library-complexity estimation requires paired-end data; for
    # single-end runs only a placeholder message is written.
    if PAIRED:
        # sample_id is captured from the file path and indexed.
        P.concatenateAndLoad(infiles, outfile,
                             regex_filename=".*/.*/(.*).library.complexity",
                             cat="sample_id",
                             options='-i "sample_id"')
    else:
        # NOTE(review): P.run() appears to pick up ``statement`` from
        # the caller's locals -- the variable must keep this exact name.
        statement = '''echo "Not compatible with SE data"
                       > %(outfile)s'''
        P.run()
コード例 #33
0
def loadEstimateLibraryComplexity(infiles, outfile):
    '''load the complexity metrics to a single table in the db'''

    # Only meaningful for paired-end data; single-end runs get a
    # placeholder file instead.
    if PAIRED:
        # One table for all samples; sample_id parsed from the path.
        P.concatenateAndLoad(infiles,
                             outfile,
                             regex_filename=".*/.*/(.*).library.complexity",
                             cat="sample_id",
                             options='-i "sample_id"')
    else:
        # NOTE(review): P.run() appears to read ``statement`` from the
        # caller's locals -- do not rename this variable.
        statement = '''echo "Not compatible with SE data"
                       > %(outfile)s'''
        P.run()
コード例 #34
0
def loadContextStats(infiles, outfile):
    """Load reference-context statistics into one database table.

    Saturation runs carry an extra subset field in the filename, so
    the capture pattern and category columns are chosen accordingly.
    """
    if "saturation" in infiles[0]:
        # FIX: raw string -- "\." is an invalid escape sequence in a
        # normal string literal; the runtime value is unchanged.
        regex_filename = r".+/(.+-.+-.+)\.([0-9]+\.[0-9]+).reference_context.tsv"
        cat = "track,subset"
    else:
        regex_filename = ".+/(.+).reference_context.tsv"
        cat = "track"

    P.concatenateAndLoad(infiles,
                         outfile,
                         regex_filename=regex_filename,
                         cat=cat)
コード例 #35
0
def loadCramQuality(infiles, outfile):
    """Load the quality scores for the different cells into the
    database (summarised table)."""
    # infiles is a nested list; keep only the *.quality files.
    quality_files = [
        fn
        for filenames in infiles
        for fn in filenames
        if fn.endswith(".quality")
    ]

    P.concatenateAndLoad(
        quality_files, outfile,
        cat="track",
        has_titles=False,
        header="cramID,number_reads,cram_quality_score",
        regex_filename="validate.cram.dir/(.*).quality")
コード例 #36
0
def loadUtronIDs(infiles, outfile):
    """Load utron transcript id lists into the database.

    For outputs other than all_utrons_ids.load the input files carry
    an extra match_transcript_id column, which is appended to both the
    header and the indexing options.
    """
    header = "track,transcript_id"
    options = "-i track -i transcript_id"

    if not outfile == "all_utrons_ids.load":
        header += ",match_transcript_id"
        # BUG FIX: the appended fragment lacked a leading space,
        # producing "-i transcript_id-i match_transcript_id" and
        # mangling the options string.
        options += " -i match_transcript_id"

    P.concatenateAndLoad(infiles,
                         outfile,
                         # raw string: "\." is an invalid escape sequence in
                         # a normal string literal (value unchanged).
                         regex_filename=r".+/(.+)\..+\.ids.gz",
                         has_titles=False,
                         cat="track",
                         header=header,
                         options=options)
コード例 #37
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadInsertSizeMetrics(infiles, outfile):
    '''load the insert size metrics to a single table'''

    # Insert-size metrics only exist for paired-end data.
    if PAIRED:
        # Each entry of infiles is a pair; element [0] is the summary.
        picard_summaries = [x[0] for x in infiles]

        P.concatenateAndLoad(picard_summaries, outfile,
                             regex_filename=(".*/.*/(.*)"
                                             ".insert.size.metrics.summary"),
                             cat="sample_id",
                             options='')

    else:
        # NOTE(review): P.run() appears to pick up ``statement`` from
        # the caller's locals -- the variable must keep this name.
        statement = '''echo "Not compatible with SE data"
                       > %(outfile)s
                    '''
        P.run()
コード例 #38
0
ファイル: pipeline_scrnaseq.py プロジェクト: snsansom/scseq
def loadInsertSizeHistograms(infiles, outfile):
    '''load the histograms to a single table'''

    # Insert-size histograms only exist for paired-end data.
    if PAIRED:
        # Each entry of infiles is a pair; element [1] is the histogram.
        picard_histograms = [x[1] for x in infiles]

        P.concatenateAndLoad(picard_histograms, outfile,
                             regex_filename=(".*/.*/(.*)"
                                             ".insert.size.metrics.histogram"),
                             cat="sample_id",
                             options='-i "insert_size" -e')

    else:
        # NOTE(review): P.run() appears to pick up ``statement`` from
        # the caller's locals -- the variable must keep this name.
        statement = '''echo "Not compatible with SE data"
                       > %(outfile)s
                    '''
        P.run()
コード例 #39
0
def load_dexseq(infiles, outfile):
    """Normalise the log2fold_* column headers of the DEXSeq result
    tables, concatenate them into one database table, and remove the
    temporary files."""
    # BUG FIX: sed uses basic regular expressions, where "+" is a
    # literal character, so "\S+" never matched the log2fold_* headers.
    # "\S" is a GNU sed extension and "\+" is BRE one-or-more.
    statement = " checkpoint;".join([
        r" sed 's/log2fold_\S\+/log2fold/' %s > %s.tmp;" % (f, f)
        for f in infiles
    ])

    # NOTE(review): P.run() appears to pick up ``statement`` from the
    # caller's locals -- keep the variable name.
    P.run()

    infiles = ["%s.tmp" % f for f in infiles]
    P.concatenateAndLoad(infiles,
                         outfile,
                         regex_filename=".+/(.+).dexseq.tsv.tmp",
                         options="-i groupID -i featureID -i track -i padj",
                         job_memory="6G")

    # Clean up the temporary sed output files.
    for f in infiles:
        os.unlink(f)
コード例 #40
0
def loadInsertSizeMetrics(infiles, outfile):
    '''load the insert size metrics to a single table'''

    # Only meaningful for paired-end data.
    if PAIRED:
        # Element [0] of each infile pair is the picard summary file.
        picard_summaries = [x[0] for x in infiles]

        P.concatenateAndLoad(picard_summaries,
                             outfile,
                             regex_filename=(".*/.*/(.*)"
                                             ".insert.size.metrics.summary"),
                             cat="sample_id",
                             options='')

    else:
        # NOTE(review): P.run() appears to read ``statement`` from the
        # caller's locals -- do not rename this variable.
        statement = '''echo "Not compatible with SE data"
                       > %(outfile)s
                    '''
        P.run()
コード例 #41
0
def loadInsertSizeHistograms(infiles, outfile):
    '''load the histograms to a single table'''

    # Only meaningful for paired-end data.
    if PAIRED:
        # Element [1] of each infile pair is the picard histogram file.
        picard_histograms = [x[1] for x in infiles]

        P.concatenateAndLoad(picard_histograms,
                             outfile,
                             regex_filename=(".*/.*/(.*)"
                                             ".insert.size.metrics.histogram"),
                             cat="sample_id",
                             options='-i "insert_size" -e')

    else:
        # NOTE(review): P.run() appears to read ``statement`` from the
        # caller's locals -- do not rename this variable.
        statement = '''echo "Not compatible with SE data"
                       > %(outfile)s
                    '''
        P.run()
コード例 #42
0
ファイル: pipeline_caputrec.py プロジェクト: sudlab/Capture_C
def loadAnomolies(infiles, outfile):
    """Concatenate the per-track anomaly tables and load them into one
    database table, indexed on track and probe."""
    load_kwargs = dict(
        regex_filename=".+/(.+).anomolies.tsv.gz",
        options="-i track -i probe",
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #43
0
ファイル: pipeline_caputrec.py プロジェクト: sudlab/Capture_C
def loadInteractionCountMetrics(infiles, outfile):
    """Load the interaction-count metrics into one database table.

    The metrics files live alongside the count files, so the input
    names are rewritten from .tsv.gz to .metrics.tsv first.
    """
    metric_files = [re.sub(".tsv.gz", ".metrics.tsv", infile)
                    for infile in infiles]
    P.concatenateAndLoad(metric_files, outfile,
                         regex_filename=".+/(.+).metrics.tsv")
コード例 #44
0
ファイル: pipeline_caputrec.py プロジェクト: sudlab/Capture_C
def loadInteractionCounts(infiles, outfile):
    """Concatenate the interaction count tables and load them into one
    database table, indexed on track and the two fragments."""
    P.concatenateAndLoad(
        infiles, outfile,
        regex_filename=".+/(.+).tsv.gz",
        options="-i track -i Frag1 -i Frag2")
コード例 #45
0
def load_exon_counts(infiles, outfile):
    """Load exon counts per deduplication method and track into one
    database table."""
    load_kwargs = dict(
        regex_filename="dedup_(.+).dir/(.+).exon_count.tsv.gz",
        cat="method,track",
        options="-i method -i track -i gene_id",
    )
    P.concatenateAndLoad(infiles, outfile, **load_kwargs)
コード例 #46
0
def load_base_level_reproducibility(infiles, outfile):
    """Load base-level reproducibility results per deduplication
    method into one database table."""
    # The method name is captured from the dedup_* directory name.
    P.concatenateAndLoad(
        infiles, outfile,
        cat="method",
        regex_filename="dedup_(.+).dir/.+.rep",
        options="-i method")