def __call__(self, track, slice=None):
    '''Return TPM expression values for the configured genes of interest.

    Joins gene-level sailfish TPM estimates with the per-sample factor
    annotations, restricts the result to the genes listed under
    ``genes_of_interest`` in the pipeline_rnaseqqc.py configuration and
    returns the merged table indexed by factor.

    ``track`` and ``slice`` are part of the tracker call interface and
    are not used by this query.
    '''
    exp_statement = """
    SELECT TPM, gene_id, sample_name
    FROM sailfish_genes AS A
    JOIN samples AS B
    ON A.sample_id = B.id"""

    exp_df = self.getDataFrame(exp_statement)

    factors_statement = '''
    SELECT factor, factor_value, sample_name
    FROM samples AS A
    JOIN factors AS B
    ON A.id = B.sample_id
    WHERE factor != 'genome'
    '''

    factors_df = self.getDataFrame(factors_statement)

    merged_df = pd.merge(exp_df, factors_df,
                         left_on="sample_name", right_on="sample_name")

    genes = Pipeline.asList(Pipeline.peekParameters(
        ".", "pipeline_rnaseqqc.py")['genes_of_interest'])

    # Take an explicit copy: boolean-mask selection can return a
    # view-like slice, and assigning the float-cast TPM column back
    # into it would raise pandas' SettingWithCopyWarning and may
    # silently fail to write.
    interest_df = merged_df[merged_df['gene_id'].isin(genes)].copy()
    interest_df['TPM'] = interest_df['TPM'].astype(float)

    return interest_df.reset_index().set_index("factor")
def buildIndirectMaps(infile, outfile, track):
    '''build a map between query and target, linking via intermediate
    targets.

    Chains the per-stage ``*.over.psl.gz`` files configured in
    ``PARAMS["<track>_path"]`` through ``pslMap`` and writes the
    gzip-compressed composite map to *outfile*.

    :param infile: unused; present for the ruffus task signature.
    :param outfile: path of the gzipped psl output file.
    :param track: key used to look up the mapping path in PARAMS.
    :raises ValueError: if an intermediate psl file is missing.
    '''
    # NOTE(review): to_cluster looks unused but is presumably picked up
    # from locals() by P.run() to request cluster submission -- confirm
    # before removing.
    to_cluster = True

    path = P.asList(PARAMS["%s_path" % track])

    E.info("path=%s" % str(path))

    statement = []

    # Fail early: every stage of the chain must already exist on disk.
    for stage, part in enumerate(path):
        filename = part + ".over.psl.gz"
        if not os.path.exists(filename):
            raise ValueError(
                "required file %s for %s (stage %i) does not exist." %
                (filename, outfile, stage))

        if stage == 0:
            # first stage: just decompress the initial map
            statement.append('''gunzip < %(filename)s''' % locals())
        else:
            # subsequent stages: project through the next map with pslMap
            statement.append('''
               pslMap stdin <(gunzip < %(filename)s) stdout
            ''' % locals())

    statement.append("gzip")

    statement = " | ".join(statement) + " > %(outfile)s " % locals()

    P.run()
def publish():
    '''publish files.'''
    # mapping of export category -> list of files to publish
    export_files = {"bigwigfiles": glob.glob("*/*.bigwig")}

    if PARAMS['ucsc_exclude']:
        for category, filenames in export_files.items():
            kept = set(filenames)
            for fname in filenames:
                # drop a file as soon as any exclusion pattern matches it
                if any(re.match(pattern, fname)
                       for pattern in P.asList(PARAMS['ucsc_exclude'])):
                    kept.discard(fname)
            export_files[category] = list(kept)

    # publish web pages
    E.info("publishing report")
    P.publish_report(export_files=export_files)

    E.info("publishing UCSC data hub")
    P.publish_tracks(export_files)
> %(outfile)s ''' P.run() ########################################################################## ########################################################################## ########################################################################## # extracting alignments from maf files ########################################################################## if "maf_dir" in PARAMS and "maf_tracks" in PARAMS: @files([(("%s/*.maf.gz" % PARAMS["maf_dir"]), "%sTo%s.raw.psl.gz" % (PARAMS["%s_label" % track], PARAMS["maf_master"]), track) for track in P.asList(PARAMS["maf_tracks"])]) def extractPairwiseAlignmentSingleFile(infiles, outfile, track): '''build pairwise genomic aligment from maf files.''' try: os.remove(outfile) except OSError: pass genomefile = PARAMS["%s_genome" % track] to_cluster = True for infile in infiles: E.info("adding %s" % infile)
"genesets_abinitio_coding": "pruned.gtf.gz", "genesets_abinitio_lncrna": "pruned.gtf.gz", "genesets_reference": "reference.gtf.gz", "genesets_refcoding": "refcoding.gtf.gz", "genesets_previous": "" }) PARAMS = P.PARAMS PARAMS.update( P.peekParameters(PARAMS["annotations_annotations_dir"], "pipeline_annotations.py", prefix="annotations_", update_interface=True)) PREVIOUS = P.asList(PARAMS["genesets_previous"]) def connect(): '''connect to database. This method also attaches to helper databases. ''' dbh = sqlite3.connect(PARAMS["database_name"]) statement = '''ATTACH DATABASE '%s' as annotations''' % ( PARAMS["annotations_database"]) cc = dbh.cursor() cc.execute(statement) cc.close()
"../pipeline.ini", "pipeline.ini"], defaults={ 'annotations_dir': "", 'paired_end': False}) PARAMS = P.PARAMS PARAMS_ANNOTATIONS = P.peekParameters( PARAMS["annotations_dir"], "pipeline_annotations.py") # get options that are to be tested cufflinks_options = {} if "cufflinks_test_options" in PARAMS: options = P.asList(PARAMS["cufflinks_test_options"]) for option in options: if option == "--pre-mrna-fraction" \ or option == "--small-anchor-fraction" \ or option == "--max-multiread-fraction": cufflinks_options[option] = [0, 0.5, 0.75, 1] elif option == "--min-isoform-fraction": cufflinks_options[option] = [0.05, 0.1, 0.5, 1] elif option == "--junc-alpha": cufflinks_options[option] = [0.001, 0.01, 0.1] elif option == "--min-frags-per-transfrag": cufflinks_options[option] = [1, 5, 10] elif option == "--overhang-tolerance": cufflinks_options[option] = [0, 2, 5, 8] elif option == "--overlap-radius": cufflinks_options[option] = [50, 100, 200]
--fdr=%(edger_fdr)f" | grep -v "warnings" | gzip > %(outfile)s ''' P.run() @follows(aggregateTiledReadCounts, mkdir(os.path.join(PARAMS["exportdir"], "diff_methylation"))) @files([((data, design), "diff_methylation/%s_%s.deseq.gz" % (P.snip(os.path.basename(data), ".counts.tsv.gz"), P.snip(os.path.basename(design), ".tsv"))) for data, design in itertools.product( glob.glob("diff_methylation/*.counts.tsv.gz"), P.asList(PARAMS["deseq_designs"]))]) def runDESeq(infiles, outfile): '''estimate differential expression using DESeq. The final output is a table. It is slightly edited such that it contains a similar output and similar fdr compared to cuffdiff. ''' runDE(infiles, outfile, "deseq") ######################################################################### ######################################################################### #########################################################################
P.run() statement = '''find %(resultsdir)s -not -name "*.err.*" -exec cat {} \; | gzip > %(outfile)s''' P.run() ################################################################### ################################################################### ################################################################### @files([(x, "%s_%s.output.gz" % (x[:-len(".features.gz")], y), y) for x, y in itertools.product(glob.glob("*.features.gz"), P.asList(PARAMS["polyphen_models"]))]) def runPolyphen(infile, outfile, model): '''run POLYPHEN on feature tables to classify SNPs. ''' to_cluster = False # need to run in chunks for large feature files statement = """gunzip < %(infile)s | %(cmd-farm)s --split-at-lines=10000 --output-header "perl %(polyphen_home)s/bin/run_weka_cpp.pl -l %(polyphen_home)s/models/%(model)s.UniRef100.NBd.f11.model -p
statement = '''find %(resultsdir)s -not -name "*.err.*" -exec cat {} \; > %(outfile)s''' P.run() ################################################################### ################################################################### ################################################################### # do not run in parallel. run_weka.pl creates a $testfile # that is not unique. run_weka.pl and pph2arff.pl could either # be patched or the following jobs run in sequence. @jobs_limit(1, "polyphen") @files([(buildPolyphenFeatures, "polyphen_%s.output.gz" % x, x) for x in P.asList(PARAMS["polyphen_models"])]) def runPolyphen(infile, outfile, model): '''run POLYPHEN on feature tables to classify SNPs. ''' # options # -f: feature set, default is F11 # -c: classifier, default is NBd (Naive Bayes with discretization) # -l: model name, default is HumDiv statement = ''' %(polyphen_home)s/bin/run_weka.pl -l %(polyphen_home)s/models/%(model)s.UniRef100.NBd.f11.model %(infile)s | gzip > %(outfile)s 2> %(outfile)s.log