Example #1
# -*- coding: utf-8 -*-

from SeaCOW import Query, Nonprocessor

# Create a Query object and set whatever needs to be set.
q = Query()
q.corpus = 'decow16a-nano'  # Lower-case name of the corpus to use.
q.string = '[word="Gartenzwerg"]'  # A normal CQL string as used in NoSketchEngine.
q.max_hits = -1  # Maximal number of hits to return. Ignored for Nonprocessor.
q.attributes = []  # For counting, you don't need word attributes.
q.structures = []  # ... you don't need structural attributes.
q.references = []  # ... you don't need reference attrs.
q.container = 's'  # Which container structure should be used?

# Using the deduplicator would NOT change the outcome. Switch off.
q.set_deduplication(off=True)

# Create a Processor object and attach it to the Query object.
# The Nonprocessor processor does nothing. You can work with the results
# yourself in the finalise method or just get the hits value from the
# query object (see the snippet after this example). It is the size of the
# concordance as reported by Manatee.
p = Nonprocessor()  # Create a processor object of the appropriate type.
q.processor = p  # Attach the processor to the query.
q.run()  # Run the query.

print('Query was: %s' % (q.string))
print('Corpus used: %s' % (q.corpus))
print('Query returned %d hits.' % (q.hits))
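Since the Nonprocessor does nothing with the matches, q.hits is all the query leaves behind, which makes this setup handy for quick frequency checks. The following sketch is not part of the original examples; the helper name and the second query string are illustrative assumptions, not SeaCOW API.

# -*- coding: utf-8 -*-
# Sketch: compare raw hit counts of two counting queries using the same
# settings as Example #1. Helper name and second query string are made up.

from SeaCOW import Query, Nonprocessor

def count_hits(cql_string):
    q = Query()
    q.corpus = 'decow16a-nano'
    q.string = cql_string
    q.max_hits = -1
    q.attributes = []
    q.structures = []
    q.references = []
    q.container = 's'
    q.set_deduplication(off=True)
    q.processor = Nonprocessor()
    q.run()
    return q.hits

for cql in ['[word="Gartenzwerg"]', '[word="Gartenzwerge"]']:
    print('%s => %d hits' % (cql, count_hits(cql)))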
Example #2
# -*- coding: utf-8 -*-

from SeaCOW import Query, ConcordanceLoader
import json  # Just for pretty-printing.

# See sample.py for annotations of these attributes.
q = Query()
q.corpus = 'decow16a-nano'
q.string = '[word="Gartenzwerg"]'
q.max_hits = 10
q.attributes = ['word', 'tag']
q.structures = ['s']
q.references = [
    'doc.url', 'doc.bdc', 'doc.tld', 'doc.id', 'div.bpc', 's.idx', 's.type'
]
q.container = 's'
q.set_deduplication()

# The concordance loader has just one settable attribute.
p = ConcordanceLoader()
p.full_structure = True  # Convert token attributes to dicts as well, otherwise |-separated.
q.processor = p
q.run()

# Now you have a nicely structured Python object in p.concordance
# (see the short follow-up sketch after this example).

# The json library just provides a convenient way of displaying the
# resulting structures.
print(json.dumps(p.concordance[0:2], sort_keys=False, indent=2))
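Because p.concordance is an ordinary, JSON-serialisable Python list, it can be handed straight to standard tooling. A minimal follow-up sketch, continuing from the script above; the output file name is just an illustrative choice, not something SeaCOW prescribes.

# Sketch: report the number of loaded hits and write the whole concordance
# to a JSON file for later processing. The file name is illustrative only.
print('Loaded %d hits from %s.' % (len(p.concordance), q.corpus))

with open('gartenzwerg_concordance.json', 'w') as outfile:
    json.dump(p.concordance, outfile, indent=2)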
Example #3
# -*- coding: utf-8 -*-

from SeaCOW import Query, ConcordanceWriter, DependencyBuilder

# Create a Query object and set whatever needs to be set.
q = Query()
q.corpus = 'decow16a-nano'  # Lower-case name of the corpus to use.
q.string = '[word="Motorsäge"]'  # A normal CQL string as used in NoSketchEngine.
q.max_hits = 10  # Maximal number of hits to return. Use when testing queries!
q.structures = ['s']  # Structure markup to export from corpus.
q.references = [
    'doc.url', 'doc.bdc', 'doc.tld', 'doc.id', 'div.bpc', 's.idx', 's.type'
]
# Which reference attributes (of structures) to export.
q.container = 's'  # Which container structure should be exported?
q.set_deduplication()  # Use deduplication.

# The dependency attributes are needed for DependencyBuilder() to work.
q.attributes = ['word', 'depind', 'dephd', 'deprel', 'tag', 'lemma']

# The dependency builder reconstructs (and outputs) dependency trees.
# If you want to filter structures, create a class which inherits from
# DependencyBuilder and override the filtre() method (not a typo: the
# method really is called filtre).
p = DependencyBuilder()

# The following five are 0-based indices into q.attributes as defined above.
p.column_token = 0  # Which column contains the token?
p.column_index = 1  # Which column contains the dependency index?
p.column_head = 2  # Which column contains the dependency head index?
p.column_relation = 3  # Which column contains the dependency relation?
p.attribs = [4, 5]  # Which columns hold additional attributes (tag, lemma)?

# Attach the processor and run, as in the other examples.
q.processor = p
q.run()
Example #4
# -*- coding: utf-8 -*-

from SeaCOW import Query, ConcordanceWriter, DependencyBuilder
from iterated import IterativelyFiltredDependencyBuilder

# FIRST QUERY.

# Create a Query object and set whatever needs to be set.
q = Query()
q.corpus = 'encow16a-nano'
q.string = '[word="give" & tag="VB[ZPD]"]'
q.max_hits = 100
q.attributes = ['word', 'depind', 'dephd', 'deprel', 'tag', 'lemma']
q.structures = ['s']
q.references = [
    'doc.url', 'doc.bdc', 'doc.tld', 'doc.id', 'div.bpc', 's.idx', 's.type'
]
q.container = 's'

# This enables an efficient duplicate remover using a scaling Bloom filter
# (a conceptual sketch of the idea follows this example).
q.set_deduplication()

p = IterativelyFiltredDependencyBuilder()
p.column_token = 0
p.column_index = 1
p.column_head = 2
p.column_relation = 3
p.attribs = [4, 5]
p.fileprefix = 'give_iterated'
p.savejson = True
p.saveimage = 'png'

# Attach the processor and run the first query, as in the other examples.
q.processor = p
q.run()
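The comment in this example describes q.set_deduplication() as an efficient duplicate remover based on a scaling Bloom filter. The snippet below is only a conceptual illustration of the Bloom filter idea (a probabilistic "seen before?" test that can produce false positives but never false negatives); it is a fixed-size toy, not SeaCOW's actual scaling implementation, and every name in it is made up.

# -*- coding: utf-8 -*-
# Conceptual sketch only: NOT SeaCOW's deduplicator, just an illustration
# of the Bloom filter idea behind q.set_deduplication().

import hashlib

class TinyBloomFilter(object):
    def __init__(self, size_bits=2 ** 20, num_hashes=4):
        self.size = size_bits
        self.num_hashes = num_hashes
        self.bits = bytearray(size_bits // 8)

    def _positions(self, item):
        # Derive several bit positions from independent hashes of the item.
        for i in range(self.num_hashes):
            digest = hashlib.md5(('%d:%s' % (i, item)).encode('utf-8')).hexdigest()
            yield int(digest, 16) % self.size

    def add(self, item):
        for pos in self._positions(item):
            self.bits[pos // 8] |= 1 << (pos % 8)

    def seen(self, item):
        # True if all positions are set; may be a false positive.
        return all(self.bits[pos // 8] & (1 << (pos % 8)) for pos in self._positions(item))

seen_before = TinyBloomFilter()
for sentence in ['Der Gartenzwerg lacht .', 'Der Gartenzwerg lacht .', 'Ein anderer Satz .']:
    if seen_before.seen(sentence):
        print('Duplicate skipped: %s' % sentence)
    else:
        seen_before.add(sentence)
        print('New sentence kept: %s' % sentence)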
Example #5
# -*- coding: utf-8 -*-

# This dumps a very raw concordance format.
# It's very efficient, though.
# See sample.py for options.

from SeaCOW import Query, ConcordanceDumper

q = Query()
q.corpus          = 'decow16b'
q.string          = '[word="Holzweg"]'
q.max_hits        = 10
q.attributes      = ['word']
q.structures      = ['s']
q.references      = ['doc.url', 'doc.id', 's.idx']
q.container       = 's'
q.set_deduplication()

p                 = ConcordanceDumper()
p.filename        = 'output/holzweg.txt'
q.processor       = p
q.run()
Example #6
# -*- coding: utf-8 -*-

import random
from SeaCOW import Query, ConcordanceWriter, DependencyBuilder

random.seed(2914)

q = Query()
q.corpus          = 'precox20lda25'
q.string          = '<doc id="[0-9a-f].+">'
q.random_subset   = 0.09
q.attributes      = ['word']
q.structures      = ['s.idx', 'div.bpc', 'doc.bdc', 'doc.url', 'doc.id', 'doc.pregister', 'doc.pregbrob']
q.references      = ['doc.url', 'doc.id']
q.container       = 'doc'

p                 = ConcordanceWriter()
p.filename        = 'sample.csv'
q.processor       = p
q.run()

Example #7
# -*- coding: utf-8 -*-

# This dumps a very raw concordance format; it is very efficient, though.
# See sample.py for options. The snippet after this example peeks at the
# first few lines of the output.

from SeaCOW import Query, ConcordanceDumper

q = Query()
q.corpus = 'encow16a-nano'
q.string = '[tag="N."][word="attention"]'
q.max_hits = -1
q.attributes = ['word']
q.structures = ['s']
q.references = ['doc.url', 'doc.id', 's.idx']
q.container = 's'
q.set_deduplication()

p = ConcordanceDumper()
p.filename = 'dump.txt'
q.processor = p
q.run()
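Since the comment calls the dumper output very raw, the quickest way to see what the format looks like is to read a few lines back. A minimal sketch, assuming only that the dumper has finished writing p.filename once q.run() returns.

# Sketch: peek at the first three lines of the raw dump written above.
with open(p.filename) as dumped:
    for i, line in enumerate(dumped):
        if i >= 3:
            break
        print(line.rstrip('\n'))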