Python Document Examples

Programming Language: Python

Namespace/Package Name: djape.client

Class/Type: Document

Examples at hotexamples.com: 2

Python Document - 2 examples found. These are the top-rated real-world Python examples of djape.client.Document extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

extend(2)

Example #1

Show file

File: importer.py Project: devfort/wildlifenearyou

def import_into_xapian():
    """Rebuild the species search database from the import file.

    Creates the database named by ``settings.XAPIAN_SPECIES_DB`` (replacing
    any existing one via ``overwrite=True``) with three stored free-text
    fields, then adds one ``Document`` per row yielded by
    ``import_from_file()``. Rows with a falsy ``scientific_name`` are
    skipped. Documents are sent with ``client.bulkadd`` in batches, and a
    progress line is printed after each full batch.
    """
    # Number of documents accumulated before each bulkadd call.
    batch_size = 1000

    client = Client(settings.XAPIAN_BASE_URL, settings.XAPIAN_SPECIES_DB)
    client.newdb([{
        'field_name': 'common_name',
        'store': True,
        'freetext': {'language': 'en'} # language used for stemming
    }, {
        'field_name': 'scientific_name',
        'store': True,
        'freetext': {'language': 'en'} # Remove when stemming bug is fixed
    }, {
        'field_name': 'freebase_id',
        'store': True,
        'freetext': {'language': 'en'}
    }], overwrite=True) # replaces existing index if there is one
    # We have a database!

    # Now we create documents
    queue = []
    count = 0
    for row in import_from_file():
        if not row['scientific_name']:
            continue
        count += 1
        doc = Document()
        # doc.id = 'X' will over-ride auto ID /AND/ cause replace if exists
        doc.extend([
            ('common_name', row['name']),
            ('scientific_name', row['scientific_name']),
            ('freebase_id', row['id']),
        ])
        # client.add(doc) - would work here
        queue.append(doc)
        if len(queue) >= batch_size:
            client.bulkadd(queue)
            queue = []
            # Parenthesised single-argument form: same output on Python 2,
            # and not a syntax error on Python 3.
            print("Imported %d" % count)
    # Catch the remainder (the final partial batch, if any)
    if queue:
        client.bulkadd(queue)

Example #2

Show file

File: importer.py Project: devfort/wildlifenearyou

def import_into_xapian():
    """Rebuild the location search database from the import file.

    Attempts to delete the database named by ``settings.XAPIAN_LOCATION_DB``
    (failures are ignored), recreates it with the place/postcode/geo fields
    declared below, then adds one ``Document`` per usable row yielded by
    ``import_from_file()``.

    A row is skipped when its latitude or longitude is falsy, or when
    ``make_description(row)`` returns a description that has already been
    used by an earlier row. Documents are sent with ``client.bulkadd`` in
    batches, and a progress line is printed after each full batch.
    """
    # Number of documents accumulated before each bulkadd call.
    batch_size = 1000

    client = Client(
        settings.XAPIAN_BASE_URL, settings.XAPIAN_LOCATION_DB
    )
    try:
        client.deldb()
    except Exception:
        # Best-effort delete: presumably this fails when the database does
        # not exist yet - TODO confirm and narrow to the client's specific
        # error type. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    client.newdb([{
        'field_name': 'place_name',
        'store': True,
        'freetext': {'language': 'en'} # language used for stemming
    }, {
        'field_name': 'county', # Maps to admin_name2
        'store': True,
        'freetext': {}
    }, {
        'field_name': 'country_code',
        'store': True,
        'freetext': {} # TODO: Use exact match here, not yet implemented
    }, {
        'field_name': 'postal_code',
        'store': True,
        'freetext': {} # TODO: Can we do prefix search only?
    }, {
        'field_name': 'description',
        'store': True, # stored but not indexed
    }, {
        'field_name': 'latlon',
        'store': True,
        'type': 'geo',
        'geo': {}, # no options yet
    }])
    # We have a database!

    # We throw away anything that results in a description that we have
    # already used for something else. There are only 213 (out of 27,000)
    # where a duplicate description has more than one lat/lon pair - so
    # we've chosen to just discard those.
    seen_descriptions = set()

    # Now we create documents
    queue = []
    count = 0
    for row in import_from_file():
        # Some (3) of them don't have lat or lon - ignore those
        if not (row['latitude'] and row['longitude']):
            continue
        description = make_description(row)
        if description in seen_descriptions:
            continue
        seen_descriptions.add(description)
        count += 1
        doc = Document()
        # doc.id = 'X' will over-ride auto ID /AND/ cause replace if exists
        doc.extend([
            ('place_name', row['place_name']),
            ('county', row['admin_name2']),
            ('postal_code', row['postal_code']),
            ('country_code', row['country_code']),
            ('description', description),
            # Latitude and longitude joined by a single space
            ('latlon', '%s %s' % (
                row['latitude'], row['longitude'],
            )),
            # TODO: Ignoring accuracy field for the moment
        ])
        # client.add(doc) - would work here
        queue.append(doc)
        if len(queue) >= batch_size:
            client.bulkadd(queue)
            queue = []
            # Parenthesised single-argument form: same output on Python 2,
            # and not a syntax error on Python 3.
            print("Imported %d" % count)
    # Catch the remainder (the final partial batch, if any)
    if queue:
        client.bulkadd(queue)