import os

from dumptruck import DumpTruck
from lxml.html import tostring

# Fragment: `html` (the parsed page) and `l` (a helper module providing
# parse_row) are defined elsewhere; the block through the `return` is
# excerpted from inside a page-parsing function.
trs = html.xpath(
    '//table[@style="border-collapse: collapse; width: 100%;"]/descendant::tr')

def do_row(tr):
    try:
        return l.parse_row(tr)
    except:
        print tostring(tr)  # show the row that failed to parse, then re-raise
        raise

return map(do_row, trs[2:])

# Schema
dt = DumpTruck(dbname='/tmp/finalip.db')
dt.create_table({u'DA Number': u'NAE-2009-01067'}, 'finalip', if_not_exists=True)
dt.create_index([u'DA Number'], 'finalip', unique=True, if_not_exists=True)

# Skip finished stuff
pages = set([(row['Year'], row['Month'], row['Page'])
             for row in dt.execute('SELECT Year, Month, Page FROM finalip')])

# Populate
for dirname, subdirnames, filenames in os.walk(
        os.path.join(os.environ['READER_ROOT'], '..', 'finalips')):
    if subdirnames != []:
        continue
    for filename in filenames:
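# A minimal sketch (assumptions: in-memory database, illustrative values;
# not part of the scraper above) of the DumpTruck pattern its schema block
# relies on: create_table() infers the column from an example row, and the
# unique index is what lets upsert() overwrite instead of duplicating.
from dumptruck import DumpTruck

_dt = DumpTruck(dbname=':memory:')
_dt.create_table({u'DA Number': u'NAE-2009-01067'}, 'finalip', if_not_exists=True)
_dt.create_index([u'DA Number'], 'finalip', unique=True, if_not_exists=True)
_dt.upsert({u'DA Number': u'NAE-2009-01067'}, 'finalip')
_dt.upsert({u'DA Number': u'NAE-2009-01067'}, 'finalip')  # replaces the first row
assert len(_dt.execute('SELECT * FROM finalip')) == 1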
def _connect(dbname='scraperwiki.sqlite'):
    'Initialize the database (again). This is mainly for testing.'
    global dt
    dt = DumpTruck(dbname=dbname, adapt_and_convert=False)
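# Usage sketch (assumed, for illustration): a test can repoint the
# module-level connection at a throwaway in-memory database before
# exercising the rest of the module.
_connect(dbname=':memory:')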
# coding: utf-8
from dumptruck import DumpTruck
import csv, os, glob

_here = os.path.split(__file__)[0]
store = DumpTruck(dbname="db/documents.db")

# Index existing files by a ten-character key sliced from the filename.
already = dict([(a[26:36], a) for a in [
    os.path.split(a)[1]
    for a in glob.glob("/home/martin/Dropbox/blackrock-scraper/data/*")
]])
already_downloaded = dict([(a[26:36], a) for a in [
    os.path.split(a)[1]
    for a in glob.glob(os.path.join(_here, "data/html/*"))
]])
print len(already_downloaded), len(already)

# List of all companies with the most recent report date and number of reports
dt = csv.DictWriter(
    open(os.path.join(_here, "data/tables", "ciks.csv"), "w"),
    ['act', 'num', 'cik', 'name', 'filename', 'already',
     'already_downloaded', 'exists', 'link'],
    delimiter=";")
# Header row; the labels are German: "Aktuellster Bericht" = "most recent
# report", "Anzahl der Berichte" = "number of reports".
dt.writerow(
    dict(act="Aktuellster Bericht", num="Anzahl der Berichte",
         cik="Central Index Key",
import logging, sys, os
from lxml import etree
import requests
from dumptruck import DumpTruck, Pickle

_here = os.path.split(__file__)[0]
store = DumpTruck(dbname=os.path.join(_here, "db/documents.db"))
parser = etree.HTMLParser()


def getTree(url):
    return etree.parse(url, parser)


logger = logging.getLogger(os.path.split(__file__)[1])
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)


def get_nq_for_cik(cik):
    try:
        tree = getTree(
            "http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=%s&type=N-Q%%25&dateb=&owner=include&start=0&count=40&output=atom" % cik)
    except Exception, e:
        logger.error("Error searching for CIK %s: %s" % (cik, e))
        return  # no tree to parse, so skip this CIK
    for entry in tree.xpath("//entry"):
        link = entry.xpath("link/@href")[0]
        date = entry.xpath("updated/text()")[0]
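# The fragment stops mid-loop; a hypothetical continuation (the table name
# `filings` and its columns are invented here, not from the source) would
# persist each entry through the DumpTruck store opened above, e.g.:
#
#         store.insert({'cik': cik, 'link': link, 'updated': date}, 'filings')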
import datetime

from dumptruck import DumpTruck


def main():
    dt = DumpTruck(dbname='metrics.db')
    # The example row gives DumpTruck the column names and types.
    dt.create_table({'portal': 'abc', 'date': datetime.date.today()}, 'series')
    dt.create_index(['portal', 'date'], 'series')
    dt.upsert(list(table()), 'series')
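# `table` is defined elsewhere in the script; a hypothetical stub (any
# column beyond the two shown above is invented) with the shape the
# create_table()/upsert() calls imply:
def table():
    yield {'portal': 'example.gov', 'date': datetime.date.today(), 'datasets': 0}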
def open_spider(self, spider):
    self.dt = DumpTruck(dbname=settings['DB_PATH'], auto_commit=True)
    # Preload the ids of auctions already in the database.
    id_data = self.dt.execute('SELECT id FROM auctions')
    self.ids = [x['id'] for x in id_data]
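# Hypothetical companion method (not in the source): a process_item that
# uses the preloaded ids to drop auctions the pipeline has already stored.
from scrapy.exceptions import DropItem

def process_item(self, item, spider):
    if item['id'] in self.ids:
        raise DropItem('auction %s already stored' % item['id'])
    self.dt.insert(dict(item), 'auctions')
    self.ids.append(item['id'])
    return item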
#!/usr/bin/env python2
import os, json
from dumptruck import DumpTruck

dt = DumpTruck(dbname='/tmp/catalog.db')

# Create the table with a composite primary key on (portal, identifier).
dt.execute('''
CREATE TABLE IF NOT EXISTS "catalog" (
    "portal" TEXT NOT NULL,
    "identifier" TEXT NOT NULL,
    PRIMARY KEY ("portal", "identifier")
);''')

for data_json in os.listdir('catalogs'):
    # Load into memory, skipping the first element.
    data = json.load(open(os.path.join('catalogs', data_json)))[1:]
    # Add the portal, taken from the file name.
    portal = data_json.replace('.json', '')
    for row in data:
        row['portal'] = portal
    # Put in the database.
    dt.insert(data, 'catalog')
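# Re-run note (an assumption, not in the original): because "catalog" has a
# composite primary key, running the loop a second time over the same files
# would raise sqlite3.IntegrityError on insert(). DumpTruck's upsert(),
# which issues INSERT OR REPLACE, makes the load idempotent:
#
#     dt.upsert(data, 'catalog')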
def _connect(dbname=DATABASE_NAME, timeout=DATABASE_TIMEOUT):
    'Initialize the database (again). This is mainly for testing.'
    global dt
    dt = DumpTruck(dbname=dbname, adapt_and_convert=False, timeout=timeout)
#!/usr/bin/env python
import json

from dumptruck import DumpTruck

dt = DumpTruck(dbname='applications.db')


def scott_data():
    # Total impacted acreage per parish, keyed the way the GeoJSON spells
    # county names (upper case, with "SAINT" abbreviated to "ST").
    sql = '''
    SELECT "parish", sum("acreage") AS 'acreage'
    FROM application
    WHERE "type" = 'impact' AND "parish" != ''
    GROUP BY "parish";
    '''
    return {
        row['parish'].upper().replace('SAINT', 'ST'): (row['parish'], row['acreage'])
        for row in dt.execute(sql)
    }


scott = scott_data()
parishes = json.load(open('parishes.json'))
max_impacted_acres = max([v[1] for v in scott.values()])
for feature in parishes['features']:
    feature['properties']['impacted_acres'] = scott.get(
        feature['properties']['COUNTY'], (None, 0))[1]
    feature['properties']['impacted_acres_prop_max'] = scott.get(
        feature['properties']['COUNTY'], (None, 0))[1] / max_impacted_acres
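# The snippet ends without persisting the result; a plausible final step
# (assumed; the output filename is invented) writes the enriched GeoJSON
# back out:
with open('parishes-impacted.json', 'w') as fp:
    json.dump(parishes, fp)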