예제 #1
0
def db_connect():
    """Return the cached database connection, creating it on first use.

    The connection lives in the first slot of the module-level
    ``CONNECTION`` list. When that list is empty, the URL stored under the
    ``sqlalchemy.url`` config key is logged, passed to ``sqlaload.connect``
    and the result is appended to the list before being returned.
    """
    if len(CONNECTION) > 0:
        # Already connected during an earlier call.
        return CONNECTION[0]

    # Imported lazily, only on the call that actually has to connect.
    import sqlaload as sl
    url = config_get('sqlalchemy.url')
    log.info('Using database: %s', url)
    CONNECTION.append(sl.connect(url))
    return CONNECTION[0]
예제 #2
0
def db_connect():
    """Hand out one shared connection, connecting lazily.

    ``CONNECTION`` (a module-level list) acts as the cache: if it already
    holds an entry, that entry is returned unchanged. Otherwise the
    ``sqlalchemy.url`` setting is looked up via ``config_get``, logged,
    and used to build a connection that is stored in ``CONNECTION``.
    """
    already_connected = len(CONNECTION) != 0
    if not already_connected:
        import sqlaload as sl
        db_url = config_get('sqlalchemy.url')
        log.info('Using database: %s', db_url)
        new_connection = sl.connect(db_url)
        CONNECTION.append(new_connection)
    return CONNECTION[0]
예제 #3
0
def etl_engine():
    """Connect to the ETL database through ``sqlaload``.

    The connection URL is read from ``app.config['ETL_URL']``; the value
    returned by ``sqlaload.connect`` is passed straight back to the caller.
    """
    # Imported at call time, before the config lookup, as in the original.
    from sqlaload import connect as sl_connect
    etl_url = app.config['ETL_URL']
    return sl_connect(etl_url)
예제 #4
0
import SETTINGS
import logging

import sqlaload as sl

from extract import extract
from entities import create_entities, update_entities
from load import load
from setup import setup, make_grano
from transform import transform
from network_entities import update_network_entities


if __name__ == '__main__':
    # Command-line entry point: runs the pipeline steps in order -- extract,
    # entity creation/update, transform, then setup and load into grano.
    import sys
    logging.basicConfig(level=logging.DEBUG)
    # Validate the arguments explicitly. The previous ``assert`` vanished
    # under ``python -O`` and printed a literal "%s" because the usage
    # string was never interpolated with the program name.
    if len(sys.argv) != 3:
        sys.exit("Usage: %s [ir_source_file] [ap_source_file]" % sys.argv[0])
    ir_source_file = sys.argv[1]
    ap_source_file = sys.argv[2]
    engine = sl.connect(SETTINGS.ETL_URL)
    extract(engine, ir_source_file, ap_source_file)
    update_network_entities(engine, 'network_entities.csv')
    create_entities(engine)
    update_entities(engine, 'entities.csv')
    transform(engine)
    grano = make_grano()
    setup(engine, grano)
    load(engine, grano)
예제 #5
0
from datetime import datetime
import sqlaload as sl
import sys

def dump_table(engine, table):
    """Write all rows of ``table`` to a date-stamped CSV file.

    The output file is named ``<table.name>-<YYYY-MM-DD>.csv`` (current UTC
    date) and is created relative to the working directory.

    :param engine: connection/engine handed to ``sl.all``.
    :param table: table object; its ``name`` attribute is used for the
        file name and it is passed to ``sl.all`` to fetch the rows.
    """
    file_name = '%s-%s.csv' % (table.name,
                               datetime.utcnow().strftime("%Y-%m-%d"))
    # The context manager closes (and thereby flushes) the file even when
    # the dump raises; the handle was previously left open.
    with open(file_name, 'wb') as fh:
        sl.dump_csv(sl.all(engine, table), fh)

if __name__ == '__main__':
    # Command-line entry point: dump the 'bund' table of the database at
    # the given engine URL.
    # Explicit check instead of ``assert``: it survives ``python -O`` and
    # actually fills the program name into the usage message.
    if len(sys.argv) != 2:
        sys.exit("Usage: %s [engine-url]" % sys.argv[0])
    engine = sl.connect(sys.argv[1])
    table = sl.get_table(engine, 'bund')
    dump_table(engine, table)
from lxml import etree
from pprint import pprint

import sqlaload as sl

# Module-level handles on the SQLite budget database and its 'budget' table.
engine = sl.connect('sqlite:///budget.db')
table = sl.get_table(engine, 'budget')
year = 2010

# Field names grouped by figure kind ('total', 'comm', 'pay'); presumably
# these are column names of the 'budget' table -- confirm against its users.
FIGURE_FIELDS = {
    'total': [
        'amount_total',
        'amount_reserve_total_total',
        'amount_reserve_figure_total',
    ],
    'comm': [
        'amount_comm',
        'amount_reserve_comm_comm',
        'amount_reserve_figure_comm',
    ],
    'pay': [
        'amount_pay',
        'amount_reserve_pay_pay',
        'amount_reserve_figure_pay',
    ],
}

def xml_dict(file_name, depth=2):
    doc = etree.parse(file_name)
    def _node(node, depth):
        data = {'!name': node.tag, '!e': node}
        if node.tail is not None and len(node.tail.strip()):
            data[':tail'] = node.tail
        if node.text is not None and len(node.text.strip()):
            data[':text'] = node.text
        for a, v in node.attrib.items():
            data['@' + a] = v
        if depth > 0:
            for child in node:
                cd = _node(child, depth-1)
                if not child.tag in data:
                    data[child.tag] = cd
                elif isinstance(data[child.tag], list):
예제 #7
0
def etl_engine():
    """Return a ``sqlaload`` connection for the configured ETL database.

    Looks up ``ETL_URL`` in ``app.config`` (by subscript, so the key must be
    present) and feeds it to ``sqlaload.connect``.
    """
    from sqlaload import connect as open_connection
    target_url = app.config['ETL_URL']
    return open_connection(target_url)
예제 #8
0
파일: common.py 프로젝트: pudo/dpkg-uk25k
def db_connect():
    """Connect to the configured database and return the connection.

    The URL comes from the ``sqlalchemy.url`` config entry and is logged at
    INFO level before ``sl.connect`` is called. Every call connects anew.
    """
    url = config_get('sqlalchemy.url')
    log.info('Using database: %s', url)
    connection = sl.connect(url)
    return connection
예제 #9
0
def etl_engine():
    """Connect to the ETL database via ``sl.connect``.

    The URL is fetched with ``app.config.get('ETL_URL')``, so whatever
    ``get`` yields for a missing key is handed to ``sl.connect`` as-is.
    """
    etl_url = app.config.get('ETL_URL')
    return sl.connect(etl_url)
예제 #10
0
def make_engine():
    """Connect to the database named by the ``FTS_URL`` environment variable.

    :returns: the result of ``sl.connect`` for that URL.
    :raises AssertionError: if ``FTS_URL`` is not set in the environment.
    """
    db_url = os.environ.get('FTS_URL')
    if db_url is None:
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is kept so that
        # existing callers see the same error as before.
        raise AssertionError("Set FTS_URL in the environment!")
    return sl.connect(db_url)
예제 #11
0
def db_connect():
    """Open a fresh connection to the database from the configuration.

    Reads the ``sqlalchemy.url`` setting through ``config_get``, logs which
    database is being used, and returns what ``sl.connect`` produces.
    """
    target = config_get('sqlalchemy.url')
    log.info('Using database: %s', target)
    handle = sl.connect(target)
    return handle
예제 #12
0
def db_connect():
    """Connect to the ``uk25k`` PostgreSQL database via ``sl.connect``."""
    database_url = "postgresql:///uk25k"
    return sl.connect(database_url)
예제 #13
0
    a_table = sl.get_table(engine, 'article')
    for data in sl.find(engine, a_table):
        up = {'number': data['number']}
        slug_parts = data['canonical_url'].split('/')[3:]
        if len(slug_parts) > 3:
            print slug_parts
        if len(slug_parts) == 3:
            up['ressort'], up['subressort'], _ = slug_parts
        elif len(slug_parts) == 2:
            up['ressort'], _ = slug_parts
        up['date'] = parse_date(data['date_text'])
        sl.upsert(engine, a_table, up, ['number'])


def parse_date(date_text):
    """Parse a scraped date string into a ``datetime``.

    Every key of ``DE_MONTHS`` occurring in ``date_text`` is replaced by its
    mapped value (presumably German month names to month numbers -- confirm
    against the ``DE_MONTHS`` definition). The "Uhr" marker preceded by a
    non-breaking space is removed and "31. 09" is rewritten to "30. 09".
    The cleaned text is parsed as ``"%d. %m %Y, %H:%M"``; if that fails the
    format ``"%m/%d/%Y %H:%M %p"`` is tried instead.

    :param date_text: the raw date string.
    :returns: the parsed ``datetime``.
    :raises ValueError: if neither format matches.
    """
    for name, num in DE_MONTHS.items():
        date_text = date_text.replace(name, num)
    date_text = date_text.replace(u"\xa0Uhr", "")
    # "31. 09" can never parse with "%d. %m" (September has 30 days), so it
    # is clamped to the 30th instead of failing.
    date_text = date_text.replace("31. 09", "30. 09")
    try:
        return datetime.strptime(date_text, "%d. %m %Y, %H:%M")
    except ValueError:
        # NOTE(review): strptime only applies %p to the hour when the hour
        # is parsed with %I; combined with %H an AM/PM suffix is consumed
        # but ignored -- confirm whether %I was intended here.
        return datetime.strptime(date_text, "%m/%d/%Y %H:%M %p")


if __name__ == "__main__":
    # Script entry point: connect to the ``spon_scrape`` PostgreSQL database
    # on localhost and run ``articles`` against that connection.
    engine = sl.connect('postgresql://localhost/spon_scrape')
    articles(engine)
예제 #14
0
        up = {'number': data['number']}
        slug_parts = data['canonical_url'].split('/')[3:]
        if len(slug_parts) > 3:
            print slug_parts
        if len(slug_parts) == 3:
            up['ressort'], up['subressort'], _ = slug_parts
        elif len(slug_parts) == 2:
            up['ressort'], _ = slug_parts
        up['date'] = parse_date(data['date_text'])
        sl.upsert(engine, a_table, up, ['number'])


def parse_date(date_text):
    """Convert a scraped date string to a ``datetime``.

    The text is normalised first: each ``DE_MONTHS`` key is substituted by
    its value (presumably German month names mapped to month numbers --
    verify against ``DE_MONTHS``), the "Uhr" marker with its leading
    non-breaking space is stripped, and "31. 09" becomes "30. 09". Parsing
    then tries ``"%d. %m %Y, %H:%M"`` and falls back to
    ``"%m/%d/%Y %H:%M %p"``.

    :param date_text: the raw date string.
    :returns: the parsed ``datetime``.
    :raises ValueError: if the text matches neither format.
    """
    for name, num in DE_MONTHS.items():
        date_text = date_text.replace(name, num)
    date_text = date_text.replace(u"\xa0Uhr", "")
    # A 31st of month 09 is rejected by strptime; clamp it to the 30th.
    date_text = date_text.replace("31. 09", "30. 09")
    try:
        return datetime.strptime(date_text, "%d. %m %Y, %H:%M")
    except ValueError:
        # NOTE(review): with %H the %p directive does not adjust the hour
        # (strptime only honours it together with %I) -- confirm the
        # intended format of the fallback input.
        return datetime.strptime(date_text, "%m/%d/%Y %H:%M %p")

if __name__ == "__main__":
    # Script entry point: open the ``spon_scrape`` PostgreSQL database on
    # localhost and pass the connection to ``articles``.
    engine = sl.connect('postgresql://localhost/spon_scrape')
    articles(engine)