예제 #1
0
파일: vacancies.py 프로젝트: tobes/munge
def update_vacancies():
    data = {}
    fields = [
        ('prop_empty', make_bool_c),
        ('prop_empty_date', make_date_c),
        ('prop_occupied', make_bool_c),
        ('prop_occupied_date', make_date_c),
        ('prop_ba_rates', make_number_c),
        ('tenant', make_text_c),
    ]

    with open('vacancy.csv') as f:
        rows = csv.reader(f, delimiter=',')
        count = 0
        for row in rows:
            count += 1
            if count == 1:
                continue
        #    if count == 100:
        #        break
            sql = '''
            SELECT uarn FROM vacancy_updates
            WHERE ba_ref=:ba_ref AND la_code=:la_code
            '''
            result = run_sql(sql, ba_ref=row[1], la_code=row[0])
            exists = False
            uarn = None
            for r in result:
                exists = True
                uarn = r[0]

            if not exists:
                sql = '''
                INSERT INTO vacancy_updates (ba_ref, la_code)
                VALUES (:ba_ref, :la_code)
                '''
                result = run_sql(sql, ba_ref=row[1], la_code=row[0])

            updates = {}
          #  if not uarn:
          #      uarn = get_uarn(la_code=row[0], ba_ref=row[1])
          #      if uarn:
          #          updates['uarn'] = uarn

            for i in range(6):
                if row[i + 2].strip():
                    updates[fields[i][0]] = fields[i][1](row[i + 2])
            if updates:
                updates['last_updated'] = datetime.now()
            params = ' ,'.join(['%s = :%s' % (k, k) for k in updates.keys()])
            sql = '''
            UPDATE vacancy_updates SET %s
            WHERE ba_ref = :ba_ref AND la_code = :la_code
            ''' % (params)
            run_sql(sql, ba_ref=row[1], la_code=row[0], **updates)
            if count % 100 == 0:
                print count - 1, len(data)
예제 #2
0
파일: vacancies.py 프로젝트: tobes/munge
def check_ba_ref(la, ba_ref):
    ba_ref = '0*%s' % ba_ref
    result = run_sql(
        'select ba_ref from vao_list where la_code=:la and ba_ref SIMILAR TO  :ba',
        ba=ba_ref, la=la)
    for x in result:
        return x[0]
예제 #3
0
파일: import_fns.py 프로젝트: tobes/munge
 def fn(value):
     if name not in lookups:
         result = run_sql(sql)
         l = {}
         for row in result:
             l[row[0]] = row[1]
         lookups[name] = l
     return lookups[name].get(value)
예제 #4
0
파일: vacancies.py 프로젝트: tobes/munge
def update_matches():
    sql = '''
    SELECT ba_ref, la_code FROM vacancy_updates
    WHERE uarn is null
    '''
    result = run_sql(sql)
    count = 0
    match = 0
    for row in result:
        uarn = get_uarn(row[1], row[0])
        if uarn:
            sql = '''
            UPDATE vacancy_updates SET uarn = :uarn
            WHERE ba_ref = :ba_ref AND la_code = :la_code
            '''
            run_sql(sql, ba_ref=row[0], la_code=row[1], uarn=uarn)
            match += 1
        count += 1
        if count % 100 == 0:
            print count, match
    print count, match
예제 #5
0
파일: summeries.py 프로젝트: tobes/munge
def update_summary_table(table_name,
                         description,
                         dependencies=None,
                         is_view=False,
                         created=False,
                         time=None,
                         rows=None,
                         importer=None):
    if not _initiated:
        _init()
    data = {
        'name': table_name,
        'view': is_view,
        'importer': importer,
    }
    data['dependencies'] = dependencies
    if description:
        data['description'] = description
    data['updated'] = datetime.datetime.now()
    if created:
        data['created'] = datetime.datetime.now()
    if time:
        data['time'] = time
    if rows:
        data['rows'] = rows
    # does the entry exist?
    exists = False
    sql = 'SELECT * FROM table_summaries WHERE name=:name'
    results = sa_util.run_sql(sql, {'name': table_name})
    for result in results:
        exists = True
    columns = ', '.join([sa_util.quote(x) for x in data.keys()])
    values = ', '.join([':%s' % x for x in data.keys()])
    if exists:
        sql = "UPDATE table_summaries SET ({columns}) = ({values}) WHERE name=:name"
    else:
        sql = "INSERT INTO table_summaries ({columns}) VALUES ({values})"

    sql = sql.format(columns=columns, values=values)
    sa_util.run_sql(sql, **data)
예제 #6
0
파일: vacancies.py 프로젝트: tobes/munge
def get_uarn(la_code='', ba_ref=''):
    sql = '''
    SELECT uarn FROM vao_list
    WHERE trim(leading '0' from ba_ref)=:ba_ref
    '''
    if la_code:
        sql += '''
            AND la_code=:la_code
            '''
    if la_code and ba_ref[0] not in '0123456789' and len(ba_ref) > 9:
        count = 3
    else:
        count = 0
    for x in range(count + 1):
        result = run_sql(sql, ba_ref=ba_ref[x:].lstrip('0'), la_code=la_code)
        for r in result:
            return r[0]
예제 #7
0
파일: csv_util.py 프로젝트: tobes/munge
def make_csv(filename, sql, **kw):
    table = kw.get("table")
    verbose = kw.get("verbose")
    simple = kw.get("simple")
    data = kw.get("data", {})

    if not table:
        table = os.path.splitext(os.path.basename(filename))[0]
    if verbose:
        print("Processing %s" % table)
    filename = os.path.join(config.DATA_PATH, "output", filename)
    with open(filename, "w") as f:
        a = csv.writer(f, delimiter=",", dialect=csv.excel)

        result = run_sql(sql, data)
        wrote_headers = False
        count = 0
        for row in result:
            row = [csv_encode(x) for x in row]
            if not wrote_headers:
                # do here so we only need to execute the sql once
                headers = kw.get("headers")
                if not headers:
                    # no headers so get from sqlalchemy
                    headers = make_headers(result, table, simple=simple)
                a.writerows([headers])
                if verbose:
                    print("\nFields:")
                    for h in headers:
                        if simple:
                            print("\t%s" % h)
                        else:
                            print("\t%s  \t%s" % tuple(h.split(":")))
                    print
                wrote_headers = True
            a.writerows([row])
            count += 1
            if verbose and count % config.BATCH_SIZE == 0:
                print("{filename}: {count:,}".format(filename=filename, count=count))
        if verbose:
            print("%s rows written" % (count - 1))
            print
예제 #8
0
$$ LANGUAGE SQL IMMUTABLE;

DROP AGGREGATE IF EXISTS usr_median2(anyelement);

CREATE AGGREGATE usr_median2(anyelement) (
  SFUNC=array_append,
  STYPE=anyarray,
  FINALFUNC=_final_median2,
  INITCOND='{}'
);


COMMIT;
'''

run_sql(sql)


sql = '''
CREATE OR REPLACE FUNCTION percent_diff(a float8, b float8) RETURNS float8 AS $BODY$
SELECT
    CASE
        WHEN $1 > 0
            THEN ($2 / $1) - 1.0
        ELSE NULL
    END;

$BODY$
LANGUAGE SQL IMMUTABLE;

CREATE OR REPLACE FUNCTION safe_divide(numeric, numeric)
예제 #9
0
파일: csv_util.py 프로젝트: tobes/munge
def import_csv(
    reader,
    table_name,
    fields=None,
    skip_first=False,
    description=None,
    verbose=0,
    limit=None,
    keep_table=False,
    importer=None,
):
    if keep_table and table_name not in table_list():
        keep_table = False
    temp_table = config.TEMP_TABLE_STR + table_name
    count = 0
    t_fields = []
    data = []
    has_header_row = (fields is None) or skip_first
    first = True
    set_first = False
    for row in reader:
        skip = False
        if first:
            if len(row) == 1 and row[0][:1] == "#":
                if not description:
                    description = row[0][1:].strip()
                skip = True
            else:
                if fields is None:
                    fields = row
                t_fields = process_header(fields)
                t_fns = get_fns(t_fields)
                if keep_table:
                    old_fields = table_columns(table_name)
                    if fields_match(old_fields, t_fields):
                        truncate_table(table_name, verbose=verbose)
                        temp_table = table_name
                    else:
                        keep_table = False
                if not keep_table:
                    create_table(temp_table, t_fields, verbose=verbose)
                f = [field["name"] for field in t_fields if not field.get("missing")]
                insert_sql = insert_rows(temp_table, t_fields)
                set_first = True
        if not ((description or has_header_row) and first):
            row_data = dict(zip(f, row))
            for fn in t_fns:
                fn_info = t_fns[fn]
                if fn_info[1]:
                    fn_fields = fn_info[1].split("|")
                else:
                    fn_fields = [fn]
                try:
                    row_data[fn] = fn_info[0](*[row_data[x] for x in fn_fields])
                except Exception as e:
                    # FIXME log error
                    print(str(e))
                    print(fn)
                    print(row_data)
                    skip = True
            if not skip:
                data.append(row_data)
            if count % config.BATCH_SIZE == 0 and count:
                run_sql(insert_sql, data)
                data = []
                if verbose:
                    print("{table}: {count:,}".format(table=table_name, count=count))
            if not skip:
                count += 1
            if limit and count == limit:
                break
        if set_first:
            first = False
    if data:
        run_sql(insert_sql, data)

    if verbose:
        print("{table}: {count:,} rows imported".format(table=table_name, count=count))
    # Add indexes
    if not keep_table:
        build_indexes(temp_table, t_fields, verbose=verbose)
    update_summary_table(table_name, description, importer=importer, created=not keep_table)
예제 #10
0
파일: vacancies.py 프로젝트: tobes/munge
def info_table():

    sql = '''
    SELECT code FROM c_la
    '''

    results = run_sql(sql)

    for row in results:
        la_code = row[0]

        print(la_code)
        sql = '''
        DELETE FROM vacancy_info
        WHERE la_code=:la_code
        '''
        data = run_sql(sql, la_code=la_code)

        sql = '''
        INSERT INTO vacancy_info (la_code, uarn)
        SELECT la_code, uarn from vao_list
        WHERE la_code=:la_code
        '''
        data = run_sql(sql, la_code=la_code)

        sql = '''
        SELECT DISTINCT prop_empty
        FROM vacancy_updates where la_code=:la_code
        '''

        data = run_sql(sql, la_code=la_code)
        default_empty = None
        for d in data:
            if default_empty:
                default_empty = None
                break
            else:
                default_empty = not d[0]

        sql = '''
        UPDATE vacancy_info
        SET
        prop_empty = :default_empty,
        real_data = false
        WHERE la_code = :la_code
        '''

        run_sql(sql, la_code=la_code, default_empty=default_empty)

        sql = '''
        UPDATE vacancy_info vi
        SET
        prop_empty=v.prop_empty,
        prop_occupied_date=v.prop_occupied_date,
        prop_empty_date=v.prop_empty_date,
        tenant=v.tenant,
        real_data = true
        FROM (
            SELECT
                uarn,
                prop_empty,
                prop_occupied_date,
                prop_empty_date,
                tenant
            FROM vacancy_updates
            WHERE la_code = :la_code
        ) AS v
        WHERE vi.uarn = v.uarn
        '''

        run_sql(sql, la_code=la_code)
        print('done')
예제 #11
0
파일: vacancies.py 프로젝트: tobes/munge
def match_uarn():


    sql = """
UPDATE vacancy_updates
SET uarn=subquery.uarn
FROM (

    SELECT b.uarn, b.ba_ref, b.la_code
    FROM vao_list b
    JOIN vacancy_updates v
    ON trim(leading '0' from v.ba_ref) = trim(leading '0' from b.ba_ref)
    WHERE v.uarn is null

) AS subquery
WHERE vacancy_updates.ba_ref=subquery.ba_ref
AND vacancy_updates.la_code=subquery.la_code;
    """
    run_sql(sql)

    sql = """
UPDATE vacancy_updates
SET uarn=subquery.uarn
FROM (

    SELECT b.uarn, v.ba_ref, v.la_code
    FROM vao_list b
    JOIN vacancy_updates v
    ON ltrim(ltrim(v.ba_ref, 'N'''), '0') = ltrim(b.ba_ref, '0')
    AND b.la_code = v.la_code
    WHERE v.uarn is null
    ORDER BY uarn


) AS subquery
WHERE vacancy_updates.ba_ref=subquery.ba_ref
AND vacancy_updates.la_code=subquery.la_code;
    """
    run_sql(sql)



    sql = """
UPDATE vacancy_updates
SET uarn=subquery.uarn
FROM (

 SELECT b.uarn, v.ba_ref, v.la_code
FROM vacancy_updates v
JOIN vao_list b
ON v.ba_ref = right(b.ba_ref, length(v.ba_ref))
AND b.la_code = v.la_code
WHERE v.uarn is null
AND length(v.ba_ref) > 6
AND v.la_code in
('E07000195', 'W06000006', 'E07000085', 'E07000087',
'E06000056', 'E07000089', 'E06000043', 'E07000099',
'E06000034', 'E07000203', 'E07000102', 'E07000117',
'E07000135', 'W06000004', 'E07000193', 'W06000011',
'W06000013', 'W06000024', 'E07000200', 'E07000067',
'E07000032', 'E07000080', 'W06000003')

) AS subquery
WHERE vacancy_updates.ba_ref=subquery.ba_ref
AND vacancy_updates.la_code=subquery.la_code;
    """
    run_sql(sql)
예제 #12
0
파일: vacancies.py 프로젝트: tobes/munge
import xlrd
from os import listdir
from os.path import isfile, join
from datetime import datetime
from decimal import Decimal

import unicodecsv as csv

from sa_util import run_sql

DIR = os.path.join(config.DATA_PATH, 'vacancy')

la_codes = [x[0] for x in run_sql('select code from c_la')]


def make_date(row, idx, date_mode):
    x = row[idx].ctype
    if x in [0, 5, 6]:
        return None
    v = row[idx].value
    if x in [3]:
        return datetime(*xlrd.xldate_as_tuple(v, date_mode))
    elif x in [1]:
        v = v.strip()
        if v == '':
            return None
        v = v.replace('.', '/').split(' ')[0]
        return datetime.strptime(v, '%d/%m/%Y')
    else:
        raise Exception('INVALID DATE')