def update_vacancies():
    """Load vacancy.csv into the vacancy_updates table.

    Each CSV row is (la_code, ba_ref, then six data columns).  A row is
    inserted into vacancy_updates when the (ba_ref, la_code) pair is not
    already present; any non-blank data columns are then written onto that
    row (converted via the make_*_c helpers) and last_updated is stamped.
    """
    # NOTE(review): `data` is never populated, so the progress print at the
    # bottom always reports len(data) == 0 — confirm whether it is vestigial.
    data = {}
    # (column name, converter) for CSV columns 2..7, in file order.
    fields = [
        ('prop_empty', make_bool_c),
        ('prop_empty_date', make_date_c),
        ('prop_occupied', make_bool_c),
        ('prop_occupied_date', make_date_c),
        ('prop_ba_rates', make_number_c),
        ('tenant', make_text_c),
    ]
    with open('vacancy.csv') as f:
        rows = csv.reader(f, delimiter=',')
        count = 0
        for row in rows:
            count += 1
            if count == 1:
                # Skip the header row.
                continue
            # if count == 100:
            #     break
            # Does this (ba_ref, la_code) pair already exist?
            sql = '''
                SELECT uarn FROM vacancy_updates
                WHERE ba_ref=:ba_ref AND la_code=:la_code
            '''
            result = run_sql(sql, ba_ref=row[1], la_code=row[0])
            exists = False
            uarn = None
            for r in result:
                exists = True
                uarn = r[0]
            if not exists:
                sql = '''
                    INSERT INTO vacancy_updates (ba_ref, la_code)
                    VALUES (:ba_ref, :la_code)
                '''
                result = run_sql(sql, ba_ref=row[1], la_code=row[0])
            updates = {}
            # if not uarn:
            #     uarn = get_uarn(la_code=row[0], ba_ref=row[1])
            #     if uarn:
            #         updates['uarn'] = uarn
            # Convert each non-blank data column with its helper.
            for i in range(6):
                if row[i + 2].strip():
                    updates[fields[i][0]] = fields[i][1](row[i + 2])
            if updates:
                updates['last_updated'] = datetime.now()
                # Column names come from the fixed `fields` list above (plus
                # 'last_updated'), so interpolating them into the SET clause
                # is safe; the values themselves are bound parameters.
                params = ' ,'.join(['%s = :%s' % (k, k) for k in updates.keys()])
                sql = '''
                    UPDATE vacancy_updates
                    SET %s
                    WHERE ba_ref = :ba_ref AND la_code = :la_code
                ''' % (params)
                run_sql(sql, ba_ref=row[1], la_code=row[0], **updates)
            if count % 100 == 0:
                print count - 1, len(data)
def check_ba_ref(la, ba_ref):
    """Return the stored ba_ref in vao_list matching *ba_ref* for local
    authority *la*, tolerating extra leading zeros, or None if absent."""
    # SIMILAR TO '0*<ref>' matches the reference with any number of
    # leading zeros prepended.
    pattern = '0*%s' % ba_ref
    rows = run_sql(
        'select ba_ref from vao_list where la_code=:la and ba_ref SIMILAR TO :ba',
        ba=pattern, la=la)
    for record in rows:
        return record[0]
    return None
def fn(value):
    """Translate *value* through the named lookup table, building and
    caching the table on first use (None when the key is unknown)."""
    if name not in lookups:
        # One query populates the whole table; later calls hit the cache.
        lookups[name] = {row[0]: row[1] for row in run_sql(sql)}
    return lookups[name].get(value)
def update_matches(): sql = ''' SELECT ba_ref, la_code FROM vacancy_updates WHERE uarn is null ''' result = run_sql(sql) count = 0 match = 0 for row in result: uarn = get_uarn(row[1], row[0]) if uarn: sql = ''' UPDATE vacancy_updates SET uarn = :uarn WHERE ba_ref = :ba_ref AND la_code = :la_code ''' run_sql(sql, ba_ref=row[0], la_code=row[1], uarn=uarn) match += 1 count += 1 if count % 100 == 0: print count, match print count, match
def update_summary_table(table_name, description, dependencies=None,
                         is_view=False, created=False, time=None,
                         rows=None, importer=None):
    """Record (or refresh) the table_summaries entry for *table_name*.

    Always stamps `updated`; `created` is stamped only when the caller says
    the table was (re)created.  description/time/rows are stored only when
    truthy; dependencies/view/importer are always written.  Performs an
    UPDATE when a row for the name already exists, otherwise an INSERT.

    NOTE(review): uses `datetime.datetime.now()`, so this module must do
    `import datetime` (not `from datetime import datetime`) — confirm.
    """
    if not _initiated:
        _init()
    data = {
        'name': table_name,
        'view': is_view,
        'importer': importer,
    }
    data['dependencies'] = dependencies
    if description:
        data['description'] = description
    data['updated'] = datetime.datetime.now()
    if created:
        data['created'] = datetime.datetime.now()
    if time:
        data['time'] = time
    if rows:
        data['rows'] = rows
    # does the entry exist?
    exists = False
    sql = 'SELECT * FROM table_summaries WHERE name=:name'
    results = sa_util.run_sql(sql, {'name': table_name})
    for result in results:
        exists = True
    # Both joins iterate the same dict, so columns and values line up.
    columns = ', '.join([sa_util.quote(x) for x in data.keys()])
    values = ', '.join([':%s' % x for x in data.keys()])
    if exists:
        sql = "UPDATE table_summaries SET ({columns}) = ({values}) WHERE name=:name"
    else:
        sql = "INSERT INTO table_summaries ({columns}) VALUES ({values})"
    sql = sql.format(columns=columns, values=values)
    sa_util.run_sql(sql, **data)
def get_uarn(la_code='', ba_ref=''):
    """Return the uarn from vao_list for a billing-authority reference.

    Matching ignores leading zeros on both sides; when *la_code* is given
    the match is restricted to that authority.  Long references that start
    with a non-digit are retried with up to three leading characters
    stripped.  Returns None when nothing matches.
    """
    sql = '''
        SELECT uarn FROM vao_list
        WHERE trim(leading '0' from ba_ref)=:ba_ref
    '''
    if la_code:
        sql += '''
            AND la_code=:la_code
        '''
    # Some authorities prefix long references with letters; allow extra
    # attempts with leading characters stripped in that case.
    if la_code and ba_ref[0] not in '0123456789' and len(ba_ref) > 9:
        attempts = 4
    else:
        attempts = 1
    for offset in range(attempts):
        for record in run_sql(sql, ba_ref=ba_ref[offset:].lstrip('0'),
                              la_code=la_code):
            return record[0]
    return None
def make_csv(filename, sql, **kw):
    """Run *sql* and write the result set as CSV under DATA_PATH/output.

    Keyword options: table (defaults to the filename stem), verbose,
    simple, data (bind parameters for the query), headers (overrides the
    headers otherwise derived via make_headers).
    """
    table = kw.get("table")
    verbose = kw.get("verbose")
    simple = kw.get("simple")
    data = kw.get("data", {})
    if not table:
        table = os.path.splitext(os.path.basename(filename))[0]
    if verbose:
        print("Processing %s" % table)
    filename = os.path.join(config.DATA_PATH, "output", filename)
    with open(filename, "w") as f:
        a = csv.writer(f, delimiter=",", dialect=csv.excel)
        result = run_sql(sql, data)
        wrote_headers = False
        count = 0
        for row in result:
            row = [csv_encode(x) for x in row]
            if not wrote_headers:
                # do here so we only need to execute the sql once
                headers = kw.get("headers")
                if not headers:
                    # no headers so get from sqlalchemy
                    headers = make_headers(result, table, simple=simple)
                a.writerows([headers])
                if verbose:
                    print("\nFields:")
                    for h in headers:
                        if simple:
                            print("\t%s" % h)
                        else:
                            # non-simple headers look like "name:type"
                            print("\t%s \t%s" % tuple(h.split(":")))
                    print
                wrote_headers = True
            a.writerows([row])
            count += 1
            if verbose and count % config.BATCH_SIZE == 0:
                # NOTE(review): the template never uses the `filename`
                # argument — the "(unknown)" literal looks like it was meant
                # to be "{filename}"; confirm.
                print("(unknown): {count:,}".format(filename=filename, count=count))
        if verbose:
            # NOTE(review): `count` only counts data rows (the header is
            # written separately), so `count - 1` looks off by one — confirm.
            print("%s rows written" % (count - 1))
            print
$$ LANGUAGE SQL IMMUTABLE; DROP AGGREGATE IF EXISTS usr_median2(anyelement); CREATE AGGREGATE usr_median2(anyelement) ( SFUNC=array_append, STYPE=anyarray, FINALFUNC=_final_median2, INITCOND='{}' ); COMMIT; ''' run_sql(sql) sql = ''' CREATE OR REPLACE FUNCTION percent_diff(a float8, b float8) RETURNS float8 AS $BODY$ SELECT CASE WHEN $1 > 0 THEN ($2 / $1) - 1.0 ELSE NULL END; $BODY$ LANGUAGE SQL IMMUTABLE; CREATE OR REPLACE FUNCTION safe_divide(numeric, numeric)
def import_csv(
    reader,
    table_name,
    fields=None,
    skip_first=False,
    description=None,
    verbose=0,
    limit=None,
    keep_table=False,
    importer=None,
):
    """Import rows from *reader* (an iterable of row lists) into the DB.

    The first non-comment row supplies field definitions when *fields* is
    None (or is skipped when *skip_first* is set).  A leading single-column
    "#..." row becomes the table description.  Rows are converted through
    the per-field functions from get_fns(), batched into inserts of
    config.BATCH_SIZE, and written to a temp table unless *keep_table* is
    set and the existing columns match (then the table is truncated and
    reused).  Finishes by building indexes (new tables only) and updating
    the summary table.
    """
    # Can only keep a table that actually exists.
    if keep_table and table_name not in table_list():
        keep_table = False
    temp_table = config.TEMP_TABLE_STR + table_name
    count = 0
    t_fields = []
    data = []
    has_header_row = (fields is None) or skip_first
    first = True
    set_first = False
    for row in reader:
        skip = False
        if first:
            if len(row) == 1 and row[0][:1] == "#":
                # Comment row: use as the description, import nothing.
                if not description:
                    description = row[0][1:].strip()
                skip = True
            else:
                if fields is None:
                    fields = row
                t_fields = process_header(fields)
                t_fns = get_fns(t_fields)
                if keep_table:
                    old_fields = table_columns(table_name)
                    if fields_match(old_fields, t_fields):
                        # Same shape: reuse the table in place.
                        truncate_table(table_name, verbose=verbose)
                        temp_table = table_name
                    else:
                        keep_table = False
                if not keep_table:
                    create_table(temp_table, t_fields, verbose=verbose)
                # Names of the fields actually present in the CSV.
                f = [field["name"] for field in t_fields
                     if not field.get("missing")]
                insert_sql = insert_rows(temp_table, t_fields)
                set_first = True
        # Data rows only: the header/first row is not imported when one
        # exists (has_header_row) or a description row was consumed.
        if not ((description or has_header_row) and first):
            row_data = dict(zip(f, row))
            for fn in t_fns:
                fn_info = t_fns[fn]
                # fn_info is (callable, '|'-separated source fields or '').
                if fn_info[1]:
                    fn_fields = fn_info[1].split("|")
                else:
                    fn_fields = [fn]
                try:
                    row_data[fn] = fn_info[0](*[row_data[x] for x in fn_fields])
                except Exception as e:
                    # FIXME log error
                    print(str(e))
                    print(fn)
                    print(row_data)
                    skip = True
            if not skip:
                data.append(row_data)
                # Flush a full batch (never on the very first row).
                if count % config.BATCH_SIZE == 0 and count:
                    run_sql(insert_sql, data)
                    data = []
                    if verbose:
                        print("{table}: {count:,}".format(table=table_name,
                                                          count=count))
            if not skip:
                count += 1
            if limit and count == limit:
                break
        if set_first:
            first = False
    # Flush the final partial batch.
    if data:
        run_sql(insert_sql, data)
    if verbose:
        print("{table}: {count:,} rows imported".format(table=table_name,
                                                        count=count))
    # Add indexes
    if not keep_table:
        build_indexes(temp_table, t_fields, verbose=verbose)
    update_summary_table(table_name, description, importer=importer,
                         created=not keep_table)
def info_table():
    """Rebuild the vacancy_info rows for every local authority.

    For each la_code in c_la: delete its vacancy_info rows, re-seed them
    from vao_list, apply an inferred default prop_empty (real_data=false),
    then overlay the real figures held in vacancy_updates (real_data=true).
    """
    sql = '''
        SELECT code FROM c_la
    '''
    results = run_sql(sql)
    for row in results:
        la_code = row[0]
        print(la_code)
        sql = '''
            DELETE FROM vacancy_info
            WHERE la_code=:la_code
        '''
        data = run_sql(sql, la_code=la_code)
        sql = '''
            INSERT INTO vacancy_info (la_code, uarn)
            SELECT la_code, uarn from vao_list
            WHERE la_code=:la_code
        '''
        data = run_sql(sql, la_code=la_code)
        sql = '''
            SELECT DISTINCT prop_empty FROM vacancy_updates
            where la_code=:la_code
        '''
        data = run_sql(sql, la_code=la_code)
        # If the authority only ever reports one prop_empty value, assume
        # the opposite for the properties it did not report.  With zero or
        # several distinct values no default can be inferred.
        # (Fix: the previous loop depended on the order DISTINCT returned
        # the rows and could wrongly keep a default when both True and
        # False were present.)
        distinct_empty = [d[0] for d in data]
        if len(distinct_empty) == 1:
            default_empty = not distinct_empty[0]
        else:
            default_empty = None
        sql = '''
            UPDATE vacancy_info
            SET prop_empty = :default_empty, real_data = false
            WHERE la_code = :la_code
        '''
        run_sql(sql, la_code=la_code, default_empty=default_empty)
        sql = '''
            UPDATE vacancy_info vi
            SET prop_empty=v.prop_empty,
                prop_occupied_date=v.prop_occupied_date,
                prop_empty_date=v.prop_empty_date,
                tenant=v.tenant,
                real_data = true
            FROM (
                SELECT uarn, prop_empty, prop_occupied_date,
                       prop_empty_date, tenant
                FROM vacancy_updates
                WHERE la_code = :la_code
            ) AS v
            WHERE vi.uarn = v.uarn
        '''
        run_sql(sql, la_code=la_code)
    print('done')
def match_uarn():
    """Back-fill null uarn values on vacancy_updates in three SQL passes.

    Pass 1 matches on ba_ref alone (leading zeros stripped on both sides).
    Pass 2 also strips a leading N''-style prefix and requires the la_code
    to match.  Pass 3 handles a fixed list of authorities whose references
    are stored as a suffix of the vao_list value.
    """
    # Pass 1: zero-insensitive ba_ref match, la_code ignored in the join.
    sql = """
        UPDATE vacancy_updates
        SET uarn=subquery.uarn
        FROM (
            SELECT b.uarn, b.ba_ref, b.la_code
            FROM vao_list b
            JOIN vacancy_updates v
              ON trim(leading '0' from v.ba_ref)
                 = trim(leading '0' from b.ba_ref)
            WHERE v.uarn is null
        ) AS subquery
        WHERE vacancy_updates.ba_ref=subquery.ba_ref
        AND vacancy_updates.la_code=subquery.la_code;
    """
    run_sql(sql)
    # Pass 2: strip any N'-prefix and zeros, and require matching la_code.
    sql = """
        UPDATE vacancy_updates
        SET uarn=subquery.uarn
        FROM (
            SELECT b.uarn, v.ba_ref, v.la_code
            FROM vao_list b
            JOIN vacancy_updates v
              ON ltrim(ltrim(v.ba_ref, 'N'''), '0') = ltrim(b.ba_ref, '0')
             AND b.la_code = v.la_code
            WHERE v.uarn is null
            ORDER BY uarn
        ) AS subquery
        WHERE vacancy_updates.ba_ref=subquery.ba_ref
        AND vacancy_updates.la_code=subquery.la_code;
    """
    run_sql(sql)
    # Pass 3: for listed authorities the update ba_ref is the right-hand
    # suffix of the vao_list ba_ref (only for refs longer than 6 chars).
    sql = """
        UPDATE vacancy_updates
        SET uarn=subquery.uarn
        FROM (
            SELECT b.uarn, v.ba_ref, v.la_code
            FROM vacancy_updates v
            JOIN vao_list b
              ON v.ba_ref = right(b.ba_ref, length(v.ba_ref))
             AND b.la_code = v.la_code
            WHERE v.uarn is null
            AND length(v.ba_ref) > 6
            AND v.la_code in ('E07000195', 'W06000006', 'E07000085',
                'E07000087', 'E06000056', 'E07000089', 'E06000043',
                'E07000099', 'E06000034', 'E07000203', 'E07000102',
                'E07000117', 'E07000135', 'W06000004', 'E07000193',
                'W06000011', 'W06000013', 'W06000024', 'E07000200',
                'E07000067', 'E07000032', 'E07000080', 'W06000003')
        ) AS subquery
        WHERE vacancy_updates.ba_ref=subquery.ba_ref
        AND vacancy_updates.la_code=subquery.la_code;
    """
    run_sql(sql)
import os

import xlrd
from os import listdir
from os.path import isfile, join
from datetime import datetime
from decimal import Decimal

import unicodecsv as csv

from sa_util import run_sql

# Fix: `os.path.join` is used below but only `from os import listdir` /
# `from os.path import ...` were imported, so `os` itself was unbound.
# NOTE(review): `config` is also used here without a visible import —
# presumably imported elsewhere in the file; confirm.
DIR = os.path.join(config.DATA_PATH, 'vacancy')

# All known local-authority codes, fetched once at import time.
la_codes = [x[0] for x in run_sql('select code from c_la')]


def make_date(row, idx, date_mode):
    """Return the cell at *idx* of an xlrd *row* as a datetime, or None.

    Empty, blank and error cells (ctype 0, 5, 6) give None.  Date cells
    (ctype 3) are converted through xlrd using the workbook's *date_mode*.
    Text cells (ctype 1) are parsed as day/month/year, accepting '.' as a
    separator and discarding anything after the first space; an empty
    string gives None.  Any other cell type raises.
    """
    x = row[idx].ctype
    if x in [0, 5, 6]:
        return None
    v = row[idx].value
    if x in [3]:
        return datetime(*xlrd.xldate_as_tuple(v, date_mode))
    elif x in [1]:
        v = v.strip()
        if v == '':
            return None
        # Normalise '01.02.2019' style and drop any trailing time part.
        v = v.replace('.', '/').split(' ')[0]
        return datetime.strptime(v, '%d/%m/%Y')
    else:
        raise Exception('INVALID DATE')