def update(): with closing(get_connection()) as connection: total = get_total('records.csv') with open( 'records.csv', 'r', encoding='iso-8859-1', newline='', ) as resource: rows = csv.reader(resource, delimiter=u';') for row in tqdm(rows, total=total): with closing(connection.cursor()) as cursor: query = ''' SELECT id FROM records WHERE road = %(road)s AND number = %(number)s AND zip_code = %(zip_code)s AND city = %(city)s ''' cursor.execute( query, { 'road': row[2], 'number': row[3], 'zip_code': row[0], 'city': row[1], }, ) record = cursor.fetchone() if record: continue query = ''' INSERT INTO records (road, number, zip_code, city) VALUES (%(road)s, %(number)s, %(zip_code)s, %(city)s) ''' cursor.execute( query, { 'road': row[2], 'number': row[3], 'zip_code': row[0], 'city': row[1], }, ) connection.commit()
def perform(id): try: with closing(get_connection()) as connection: with closing(connection.cursor()) as cursor: query = 'SELECT * FROM records WHERE id = %(id)s' cursor.execute( query, { 'id': id, }, ) record = cursor.fetchone() if record['details']: return details = get_details( record['road'], record['number'], record['zip_code'], record['city'], ) if not details: return details = dumps( details, ensure_ascii=False, indent=' ', sort_keys=True, ) query = ''' UPDATE records SET details = %(details)s WHERE id = %(id)s ''' cursor.execute( query, { 'details': details, 'id': id, }, ) connection.commit() except Exception: sentry = get_sentry() if sentry: sentry.captureException()
def insert(): with closing(get_connection()) as connection: with open( 'records.csv', 'r', encoding='iso-8859-1', newline='', ) as resource: with closing(connection.cursor()) as cursor: cursor.copy_from( resource, 'records', columns=('zip_code', 'city', 'road', 'number'), null='\\n', sep=';', size=999999999, ) connection.commit()
def test(): with closing(get_connection()) as connection: with closing(connection.cursor('cursor')) as cursor: query = ''' SELECT * FROM records ORDER BY RANDOM() LIMIT 5 OFFSET 0 ''' cursor.execute(query) for record in cursor: details = get_details( record['road'], record['number'], record['zip_code'], record['city'], ) pprint(details)
def details(): r = ResQ() with closing(get_connection()) as connection: total = 0 with closing(connection.cursor()) as cursor: query = ''' SELECT COUNT(id) AS count FROM records WHERE details IS NULL ''' cursor.execute(query) total = cursor.fetchone()['count'] with closing(connection.cursor('cursor')) as cursor: query = ''' SELECT * FROM records WHERE details IS NULL ''' cursor.execute(query) for record in tqdm(cursor, total=total): r.enqueue(Record, record['id'])
def main(): stocks = config.stocks get_data_csv_name = config.get_data_csv_name db_csv_name = config.db_csv_name subprocess.call(["rm", get_data_csv_name]) subprocess.call(["rm", db_csv_name]) number = 1 for stock_index in range(len(stocks) - 1): qiita_api = os.environ['QIITA_API'] url = "https://qiita.com/api/v2/items" h = {"Authorization": "Bearer " + qiita_api} p = { 'per_page': 100, 'query': 'stocks:<{} stocks:>{}'.format(str(int(stocks[stock_index]) + 1), stocks[stock_index + 1]) } response = requests.get(url, params=p, headers=h) response_list = json.loads(response.text) for index, item in enumerate(response_list): created_at = response_list[index]["created_at"] article_id = response_list[index]["id"] likes_count = response_list[index]["likes_count"] tags = [] for tag_index in range(5): try: tags.append( response_list[index]["tags"][tag_index]["name"]) except IndexError: tags.append(None) title = response_list[index]["title"] updated_at = response_list[index]["updated_at"] url = response_list[index]["url"] user_id = response_list[index]["user"]["id"] number = utilities.write_csv(get_data_csv_name, number, article_id, user_id, title, likes_count, url, tags, created_at, updated_at) utilities.delete_csv_row(get_data_csv_name, db_csv_name) dt_now = datetime.datetime.now() file = open('update_log.txt', 'a') file.write(str(dt_now) + "\n") file.close() conn = utilities.get_connection() cur = conn.cursor() cur.execute('DELETE FROM update_time') cur.execute('INSERT INTO update_time VALUES (' + str(dt_now.year) + ',' + str(dt_now.month) + ',' + str(dt_now.day) + ')') cur.execute('DELETE FROM articles') f = open('db.csv', 'r') cur.copy_from(f, 'articles', sep=',', null='\\N') conn.commit() cur.close() conn.close()
def bootstrap(): with closing(get_connection()) as connection: with closing(connection.cursor()) as cursor: query = 'DROP SCHEMA IF EXISTS public CASCADE' cursor.execute(query) query = 'CREATE SCHEMA IF NOT EXISTS public' cursor.execute(query) connection.commit() with closing(connection.cursor()) as cursor: query = ''' CREATE TABLE IF NOT EXISTS records ( id INTEGER NOT NULL, road CHARACTER VARYING(255) NOT NULL, number CHARACTER VARYING(255) NOT NULL, zip_code CHARACTER VARYING(255) NOT NULL, city CHARACTER VARYING(255) NOT NULL, details json NULL ) ''' cursor.execute(query) query = ''' CREATE SEQUENCE records_id_sequence START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE CACHE 1 ''' cursor.execute(query) query = ''' ALTER TABLE records ALTER COLUMN id SET DEFAULT nextval(\'records_id_sequence\'::regclass) ''' cursor.execute(query) query = ''' ALTER TABLE records ADD CONSTRAINT records_id_constraint PRIMARY KEY (id) ''' cursor.execute(query) query = ''' CREATE INDEX records_road ON records USING btree (road) ''' cursor.execute(query) query = ''' CREATE INDEX records_number ON records USING btree (number) ''' cursor.execute(query) query = ''' CREATE INDEX records_zip_code ON records USING btree (zip_code) ''' cursor.execute(query) query = ''' CREATE INDEX records_city ON records USING btree (city) ''' cursor.execute(query) connection.commit()
def report(): with closing(get_connection()) as connection: total = 0 with closing(connection.cursor()) as cursor: query = ''' SELECT COUNT(id) AS count FROM records ''' cursor.execute(query) total = cursor.fetchone()['count'] with open( 'report.csv', 'w', encoding='iso-8859-1', newline='', ) as resource: rows = csv.writer(resource, delimiter=u';') rows.writerow([ 'Road', 'Number', 'Zip Code', 'City', 'Name', 'Addresses :: Primary', 'Addresses :: Secondary', 'Addresses :: Zip Code', 'Telephone', 'Fax', 'Email', 'Others :: IBAN', 'Others :: Account Number', 'Others :: Client Number', 'Others :: ESCHKG ID', ]) with closing(connection.cursor('cursor')) as cursor: query = ''' SELECT * FROM records ORDER BY id ASC ''' cursor.execute(query) for record in tqdm(cursor, total=total): details = None if record['details']: if 'error' not in record['details']: details = record['details'] rows.writerow([ record['road'], record['number'], record['zip_code'], record['city'], details['name'] if details else '', details['addresses']['primary'] if details else '', details['addresses']['secondary'] if details else '', details['addresses']['zip_code'] if details else '', details['tel'] if details else '', details['fax'] if details else '', details['email'] if details else '', details['others']['iban'] if details else '', details['others']['account_number'] if details else '', details['others']['client_number'] if details else '', details['others']['eschkg_id'] if details else '', ])