def conditional_constraint_and_index_reconstructor(options):
    """Run the caller's block inside the reconstructor unless a measure is set.

    This is a single-yield generator, so it is presumably wrapped with
    ``contextlib.contextmanager`` where it is defined (the decorator is not
    visible from this block -- confirm).

    Args:
        options: mapping of command options.  A truthy ``'measure'`` entry
            means only part of the table is being updated.

    Yields:
        None, exactly once.
    """
    if options.get('measure'):
        # This is an optimisation that only makes sense when we're
        # updating the entire table.
        yield
    else:
        # The reconstructor has to be *entered* around the yield.  Yielding
        # the un-entered context manager object (as this code used to do)
        # meant it never wrapped the caller's work at all.
        with utils.constraint_and_index_reconstructor('frontend_measurevalue'):
            yield
def test_reconstructor_does_work(self):
    """Indexes on ``tofu`` vanish inside the reconstructor and return after.

    Builds a scratch ``tofu`` table (primary key, a foreign key to
    ``firmness``, a clustering on ``tofu_pkey`` and one extra index), then
    checks that ``pg_indexes`` reports no rows for it inside the context
    and two rows again once the context has exited.
    """
    from django.db import connection
    from common.utils import constraint_and_index_reconstructor

    setup_statements = (
        "CREATE TABLE firmness (id integer PRIMARY KEY)",
        """ CREATE TABLE tofu ( id integer PRIMARY KEY, brand varchar, firmness_id integer REFERENCES firmness (id)) """,
        "CLUSTER tofu USING tofu_pkey",
        "CREATE INDEX ON tofu (brand)",
    )
    index_count_sql = (
        "SELECT count(*) FROM pg_indexes WHERE tablename = 'tofu'"
    )

    with connection.cursor() as cursor:

        def tofu_index_count():
            # Number of rows pg_indexes currently reports for tofu.
            cursor.execute(index_count_sql)
            return cursor.fetchone()[0]

        # Set up a table
        for statement in setup_statements:
            cursor.execute(statement)

        with constraint_and_index_reconstructor('tofu'):
            self.assertEqual(tofu_index_count(), 0)

        # Primary-key index plus the index on brand.
        self.assertEqual(tofu_index_count(), 2)
        self.assertEqual(_cluster_count(cursor), 1)
def test_reconstructor_works_even_when_exception_thrown(self):
    """Indexes and clustering are back even if the wrapped block raises.

    Builds the same scratch ``tofu`` table as the happy-path test, raises
    inside the reconstructor context, and then checks that ``pg_indexes``
    reports two rows for ``tofu`` and that ``_cluster_count`` returns 1.
    """

    class BadThingError(Exception):
        pass

    setup_statements = (
        "CREATE TABLE firmness (id integer PRIMARY KEY)",
        """ CREATE TABLE tofu ( id integer PRIMARY KEY, brand varchar, firmness_id integer REFERENCES firmness (id)) """,
        "CLUSTER tofu USING tofu_pkey",
        "CREATE INDEX ON tofu (brand)",
    )

    with connection.cursor() as cursor:
        # Set up a table
        for statement in setup_statements:
            cursor.execute(statement)

        # The error must propagate out of the reconstructor unchanged.
        with self.assertRaises(BadThingError):
            with constraint_and_index_reconstructor("tofu"):
                raise BadThingError(
                    "3.6 roentgen; not great, not terrible")

        cursor.execute(
            "SELECT count(*) FROM pg_indexes WHERE tablename = 'tofu'")
        remaining_indexes = cursor.fetchone()[0]
        self.assertEqual(remaining_indexes, 2)
        self.assertEqual(_cluster_count(cursor), 1)
def conditional_constraint_and_index_reconstructor(options):
    """Wrap the caller's block in the reconstructor for whole-table updates.

    Yields exactly once.  When ``options["measure"]`` is falsy the yield
    happens inside ``utils.constraint_and_index_reconstructor`` for the
    ``frontend_measurevalue`` table; otherwise it happens on its own.
    """
    whole_table_update = not options["measure"]
    if whole_table_update:
        with utils.constraint_and_index_reconstructor("frontend_measurevalue"):
            yield
    else:
        # This is an optimisation that only makes sense when we're
        # updating the entire table, so skip it for a single measure.
        yield
def test_reconstructor_does_work(self):
    """Index count drops inside the reconstructor and is restored after.

    Counts all rows in ``pg_indexes`` before, during and after wrapping a
    delete of every ``Measure`` in the reconstructor for
    ``frontend_measurevalue``.
    """
    from django.db import connection
    from common.utils import constraint_and_index_reconstructor

    measures_before = Measure.objects.count()

    with connection.cursor() as cursor:

        def total_indexes():
            # Database-wide number of rows in pg_indexes right now.
            cursor.execute("SELECT COUNT(*) FROM pg_indexes")
            return cursor.fetchone()[0]

        indexes_before = total_indexes()
        with constraint_and_index_reconstructor('frontend_measurevalue'):
            Measure.objects.all().delete()
            indexes_during = total_indexes()
        indexes_after = total_indexes()

        self.assertLess(Measure.objects.count(), measures_before)
        self.assertLess(indexes_during, indexes_before)
        self.assertEqual(indexes_before, indexes_after)
def fill_views(self):
    """Export the selected views and load each result into Postgres.

    ``query_and_export`` is run for every selected view path through a
    worker pool.  Once all workers have finished, each exported file is
    downloaded and the target table is emptied and refilled from it with
    ``COPY ... FROM STDIN``, inside
    ``utils.constraint_and_index_reconstructor``.

    If ``self.view`` is set, only the first entry of ``self.view_paths``
    containing it is processed; otherwise every path is.

    Raises:
        Exception: whatever ``copy_expert`` raises is re-raised, after the
            offending file has been copied to ``/tmp/error`` for inspection.
    """
    if self.view:
        # Only the first matching path is used.
        paths = [path for path in self.view_paths if self.view in path][:1]
    else:
        paths = self.view_paths

    # Resolve the date before starting any workers, so that a failed lookup
    # does not leave a pool behind.
    prescribing_date = ImportLog.objects.latest_in_category(
        'prescribing').current_at.strftime('%Y-%m-%d')

    # NOTE(review): if nothing matched, len(paths) is 0; assuming `Pool` is
    # multiprocessing.Pool, it raises ValueError for processes=0 -- confirm
    # whether a clearer error is wanted here.
    pool = Pool(processes=len(paths))
    # Perform bigquery parts of operation in parallel
    pool_results = [
        pool.apply_async(
            query_and_export, [self.dataset, view, prescribing_date])
        for view in paths
    ]
    pool.close()
    pool.join()  # wait for all worker processes to exit

    copy_str = "COPY %s(%s) FROM STDIN WITH (FORMAT CSV)"
    for result in pool_results:
        tablename, gcs_uri = result.get()
        f = download_and_unzip(gcs_uri)
        try:
            # The header row names the columns.  Strip the line ending so
            # that it does not end up inside the last column name.
            fieldnames = f.readline().strip().split(',')
            with connection.cursor() as cursor:
                with utils.constraint_and_index_reconstructor(tablename):
                    self.log("Deleting from table...")
                    cursor.execute("DELETE FROM %s" % tablename)
                    self.log("Copying CSV to postgres...")
                    try:
                        cursor.copy_expert(copy_str % (
                            tablename, ','.join(fieldnames)), f)
                    except Exception:
                        # Keep a copy of the bad file for debugging.
                        import shutil
                        shutil.copyfile(f.name, "/tmp/error")
                        raise
        finally:
            # Close the download even when the import fails.
            f.close()
        self.log("-------------")
def download_and_import(self, table):
    '''Download table from storage and import into local database.

    We sort the downloaded file with `sort` rather than in BigQuery,
    because we hit resource limits when we try to do so.  See #698 and
    #711 for discussion.

    The export is downloaded into one temporary file.  Its header line is
    copied into a second temporary file, which ``generate_sort_cmd`` is
    then given as its output path.  The target table is emptied and
    refilled from that second file inside
    ``utils.constraint_and_index_reconstructor``.

    Args:
        table: object exposing ``table_id``; it is also passed to
            ``TableExporter``.

    Raises:
        subprocess.CalledProcessError: if either shell command fails.
    '''
    table_id = table.table_id
    storage_prefix = 'hscic/views/{}-'.format(table_id)
    exporter = TableExporter(table, storage_prefix)

    # The context managers ensure both temporary files are closed even
    # when downloading, sorting or copying raises.
    with tempfile.NamedTemporaryFile() as raw_file, \
            tempfile.NamedTemporaryFile() as sorted_file:
        raw_path = raw_file.name
        sorted_path = sorted_file.name

        self.log('Downloading {} to {}'.format(table_id, raw_path))
        exporter.download_from_storage_and_unzip(raw_file)

        self.log('Sorting {} to {}'.format(table_id, sorted_path))
        cmd = 'head -1 {} > {}'.format(raw_path, sorted_path)
        subprocess.check_call(cmd, shell=True)

        # Reading the header here also moves sorted_file past it, so the
        # COPY below starts reading after the header line.
        field_names = sorted_file.readline().strip().split(',')

        cmd = generate_sort_cmd(
            table_id, field_names, raw_path, sorted_path)
        subprocess.check_call(cmd, shell=True)

        copy_sql = "COPY {}({}) FROM STDIN WITH (FORMAT CSV)".format(
            table_id, ','.join(field_names))

        with connection.cursor() as cursor:
            with utils.constraint_and_index_reconstructor(table_id):
                self.log("Deleting from table %s..." % table_id)
                cursor.execute("DELETE FROM %s" % table_id)
                self.log("Copying CSV to %s..." % table_id)
                cursor.copy_expert(copy_sql, sorted_file)
def conditional_constraint_and_index_reconstructor(enabled):
    """Yield once, inside the reconstructor only when ``enabled`` is truthy.

    When enabled, the single yield happens inside
    ``utils.constraint_and_index_reconstructor`` for the
    ``frontend_measurevalue`` table; otherwise it happens on its own.
    """
    if enabled:
        with utils.constraint_and_index_reconstructor("frontend_measurevalue"):
            yield
        return

    # Disabled: hand control straight back to the caller.
    yield