from contextlib import contextmanager


@contextmanager
def conditional_constraint_and_index_reconstructor(options):
    if 'measure' in options and options['measure']:
        # Dropping and rebuilding indexes is an optimisation that only
        # makes sense when we're updating the entire table, so skip it
        # when a single measure is specified.
        yield
    else:
        # Enter the context manager rather than yielding it unentered.
        with utils.constraint_and_index_reconstructor('frontend_measurevalue'):
            yield
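
A minimal usage sketch (the call site below is hypothetical; the helper is entered as an ordinary context manager):

options = {'measure': None}  # falsy: the whole table is being updated
with conditional_constraint_and_index_reconstructor(options):
    refresh_measure_values()  # hypothetical bulk-update step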
Example #2
    def test_reconstructor_does_work(self):
        from django.db import connection
        from common.utils import constraint_and_index_reconstructor

        with connection.cursor() as cursor:
            # Set up a table
            cursor.execute("CREATE TABLE firmness (id integer PRIMARY KEY)")
            cursor.execute("""
                CREATE TABLE tofu (
                  id integer PRIMARY KEY,
                  brand varchar,
                  firmness_id integer REFERENCES firmness (id))
            """)
            cursor.execute("CLUSTER tofu USING tofu_pkey")
            cursor.execute("CREATE INDEX ON tofu (brand)")
            with constraint_and_index_reconstructor('tofu'):
                cursor.execute(
                    "SELECT count(*) FROM pg_indexes WHERE tablename = 'tofu'"
                )
                self.assertEqual(cursor.fetchone()[0], 0)
            cursor.execute(
                "SELECT count(*) FROM pg_indexes WHERE tablename = 'tofu'"
            )
            self.assertEqual(cursor.fetchone()[0], 2)
            self.assertEqual(_cluster_count(cursor), 1)
    def test_reconstructor_works_even_when_exception_thrown(self):
        from django.db import connection
        from common.utils import constraint_and_index_reconstructor

        with connection.cursor() as cursor:
            # Set up a table
            cursor.execute("CREATE TABLE firmness (id integer PRIMARY KEY)")
            cursor.execute("""
                CREATE TABLE tofu (
                  id integer PRIMARY KEY,
                  brand varchar,
                  firmness_id integer REFERENCES firmness (id))
            """)
            cursor.execute("CLUSTER tofu USING tofu_pkey")
            cursor.execute("CREATE INDEX ON tofu (brand)")

            class BadThingError(Exception):
                pass

            with self.assertRaises(BadThingError):
                with constraint_and_index_reconstructor("tofu"):
                    raise BadThingError(
                        "3.6 roentgen; not great, not terrible")

            cursor.execute(
                "SELECT count(*) FROM pg_indexes WHERE tablename = 'tofu'")
            self.assertEqual(cursor.fetchone()[0], 2)
            self.assertEqual(_cluster_count(cursor), 1)
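
The `_cluster_count` helper is not shown in these snippets. A plausible implementation, counting the indexes Postgres has flagged as the table's CLUSTER index (the hard-coded 'tofu' matches how the tests call it), might be:

def _cluster_count(cursor):
    # pg_index.indisclustered is true for the index most recently used
    # by CLUSTER, so this returns 1 if the clustering survived.
    cursor.execute(
        "SELECT count(*) FROM pg_index i "
        "JOIN pg_class c ON i.indrelid = c.oid "
        "WHERE c.relname = 'tofu' AND i.indisclustered"
    )
    return cursor.fetchone()[0]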
@contextmanager
def conditional_constraint_and_index_reconstructor(options):
    if options["measure"]:
        # This is an optimisation that only makes sense when we're
        # updating the entire table.
        yield
    else:
        with utils.constraint_and_index_reconstructor("frontend_measurevalue"):
            yield
Example #6
    def test_reconstructor_does_work(self):
        from django.db import connection
        from common.utils import constraint_and_index_reconstructor
        start_count = Measure.objects.count()
        with connection.cursor() as cursor:
            cursor.execute("SELECT COUNT(*) FROM pg_indexes")
            old_count = cursor.fetchone()[0]
            with constraint_and_index_reconstructor('frontend_measurevalue'):
                Measure.objects.all().delete()
                cursor.execute("SELECT COUNT(*) FROM pg_indexes")
                new_count = cursor.fetchone()[0]
            cursor.execute("SELECT COUNT(*) FROM pg_indexes")
            after_count = cursor.fetchone()[0]
        self.assertLess(Measure.objects.count(), start_count)
        self.assertLess(new_count, old_count)
        self.assertEqual(old_count, after_count)
    def fill_views(self):
        paths = []
        if self.view:
            for path in self.view_paths:
                if self.view in path:
                    paths.append(path)
                    break
        else:
            paths = self.view_paths
        pool = Pool(processes=len(paths))
        pool_results = []
        prescribing_date = ImportLog.objects.latest_in_category(
            'prescribing').current_at.strftime('%Y-%m-%d')
        for view in paths:
            if self.view and self.view not in view:
                continue
            # Perform bigquery parts of operation in parallel
            result = pool.apply_async(
                query_and_export, [self.dataset, view, prescribing_date])
            pool_results.append(result)
        pool.close()
        pool.join()  # wait for all worker processes to exit
        for result in pool_results:
            tablename, gcs_uri = result.get()
            f = download_and_unzip(gcs_uri)
            copy_str = "COPY %s(%s) FROM STDIN WITH (FORMAT CSV)"
            fieldnames = f.readline().split(',')
            with connection.cursor() as cursor:
                with utils.constraint_and_index_reconstructor(tablename):
                    self.log("Deleting from table...")
                    cursor.execute("DELETE FROM %s" % tablename)
                    self.log("Copying CSV to postgres...")
                    try:
                        cursor.copy_expert(copy_str % (
                            tablename, ','.join(fieldnames)), f)
                    except Exception:
                        import shutil
                        shutil.copyfile(f.name, "/tmp/error")
                        raise
            f.close()
            self.log("-------------")
    def download_and_import(self, table):
        '''Download table from storage and import into local database.

        We sort the downloaded file with `sort` rather than in BigQuery,
        because we hit resource limits when we try to do so.  See #698 and #711
        for discussion.
        '''
        table_id = table.table_id
        storage_prefix = 'hscic/views/{}-'.format(table_id)
        exporter = TableExporter(table, storage_prefix)

        raw_file = tempfile.NamedTemporaryFile()
        raw_path = raw_file.name
        sorted_file = tempfile.NamedTemporaryFile()
        sorted_path = sorted_file.name

        self.log('Downloading {} to {}'.format(table_id, raw_path))
        exporter.download_from_storage_and_unzip(raw_file)

        self.log('Sorting {} to {}'.format(table_id, sorted_path))
        # Copy the CSV header row into the sorted file first...
        cmd = 'head -1 {} > {}'.format(raw_path, sorted_path)
        subprocess.check_call(cmd, shell=True)

        # ...then read it back, which also leaves the file position past
        # the header so the COPY below only streams the data rows.
        field_names = sorted_file.readline().strip().split(',')

        cmd = generate_sort_cmd(table_id, field_names, raw_path, sorted_path)
        subprocess.check_call(cmd, shell=True)

        copy_sql = "COPY {}({}) FROM STDIN WITH (FORMAT CSV)".format(
            table_id, ','.join(field_names))

        with connection.cursor() as cursor:
            with utils.constraint_and_index_reconstructor(table_id):
                self.log("Deleting from table %s..." % table_id)
                cursor.execute("DELETE FROM %s" % table_id)
                self.log("Copying CSV to %s..." % table_id)
                cursor.copy_expert(copy_sql, sorted_file)

        raw_file.close()
        sorted_file.close()
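
`generate_sort_cmd` is not shown in these snippets. Given the docstring's rationale (sorting with `sort` because BigQuery hit resource limits), a plausible sketch that sorts the body rows and appends them after the header already written by `head -1` might be (the sort key is an assumption; the real key presumably depends on the table):

def generate_sort_cmd(table_id, field_names, raw_path, sorted_path):
    # Skip the header row, sort the remainder on the first CSV column,
    # and append to the file that already contains the header line.
    return "tail -n +2 {raw} | sort -t, -k1,1 >> {sorted}".format(
        raw=raw_path, sorted=sorted_path)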
@contextmanager
def conditional_constraint_and_index_reconstructor(enabled):
    if not enabled:
        yield
    else:
        with utils.constraint_and_index_reconstructor("frontend_measurevalue"):
            yield
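
None of these examples show `common.utils.constraint_and_index_reconstructor` itself. Below is a sketch reconstructed purely from the behaviour the tests assert (indexes vanish inside the block; indexes, constraints and the CLUSTER setting come back afterwards, even when the block raises); the real implementation may well differ:

from contextlib import contextmanager

from django.db import connection


@contextmanager
def constraint_and_index_reconstructor(table_name):
    with connection.cursor() as cursor:
        # Snapshot constraint definitions first: dropping a primary key
        # also drops its backing index, so the pg_indexes query below
        # then only sees standalone indexes.
        cursor.execute(
            "SELECT conname, pg_get_constraintdef(oid) FROM pg_constraint "
            "WHERE conrelid = %s::regclass", [table_name])
        constraints = cursor.fetchall()
        # Remember which index, if any, the table is clustered on.
        cursor.execute(
            "SELECT i.relname FROM pg_index x "
            "JOIN pg_class i ON i.oid = x.indexrelid "
            "WHERE x.indrelid = %s::regclass AND x.indisclustered",
            [table_name])
        row = cursor.fetchone()
        cluster_index = row[0] if row else None
        # Drop everything (a real implementation would also need to
        # handle constraints on other tables that depend on these).
        for name, _ in constraints:
            cursor.execute(
                'ALTER TABLE %s DROP CONSTRAINT %s' % (table_name, name))
        cursor.execute(
            "SELECT indexname, indexdef FROM pg_indexes "
            "WHERE tablename = %s", [table_name])
        indexes = cursor.fetchall()
        for name, _ in indexes:
            cursor.execute('DROP INDEX %s' % name)
        try:
            yield
        finally:
            # Rebuild in reverse: standalone indexes, then constraints
            # (which recreate their own indexes), then the CLUSTER flag.
            for _, indexdef in indexes:
                cursor.execute(indexdef)
            for name, condef in constraints:
                cursor.execute('ALTER TABLE %s ADD CONSTRAINT %s %s' % (
                    table_name, name, condef))
            if cluster_index:
                cursor.execute('ALTER TABLE %s CLUSTER ON %s' % (
                    table_name, cluster_index))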