Exemple #1
0
    def backup_table(self, table_name):
        """Export `table_name` to storage unless its latest month is already there.

        The latest month is read with ``SELECT max(month)`` from the table.
        Existing backups are discovered by listing blobs under
        ``backups/<table_name>/`` and collecting the ``YYYY_MM`` path segment
        of each blob name.  If the latest month is among them, a message is
        printed and nothing is exported; otherwise the table is exported via
        ``TableExporter``.

        Args:
            table_name: name of the table to back up.
        """
        client = Client("hscic")
        sql = "SELECT max(month) FROM {hscic}.%s" % table_name
        latest_date = client.query(sql).rows[0][0]
        latest_year_and_month = latest_date.strftime("%Y_%m")
        table = client.get_table(table_name)

        storage_client = StorageClient()
        bucket = storage_client.bucket()
        year_and_months = set()

        prefix_base = "backups/{}/".format(table_name)

        # Raw string so that `\d` is a regex escape rather than an (invalid)
        # string escape; compiled once because it is reused for every blob.
        year_month_pattern = re.compile(r"/(\d{4}_\d{2})/")

        for blob in bucket.list_blobs(prefix=prefix_base):
            match = year_month_pattern.search(blob.name)
            if match is None:
                # A blob under the prefix without a /YYYY_MM/ segment is not
                # a monthly backup; skip it instead of failing on `None`.
                continue
            year_and_months.add(match.group(1))

        if latest_year_and_month in year_and_months:
            print("{} table already backed up for {}".format(
                table_name, latest_year_and_month))
            return

        # NOTE: prefix_base already ends with "/", so this path contains "//".
        # It is left unchanged here so the storage layout stays the same.
        storage_prefix = "{}/{}/{}-".format(prefix_base, latest_year_and_month,
                                            table_name)
        exporter = TableExporter(table, storage_prefix)
        exporter.export_to_storage()
Exemple #2
0
    def create_storage_backed_table(self, table_id, schema, gcs_path):
        """Create a table whose data lives in a CSV blob in storage.

        Args:
            table_id: id of the table to create in ``self.dataset_id``.
            schema: iterable of fields exposing ``name`` and ``field_type``.
            gcs_path: path of the CSV blob within the bucket.

        Returns:
            The result of ``self.get_table(table_id)``.

        Raises:
            RuntimeError: if no blob exists at ``gcs_path``.
            NotFound: re-raised when the failure is not a missing dataset.
        """
        if StorageClient().bucket().get_blob(gcs_path) is None:
            raise RuntimeError("Could not find blob at {}".format(gcs_path))

        # NOTE(review): the URI uses self.project as the bucket name;
        # presumably StorageClient.bucket() returns that same bucket - confirm.
        source_uri = "gs://{}/{}".format(self.project, gcs_path)

        fields = []
        for field in schema:
            fields.append({
                "name": field.name,
                "type": field.field_type.lower(),
            })

        external_config = {
            # The first row of the CSV is skipped (header row).
            "csvOptions": {"skipLeadingRows": "1"},
            "sourceFormat": "CSV",
            "sourceUris": [source_uri],
            "schema": {"fields": fields},
        }
        resource = {
            "tableReference": {"tableId": table_id},
            "externalDataConfiguration": external_config,
        }

        path = "/projects/{}/datasets/{}/tables".format(
            self.project, self.dataset_id)

        def post_table():
            # Looked up on each call, exactly as the two inline calls did.
            self.gcbq_client._connection.api_request(method="POST",
                                                     path=path,
                                                     data=resource)

        try:
            post_table()
        except NotFound as e:
            if not dataset_is_missing(e):
                raise
            # The dataset does not exist yet: create it and retry once.
            self.create_dataset()
            post_table()

        return self.get_table(table_id)
Exemple #3
0
    def create_storage_backed_table(self, table_id, schema, gcs_path):
        """Create a table backed by a CSV blob stored at `gcs_path`.

        Checks that the blob exists, POSTs a table resource with an
        ``externalDataConfiguration`` pointing at it, and creates the dataset
        first if the POST fails because the dataset is missing.

        Args:
            table_id: id of the table to create.
            schema: iterable of fields exposing ``name`` and ``field_type``.
            gcs_path: path of the CSV blob within the bucket.

        Returns:
            The result of ``self.get_table(table_id)``.

        Raises:
            RuntimeError: if no blob exists at ``gcs_path``.
            NotFound: re-raised when the failure is not a missing dataset.
        """
        storage = StorageClient()
        blob = storage.bucket().get_blob(gcs_path)
        if blob is None:
            raise RuntimeError('Could not find blob at {}'.format(gcs_path))

        # NOTE(review): self.project doubles as the bucket name in the URI -
        # confirm this matches what StorageClient.bucket() returns.
        uri = 'gs://{}/{}'.format(self.project, gcs_path)
        columns = [
            {'name': column.name, 'type': column.field_type.lower()}
            for column in schema
        ]

        resource = {
            'tableReference': {'tableId': table_id},
            'externalDataConfiguration': {
                'csvOptions': {'skipLeadingRows': '1'},  # skip the header row
                'sourceFormat': 'CSV',
                'sourceUris': [uri],
                'schema': {'fields': columns}
            }
        }

        # Both the first attempt and the retry send the same request.
        request_kwargs = {
            'method': 'POST',
            'path': '/projects/{}/datasets/{}/tables'.format(
                self.project,
                self.dataset_id
            ),
            'data': resource
        }

        try:
            self.gcbq_client._connection.api_request(**request_kwargs)
        except NotFound as e:
            if dataset_is_missing(e):
                # Create the missing dataset, then retry once.
                self.create_dataset()
                self.gcbq_client._connection.api_request(**request_kwargs)
            else:
                raise

        return self.get_table(table_id)
Exemple #4
0
 def __init__(self, table, storage_prefix):
     """Store the table and storage prefix and fetch the storage bucket.

     Args:
         table: table object kept on ``self.table``.
         storage_prefix: prefix kept on ``self.storage_prefix``.
     """
     self.table, self.storage_prefix = table, storage_prefix
     # The bucket is obtained from a new StorageClient and kept for later use.
     self.bucket = StorageClient().bucket()
 def upload_to_storage(self, local_path, storage_path):
     """Upload the local file at `local_path` to `storage_path` in the bucket.

     Args:
         local_path: path of the file on the local filesystem.
         storage_path: destination blob name within the bucket.
     """
     client = StorageClient()
     bucket = client.bucket()
     blob = bucket.blob(storage_path)
     # Open in binary mode: the upload API expects a binary file handle, and
     # text mode would decode the content (failing or altering bytes that
     # are not valid in the default encoding).
     with open(local_path, "rb") as f:
         blob.upload_from_file(f)