# Example #1
 def testReadMasterKeyFile(self):
     """Round-trip: a stored master key file decodes back to the raw key."""
     infile = os.path.join(self.dirname, 'created_master_key_file')
     self.assertFalse(os.path.exists(infile))
     load_lib._CreateAndStoreMasterKeyFile(infile)
     self.assertTrue(os.path.exists(infile))
     # The file stores the key base64-encoded; ReadMasterKeyFile must return
     # the decoded raw bytes.  Use a context manager so the handle is closed
     # even if an assertion fails (the original leaked it on failure), and
     # assertEqual instead of the deprecated assertEquals alias.
     with open(infile, 'rt') as f:
         master_key = base64.b64decode(f.read())
     self.assertEqual(master_key, load_lib.ReadMasterKeyFile(infile))
    def Query(self, query, **kwds):
        """Execute the given query, returning the created job and info for print.

    The query is parsed, the caller's master key is verified against the
    hash stored in the table description, the cleartext schema is recovered
    from the (encrypted, compressed) copy in that description, and the query
    is rewritten before being handed to the underlying client.

    Arguments:
      query: Query to execute.
      **kwds: Passed on to BigqueryClient.ExecuteJob.

    Returns:
      The resulting job info and other info necessary for printing.

    Raises:
      BigqueryInvalidQueryError: if the query cannot be parsed.
      BigqueryAccessDeniedError: if the master key does not match the
        key hash stored with the table.
      BigqueryNotFoundError: if the table's EBQ version is not current.
    """
        self._CheckKeyfileFlag()
        master_key = load_lib.ReadMasterKeyFile(self.master_key_filename)

        try:
            clauses = parser.ParseQuery(query)
        except ParseException as e:
            raise bigquery_client.BigqueryInvalidQueryError(
                e, None, None, None)
        if clauses['FROM']:
            # Per-table identifier: table name plus creation time.  A bare
            # name is not unique over time (a table can be deleted and
            # recreated), so the creation timestamp disambiguates.
            table_id = '%s_%s' % (clauses['FROM'][0],
                                  self._GetTableCreationTime(
                                      clauses['FROM'][0]))
            hashed_table_key, table_version, table_schema = self._GetEBQTableInfo(
                clauses['FROM'][0])
            # Verify the caller's key: compare base64(SHA1(master_key)) with
            # the hash recorded in the table description.
            hashed_master_key = hashlib.sha1(master_key)
            # pylint: disable=too-many-function-args
            hashed_master_key = base64.b64encode(hashed_master_key.digest())
            if hashed_master_key != hashed_table_key:
                raise bigquery_client.BigqueryAccessDeniedError(
                    'Invalid master key for this table.', None, None, None)
            if table_version != util.EBQ_TABLE_VERSION:
                raise bigquery_client.BigqueryNotFoundError(
                    'Invalid table version.', None, None, None)
            # Recover the cleartext schema: base64 -> decrypt -> decompress
            # -> JSON, the reverse of how CreateTable stored it.
            cipher = ecrypto.ProbabilisticCipher(master_key)
            orig_schema = zlib.decompress(
                cipher.Decrypt(base64.b64decode(table_schema), raw=True))
            orig_schema = json.loads(orig_schema.decode('utf-8'))
        else:
            # No FROM clause: nothing to verify or decrypt against.
            table_id = None
            orig_schema = []

        manifest = query_lib.QueryManifest.Generate()
        rewritten_query, print_args = query_lib.RewriteQuery(
            clauses, orig_schema, master_key, table_id, manifest)
        job = super(EncryptedBigqueryClient,
                    self).Query(rewritten_query, **kwds)
        self._LoadJobStatistics(manifest, job)

        # Install a result printer built from the rewrite's print arguments so
        # output is rendered through the encrypted-table printer.
        printer = EncryptedTablePrinter(**print_args)
        bq.Factory.ClientTablePrinter.SetTablePrinter(printer)

        return job
    def CreateTable(self,
                    reference,
                    ignore_existing=False,
                    schema=None,
                    description=None,
                    friendly_name=None,
                    expiration=None):
        """Create a table corresponding to TableReference.

    Arguments:
      reference: the TableReference to create.
      ignore_existing: (boolean, default False) If False, raise an exception if
        the table already exists.
      schema: A required schema (also requires a master key).
      description: an optional table description.
      friendly_name: an optional friendly name for the table.
      expiration: optional expiration time in milliseconds since the epoch.

    Raises:
      TypeError: if reference is not a TableReference.
      BigqueryNotFoundError: if no schema was supplied.
      BigqueryDuplicateError: if reference exists and ignore_existing
        is False.
    """
        if schema is None:
            raise bigquery_client.BigqueryNotFoundError(
                'A schema must be specified when making a table.', None, None,
                None)
        self._CheckKeyfileFlag()
        parsed_schema = load_lib.ReadSchemaFile(schema)
        master_key = load_lib.ReadMasterKeyFile(self.master_key_filename, True)
        # Record base64(SHA1(master_key)) in the description so later
        # operations can check the caller holds the same key.
        # pylint: disable=too-many-function-args
        key_digest = base64.b64encode(hashlib.sha1(master_key).digest())
        # The cleartext schema travels with the table: JSON -> utf-8 ->
        # compress -> encrypt -> base64, embedded in the description.
        cipher = ecrypto.ProbabilisticCipher(master_key)
        packed_schema = zlib.compress(json.dumps(parsed_schema).encode('utf-8'))
        encrypted_schema = base64.b64encode(cipher.Encrypt(packed_schema))
        if description is None:
            description = ''
        new_description = util.ConstructTableDescription(
            description, key_digest, util.EBQ_TABLE_VERSION, encrypted_schema)
        new_schema = load_lib.RewriteSchema(parsed_schema)
        super(EncryptedBigqueryClient,
              self).CreateTable(reference, ignore_existing, new_schema,
                                new_description, friendly_name, expiration)
    def UpdateTable(self,
                    reference,
                    schema=None,
                    description=None,
                    friendly_name=None,
                    expiration=None):
        """Updates a table.

    Arguments:
      reference: the TableReference to update.
      schema: an optional schema.
      description: an optional table description.
      friendly_name: an optional friendly name for the table.
      expiration: optional expiration time in milliseconds since the epoch.

    Raises:
      BigqueryAccessDeniedError: if a schema update is requested and the
        master key does not match the key hash stored with the table.
    """
        if schema:
            self._CheckKeyfileFlag()

        if description:
            hashed_table_key, table_version, table_schema = (
                self._GetEBQTableInfo(str(reference)))
            if schema:
                master_key = load_lib.ReadMasterKeyFile(
                    self.master_key_filename)
                # pylint: disable=too-many-function-args
                hashed_key = base64.b64encode(
                    hashlib.sha1(master_key).digest())
                if hashed_key != hashed_table_key:
                    raise bigquery_client.BigqueryAccessDeniedError(
                        'Invalid master key for this table.', None, None, None)
                cipher = ecrypto.ProbabilisticCipher(master_key)
                # Store the cleartext (pre-rewrite) schema, as CreateTable
                # does; Query and Load decrypt this and expect the original
                # schema.  The previous code encrypted the *rewritten*
                # schema, and then overwrote it entirely via
                # `str.encode('utf-8')` (which encodes the literal string
                # 'utf-8', not the schema).
                real_schema = json.dumps(schema).encode('utf-8')
                table_schema = base64.b64encode(
                    cipher.Encrypt(zlib.compress(real_schema)))
            description = util.ConstructTableDescription(
                description, hashed_table_key, table_version, table_schema)

        # Rewrite the schema if the schema is to be updated.
        if schema:
            schema = load_lib.RewriteSchema(schema)

        super(EncryptedBigqueryClient,
              self).UpdateTable(reference, schema, description, friendly_name,
                                expiration)
    def Load(self, destination_table, source, schema=None, **kwds):
        """Encrypt the given data and then load it into BigQuery.

    The job will execute synchronously if sync=True is provided as an
    argument.

    Args:
      destination_table: TableReference to load data into.
      source: String specifying source data to load.
      schema: The schema that defines fields to be loaded.
      **kwds: Passed on to self.ExecuteJob.  kwds['source_format'] selects
        the input format; absent or falsy means CSV.

    Returns:
      The resulting job info.

    Raises:
      BigqueryAccessDeniedError: if the master key or schema does not match
        the table's stored values.
      BigqueryNotFoundError: if the table's EBQ version is not current.
      app.UsageError: for unsupported source formats.
      OSError: if the temporary work directory could not be removed.
    """
        self._CheckKeyfileFlag()
        self._CheckSchemaFile(schema)

        # To make encrypting more secure, we use different keys for each table
        # and cipher. To generate a different key for each table, we need a distinct
        # table identifier for each table. A table name is not secure since a table
        # can be deleted and created with the same name and, thus the same key. The
        # only distinct identifier happens to be creation time. Therefore, we must
        # construct a table if it does not exist so we can use the creation time
        # to encrypt values.
        try:
            self.CreateTable(destination_table, schema=schema)
        except bigquery_client.BigqueryDuplicateError:
            pass  # Table already exists.

        temp_dir = tempfile.mkdtemp()
        # Ensure the temp dir is removed even if encryption or the load job
        # fails (previously it leaked on any error before the final cleanup).
        try:
            orig_schema = load_lib.ReadSchemaFile(schema)
            new_schema = load_lib.RewriteSchema(orig_schema)
            new_schema_file = '%s/schema.enc_schema' % temp_dir
            # write the new schema as a json file
            with open(new_schema_file, 'wt') as f:
                json.dump(new_schema, f, indent=2)
            new_source_file = '%s/data.enc_data' % temp_dir
            # TODO(user): Put the filepath to the master key in .bigqueryrc file.
            master_key = load_lib.ReadMasterKeyFile(
                self.master_key_filename, True)
            table_name = str(destination_table).split(':')[-1]
            table_id = '%s_%s' % (
                table_name, self._GetTableCreationTime(str(destination_table)))
            hashed_table_key, table_version, table_schema = (
                self._GetEBQTableInfo(str(destination_table)))
            # Verify the caller's key against the hash in the description.
            hashed_master_key = hashlib.sha1(master_key)
            # pylint: disable=too-many-function-args
            hashed_master_key = base64.b64encode(hashed_master_key.digest())
            if hashed_master_key != hashed_table_key:
                raise bigquery_client.BigqueryAccessDeniedError(
                    'Invalid master key for this table.', None, None, None)
            if table_version != util.EBQ_TABLE_VERSION:
                raise bigquery_client.BigqueryNotFoundError(
                    'Invalid table version.', None, None, None)
            # TODO(user): Generate a different key.
            cipher = ecrypto.ProbabilisticCipher(master_key)
            table_schema = cipher.Decrypt(
                base64.b64decode(table_schema), raw=True)
            table_schema = zlib.decompress(table_schema)
            table_schema = table_schema.decode('utf-8')
            table_schema = json.loads(table_schema)
            if table_schema != orig_schema:
                raise bigquery_client.BigqueryAccessDeniedError(
                    'Invalid schema for this table.', None, None, None)
            # Use .get so a missing source_format falls through to the CSV
            # default instead of raising KeyError (the falsy case was already
            # handled below, but only if the key existed).
            source_format = kwds.get('source_format')
            if source_format == 'NEWLINE_DELIMITED_JSON':
                load_lib.ConvertJsonDataFile(orig_schema, master_key, table_id,
                                             source, new_source_file)
            elif source_format == 'CSV' or not source_format:
                load_lib.ConvertCsvDataFile(orig_schema, master_key, table_id,
                                            source, new_source_file)
            else:
                raise app.UsageError(
                    'Currently, we do not allow loading from file types other than\n'
                    'NEWLINE_DELIMITED_JSON and CSV.')
            job = super(EncryptedBigqueryClient, self).Load(destination_table,
                                                            new_source_file,
                                                            schema=new_schema_file,
                                                            **kwds)
        finally:
            try:
                shutil.rmtree(temp_dir)
            except OSError:
                raise OSError('Temp file deleted by user before termination.')
        return job