Example no. 1
    def validate_obfuscation(self):
        """Validates obfuscation workflow."""
        output_target = self.get_targets_from_remote_path(
            self.test_out, '*.tar.gz.gpg')[0]
        output_filename = os.path.basename(output_target.path)
        temp_output_filepath = os.path.join(self.temporary_dir,
                                            output_filename)

        # Download the encrypted archive from the remote output target.
        with output_target.open('r') as input_file:
            with open(temp_output_filepath, 'w') as output_file:
                copy_file_to_file(input_file, output_file)

        # Decrypt the archive with the test GPG key, then unpack it.
        decrypted_filepath = temp_output_filepath[:-len('.gpg')]
        fs.decrypt_file(temp_output_filepath, decrypted_filepath,
                        'insecure_secret.key')

        with tarfile.open(decrypted_filepath, 'r:gz') as tfile:
            tfile.extractall(self.temporary_dir)

        # Validate package metadata info.
        metadata_filepath = os.path.join(self.temporary_dir,
                                         'metadata_file.json')
        with open(metadata_filepath) as metadata_file:
            metadata_info = json.load(metadata_file)
        self.assertItemsEqual(metadata_info['format_version'],
                              self.FORMAT_VERSION)
        self.assertItemsEqual(metadata_info['pipeline_version'],
                              self.PIPELINE_VERSION)

        self.validate_data_obfuscation()
        self.validate_events_obfuscation()
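
# The copy_file_to_file helper used above is not defined in this example; a
# minimal sketch of such a helper, assuming both arguments are open
# file-like objects, could stream the target in fixed-size chunks:
def copy_file_to_file(input_file, output_file, chunk_size=1024 * 1024):
    """Hypothetical stand-in: stream input_file into output_file."""
    while True:
        chunk = input_file.read(chunk_size)
        if not chunk:
            break
        output_file.write(chunk)
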
    def validate_obfuscation(self):
        """Validates obfuscation workflow."""
        output_target = PathSetTask([self.test_out],
                                    ['*.tar.gz.gpg']).output()[0]
        output_filename = os.path.basename(output_target.path)
        output_filepath = os.path.join(self.temporary_dir, output_filename)

        # Re-point s3:// URLs at the s3+https:// scheme before opening
        # the target for reading.
        if output_target.path.startswith('s3://'):
            output_target = get_target_from_url(
                output_target.path.replace('s3://', 's3+https://'))

        with output_target.open('r') as input_file:
            with open(output_filepath, 'w') as output_file:
                copy_file_to_file(input_file, output_file)

        decrypted_filepath = output_filepath[:-len('.gpg')]
        fs.decrypt_file(output_filepath, decrypted_filepath,
                        'insecure_secret.key')

        with tarfile.open(decrypted_filepath, 'r:gz') as tfile:
            tfile.extractall(self.temporary_dir)

        # Validate package metadata info.
        metadata_filepath = os.path.join(self.temporary_dir,
                                         'metadata_file.json')
        with open(metadata_filepath) as metadata_file:
            metadata_info = json.load(metadata_file)
        self.assertItemsEqual(metadata_info['format_version'],
                              self.FORMAT_VERSION)
        self.assertItemsEqual(metadata_info['pipeline_version'],
                              self.PIPELINE_VERSION)

        self.validate_data_obfuscation()
        self.validate_events_obfuscation()
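
# fs.decrypt_file above is the pipeline's own helper; a rough sketch of an
# equivalent, assuming the gpg command-line tool and a passphrase-less
# exported test key, might shell out like this:
import shutil
import subprocess
import tempfile

def decrypt_file_sketch(encrypted_filepath, decrypted_filepath, key_filepath):
    """Hypothetical equivalent of fs.decrypt_file using the gpg CLI."""
    # Import the secret key into a throwaway keyring, then decrypt with it.
    gpg_home = tempfile.mkdtemp()
    try:
        subprocess.check_call(
            ['gpg', '--homedir', gpg_home, '--batch', '--quiet',
             '--import', key_filepath])
        subprocess.check_call(
            ['gpg', '--homedir', gpg_home, '--batch', '--quiet',
             '--output', decrypted_filepath, '--decrypt', encrypted_filepath])
    finally:
        shutil.rmtree(gpg_home)
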
    def validate_output_file(self, date, org_id, site, use_master_key=False):
        if use_master_key:
            key_filename = 'insecure_master_secret.key'
        elif org_id == 'edx':
            key_filename = 'insecure_secret.key'
        else:
            key_filename = 'insecure_secret_2.key'

        self.temporary_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.temporary_dir)

        self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
        os.makedirs(self.downloaded_outputs)

        local_file_name = '{org}-{site}-events-{date}.log'.format(
            org=org_id,
            site=site,
            date=date,
        )

        year = str(date).split("-")[0]

        remote_url = url_path_join(self.test_out, org_id, site, "events", year,
                                   local_file_name + '.gz.gpg')

        # Files won't appear in S3 instantaneously; poll until the file appears.
        # TODO: use exponential backoff (see the sketch after this method).
        for _index in range(30):
            key = self.s3_client.get_key(remote_url)
            if key is not None:
                break
            else:
                time.sleep(2)

        if key is None:
            self.fail(
                'Unable to find expected output file {0}'.format(remote_url))

        downloaded_output_path = os.path.join(self.downloaded_outputs,
                                              remote_url.split('/')[-1])
        key.get_contents_to_filename(downloaded_output_path)

        # first decrypt file
        decrypted_file_name = downloaded_output_path[:-len('.gpg')]
        fs.decrypt_file(downloaded_output_path, decrypted_file_name,
                        key_filename)

        # now decompress file
        decompressed_file_name = decrypted_file_name[:-len('.gz')]
        fs.decompress_file(decrypted_file_name, decompressed_file_name)

        shell.run([
            'diff', decompressed_file_name,
            os.path.join(self.data_dir, 'output', local_file_name)
        ])
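
# The loop above notes a TODO for exponential backoff; a sketch of that
# variant, keeping roughly the same 60-second budget and assuming the same
# s3_client interface, doubles the sleep after each miss:
import time

def wait_for_key(s3_client, remote_url, max_wait_seconds=60):
    """Hypothetical backoff variant of the fixed two-second polling loop."""
    delay = 1
    waited = 0
    while waited < max_wait_seconds:
        key = s3_client.get_key(remote_url)
        if key is not None:
            return key
        time.sleep(delay)
        waited += delay
        delay = min(delay * 2, 16)  # cap the delay so total wait stays bounded
    return None
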
    def validate_output_file(self, date, org_id, site, use_master_key=False):
        if use_master_key:
            key_filename = 'insecure_master_secret.key'
        elif org_id == 'edx':
            key_filename = 'insecure_secret.key'
        else:
            key_filename = 'insecure_secret_2.key'

        self.temporary_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.temporary_dir)

        self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
        os.makedirs(self.downloaded_outputs)

        local_file_name = '{org}-{site}-events-{date}.log'.format(
            org=org_id,
            site=site,
            date=date,
        )

        year = str(date).split("-")[0]

        remote_url = url_path_join(self.test_out, org_id, site, "events", year,
                                   local_file_name + '.gz.gpg')

        downloaded_output_path = get_file_from_key(self.s3_client, remote_url,
                                                   self.downloaded_outputs)

        if downloaded_output_path is None:
            self.fail(
                'Unable to find expected output file {0}'.format(remote_url))

        # first decrypt file
        decrypted_file_name = downloaded_output_path[:-len('.gpg')]
        fs.decrypt_file(downloaded_output_path, decrypted_file_name,
                        key_filename)

        # now decompress file
        decompressed_file_name = decrypted_file_name[:-len('.gz')]
        fs.decompress_file(decrypted_file_name, decompressed_file_name)

        shell.run([
            'diff', decompressed_file_name,
            os.path.join(self.data_dir, 'output', local_file_name)
        ])
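
# get_file_from_key is not defined in this example; a plausible sketch,
# assuming it wraps the polling-and-download logic from the earlier version
# behind a single call and returns None when the key never appears:
import os
import time

def get_file_from_key(s3_client, remote_url, local_dir, attempts=30):
    """Hypothetical helper: poll S3 for the key, then download it."""
    for _index in range(attempts):
        key = s3_client.get_key(remote_url)
        if key is not None:
            local_path = os.path.join(local_dir, remote_url.split('/')[-1])
            key.get_contents_to_filename(local_path)
            return local_path
        time.sleep(2)
    return None
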
    def validate_output_file(self, date, org_id, site, use_master_key=False):
        if use_master_key:
            key_filename = 'insecure_master_secret.key'
        elif org_id == 'edx':
            key_filename = 'insecure_secret.key'
        else:
            key_filename = 'insecure_secret_2.key'

        self.temporary_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.temporary_dir)

        self.downloaded_output_dir = os.path.join(self.temporary_dir, 'output')
        os.makedirs(self.downloaded_output_dir)

        local_file_name = '{org}-{site}-events-{date}.log'.format(
            org=org_id,
            site=site,
            date=date,
        )

        year = str(date).split("-")[0]

        remote_url = url_path_join(self.test_out, org_id, site, "events", year,
                                   local_file_name + '.gz.gpg')
        downloaded_output_path = self.download_file_to_local_directory(
            remote_url, self.downloaded_output_dir)

        # first decrypt file
        decrypted_file_name = downloaded_output_path[:-len('.gpg')]
        fs.decrypt_file(downloaded_output_path, decrypted_file_name,
                        key_filename)

        # now decompress file
        decompressed_file_name = decrypted_file_name[:-len('.gz')]
        fs.decompress_file(decrypted_file_name, decompressed_file_name)

        original_filename = os.path.join(self.data_dir, 'output',
                                         local_file_name)
        self.assertEventLogEqual(decompressed_file_name, original_filename)
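
    # download_file_to_local_directory is likewise defined elsewhere; a
    # hypothetical reconstruction, assuming it reuses the polling download
    # above and turns a missing key into a test failure so callers always
    # get a valid local path back:
    def download_file_to_local_directory(self, remote_url, local_dir):
        downloaded_path = get_file_from_key(self.s3_client, remote_url,
                                            local_dir)
        if downloaded_path is None:
            self.fail(
                'Unable to find expected output file {0}'.format(remote_url))
        return downloaded_path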
    def validate_output_file(self, date, org_id, site, use_master_key=False):
        if use_master_key:
            key_filename = 'insecure_master_secret.key'
        elif org_id == 'edx':
            key_filename = 'insecure_secret.key'
        else:
            key_filename = 'insecure_secret_2.key'

        self.temporary_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.temporary_dir)

        self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
        os.makedirs(self.downloaded_outputs)

        local_file_name = '{org}-{site}-events-{date}.log'.format(
            org=org_id,
            site=site,
            date=date,
        )

        year = str(date).split("-")[0]

        remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

        downloaded_output_path = get_file_from_key(self.s3_client, remote_url, self.downloaded_outputs)

        if downloaded_output_path is None:
            self.fail('Unable to find expected output file {0}'.format(remote_url))

        # first decrypt file
        decrypted_file_name = downloaded_output_path[:-len('.gpg')]
        fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

        # now decompress file
        decompressed_file_name = decrypted_file_name[:-len('.gz')]
        fs.decompress_file(decrypted_file_name, decompressed_file_name)

        original_filename = os.path.join(self.data_dir, 'output', local_file_name)
        self.assertEventLogEqual(decompressed_file_name, original_filename)
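
    # assertEventLogEqual belongs to the test harness rather than this
    # example; a hypothetical sketch, assuming each line of both files is a
    # JSON event (json is already imported at module level) and that line
    # order within a file is not significant:
    def assertEventLogEqual(self, actual_filepath, expected_filepath):
        def load_events(filepath):
            # Canonicalize each event so key order and whitespace
            # differences do not cause spurious failures.
            with open(filepath) as event_file:
                return sorted(json.dumps(json.loads(line), sort_keys=True)
                              for line in event_file if line.strip())

        self.assertEqual(load_events(actual_filepath),
                         load_events(expected_filepath))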