def validate_obfuscation(self):
    """Validates obfuscation workflow.

    Downloads the encrypted archive the workflow produced, decrypts and
    unpacks it, checks the package metadata, then runs the data- and
    event-specific validators.
    """
    # The workflow emits a single encrypted tarball under test_out.
    encrypted_target = self.get_targets_from_remote_path(self.test_out, '*.tar.gz.gpg')[0]
    local_encrypted_path = os.path.join(self.temporary_dir, os.path.basename(encrypted_target.path))

    # Copy the remote archive down to local disk.
    with encrypted_target.open('r') as remote_file, open(local_encrypted_path, 'w') as local_file:
        copy_file_to_file(remote_file, local_file)

    # Decrypt with the well-known test key, then unpack in place.
    decrypted_path = local_encrypted_path[:-len('.gpg')]
    fs.decrypt_file(local_encrypted_path, decrypted_path, 'insecure_secret.key')
    with tarfile.open(decrypted_path, 'r:gz') as archive:
        archive.extractall(self.temporary_dir)

    # Validate package metadata info.
    with open(os.path.join(self.temporary_dir, 'metadata_file.json')) as metadata_file:
        metadata_info = json.load(metadata_file)
    self.assertItemsEqual(metadata_info['format_version'], self.FORMAT_VERSION)
    self.assertItemsEqual(metadata_info['pipeline_version'], self.PIPELINE_VERSION)

    self.validate_data_obfuscation()
    self.validate_events_obfuscation()
def validate_obfuscation(self):
    """Validates obfuscation workflow.

    Fetches the encrypted export package, decrypts and extracts it,
    verifies the package metadata, and delegates to the data and event
    validators.
    """
    target = self.get_targets_from_remote_path(self.test_out, "*.tar.gz.gpg")[0]
    local_archive = os.path.join(self.temporary_dir, os.path.basename(target.path))

    # Copy the remote archive into the local temporary directory.
    with target.open("r") as src:
        with open(local_archive, "w") as dst:
            copy_file_to_file(src, dst)

    # Decrypt with the test key and unpack the tarball.
    plaintext_archive = local_archive[:-len(".gpg")]
    fs.decrypt_file(local_archive, plaintext_archive, "insecure_secret.key")
    with tarfile.open(plaintext_archive, "r:gz") as archive:
        archive.extractall(self.temporary_dir)

    # Validate package metadata info.
    metadata_path = os.path.join(self.temporary_dir, "metadata_file.json")
    with open(metadata_path) as handle:
        metadata = json.load(handle)
    self.assertItemsEqual(metadata["format_version"], self.FORMAT_VERSION)
    self.assertItemsEqual(metadata["pipeline_version"], self.PIPELINE_VERSION)

    self.validate_data_obfuscation()
    self.validate_events_obfuscation()
def validate_obfuscation(self):
    """Validates obfuscation workflow.

    Locates the encrypted output package, downloads, decrypts, and
    extracts it, checks the package metadata, then invokes the data and
    event obfuscation validators.
    """
    output_target = PathSetTask([self.test_out], ['*.tar.gz.gpg']).output()[0]
    local_path = os.path.join(self.temporary_dir, os.path.basename(output_target.path))

    # Re-open S3 paths via the s3+https scheme before reading.
    if output_target.path.startswith('s3://'):
        output_target = get_target_from_url(output_target.path.replace('s3://', 's3+https://'))

    with output_target.open('r') as remote_file:
        with open(local_path, 'w') as local_file:
            copy_file_to_file(remote_file, local_file)

    # Decrypt the archive using the insecure test key, then unpack it.
    plaintext_path = local_path[:-len('.gpg')]
    fs.decrypt_file(local_path, plaintext_path, 'insecure_secret.key')
    with tarfile.open(plaintext_path, 'r:gz') as tarball:
        tarball.extractall(self.temporary_dir)

    # Validate package metadata info.
    metadata_filepath = os.path.join(self.temporary_dir, 'metadata_file.json')
    with open(metadata_filepath) as metadata_file:
        metadata_info = json.load(metadata_file)
    self.assertItemsEqual(metadata_info['format_version'], self.FORMAT_VERSION)
    self.assertItemsEqual(metadata_info['pipeline_version'], self.PIPELINE_VERSION)

    self.validate_data_obfuscation()
    self.validate_events_obfuscation()
def validate_obfuscation(self):
    """Validates obfuscation workflow.

    The single encrypted tarball is downloaded, decrypted with the
    insecure test key, extracted, and its metadata is checked before the
    data and event validators run.
    """
    target = PathSetTask([self.test_out], ['*.tar.gz.gpg']).output()[0]
    archive_name = os.path.basename(target.path)
    encrypted_path = os.path.join(self.temporary_dir, archive_name)

    # Swap the scheme so S3 targets are opened via s3+https.
    if target.path.startswith('s3://'):
        target = get_target_from_url(target.path.replace('s3://', 's3+https://'))

    with target.open('r') as src, open(encrypted_path, 'w') as dst:
        copy_file_to_file(src, dst)

    decrypted_path = encrypted_path[:-len('.gpg')]
    fs.decrypt_file(encrypted_path, decrypted_path, 'insecure_secret.key')
    with tarfile.open(decrypted_path, 'r:gz') as tarball:
        tarball.extractall(self.temporary_dir)

    # Validate package metadata info.
    with open(os.path.join(self.temporary_dir, 'metadata_file.json')) as meta:
        metadata_info = json.load(meta)
    self.assertItemsEqual(metadata_info['format_version'], self.FORMAT_VERSION)
    self.assertItemsEqual(metadata_info['pipeline_version'], self.PIPELINE_VERSION)

    self.validate_data_obfuscation()
    self.validate_events_obfuscation()
def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Download one exported event log from S3 and diff it against the expected output.

    Arguments:
        date: export date; appears in both the file name and the S3 path.
        org_id: organization id; selects which decryption key to use.
        site: site name embedded in the file name.
        use_master_key: if True, decrypt with the master test key instead
            of the org-specific key.
    """
    # Pick the key matching how the file was encrypted.
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)
    self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_outputs)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

    # Files won't appear in S3 instantaneously; poll for up to ~60 seconds.
    # TODO: exponential backoff
    key = None  # ensure the name is bound even if the loop never assigns it
    for _index in range(30):
        key = self.s3_client.get_key(remote_url)
        if key is not None:
            break
        time.sleep(2)
    if key is None:
        self.fail('Unable to find expected output file {0}'.format(remote_url))

    downloaded_output_path = os.path.join(self.downloaded_outputs, remote_url.split('/')[-1])
    key.get_contents_to_filename(downloaded_output_path)

    # first decrypt file
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # now decompress file -- strip '.gz' (fixes the former ',gz' typo; the
    # slice length is identical, so behavior is unchanged)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    # diff exits non-zero on any difference -- presumably shell.run
    # raises on that, failing the test; confirm against the helper.
    shell.run(['diff', decompressed_file_name, os.path.join(self.data_dir, 'output', local_file_name)])
def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Fetch one exported event log from S3, decrypt/decompress it, and diff it.

    Arguments:
        date: export date; used in the file name and the remote path.
        org_id: organization id; chooses the decryption key.
        site: site name embedded in the file name.
        use_master_key: decrypt with the master test key when True.
    """
    # Choose the decryption key: master key overrides the per-org keys.
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)
    self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_outputs)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

    # Files won't appear in S3 instantaneously, wait for the files to appear.
    # TODO: exponential backoff
    key = None  # bound up-front so the post-loop check is always safe
    for _index in range(30):
        key = self.s3_client.get_key(remote_url)
        if key is not None:
            break
        time.sleep(2)
    if key is None:
        self.fail('Unable to find expected output file {0}'.format(remote_url))

    downloaded_output_path = os.path.join(self.downloaded_outputs, remote_url.split('/')[-1])
    key.get_contents_to_filename(downloaded_output_path)

    # first decrypt file
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # now decompress file; the suffix stripped is '.gz' (was the typo ',gz',
    # which only worked because both literals are three characters long)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    shell.run(['diff', decompressed_file_name, os.path.join(self.data_dir, 'output', local_file_name)])
def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Download one exported event log and diff it against the expected file.

    Arguments:
        date: export date; appears in both the file name and its S3 path.
        org_id: organization id; selects the decryption key.
        site: site name embedded in the file name.
        use_master_key: decrypt with the master test key instead of the
            org-specific key.
    """
    # Select the key that corresponds to how the file was encrypted.
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)
    self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_outputs)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

    # Helper returns None when the key never appears in S3.
    downloaded_output_path = get_file_from_key(self.s3_client, remote_url, self.downloaded_outputs)
    if downloaded_output_path is None:
        self.fail('Unable to find expected output file {0}'.format(remote_url))

    # first decrypt file
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # now decompress file -- strip '.gz' (fixes the former ',gz' typo; the
    # slice length is identical, so behavior is unchanged)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    shell.run(['diff', decompressed_file_name, os.path.join(self.data_dir, 'output', local_file_name)])
def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Download one exported event log and compare it to the expected event log.

    Arguments:
        date: export date; appears in both the file name and its S3 path.
        org_id: organization id; selects the decryption key.
        site: site name embedded in the file name.
        use_master_key: decrypt with the master test key instead of the
            org-specific key.
    """
    # Select the key that corresponds to how the file was encrypted.
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)
    self.downloaded_output_dir = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_output_dir)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

    downloaded_output_path = self.download_file_to_local_directory(remote_url, self.downloaded_output_dir)

    # first decrypt file
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # now decompress file -- strip '.gz' (fixes the former ',gz' typo; the
    # slice length is identical, so behavior is unchanged)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    # Compare event-by-event rather than textually.
    original_filename = os.path.join(self.data_dir, 'output', local_file_name)
    self.assertEventLogEqual(decompressed_file_name, original_filename)
def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Fetch one exported event log from S3 and assert it matches the expected log.

    Arguments:
        date: export date; appears in both the file name and its S3 path.
        org_id: organization id; selects the decryption key.
        site: site name embedded in the file name.
        use_master_key: decrypt with the master test key instead of the
            org-specific key.
    """
    # Choose the decryption key: master key overrides the per-org keys.
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)
    self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_outputs)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

    # Helper returns None when the key never appears in S3.
    downloaded_output_path = get_file_from_key(self.s3_client, remote_url, self.downloaded_outputs)
    if downloaded_output_path is None:
        self.fail('Unable to find expected output file {0}'.format(remote_url))

    # first decrypt file
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # now decompress file -- strip '.gz' (fixes the former ',gz' typo; the
    # slice length is identical, so behavior is unchanged)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    # Compare event-by-event rather than textually.
    original_filename = os.path.join(self.data_dir, 'output', local_file_name)
    self.assertEventLogEqual(decompressed_file_name, original_filename)