def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Download the exported event-log file for (org, site, date) from S3 and diff it against the expected fixture.

    The remote file is encrypted and gzipped (`.gz.gpg`). It is decrypted with
    the master key when `use_master_key` is True, otherwise with the
    org-specific key, then decompressed and compared byte-for-byte via `diff`.

    Fails the test if the file never appears in S3 or if `diff` reports a
    difference (shell.run raises on non-zero exit — presumably; confirm against
    the shell helper).
    """
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)

    self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_outputs)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

    # Files won't appear in S3 instantaneously, wait for the files to appear.
    # TODO: exponential backoff
    key = None
    for _index in range(30):
        key = self.s3_client.get_key(remote_url)
        if key is not None:
            break
        time.sleep(2)

    if key is None:
        self.fail(
            'Unable to find expected output file {0}'.format(remote_url))

    downloaded_output_path = os.path.join(self.downloaded_outputs, remote_url.split('/')[-1])
    key.get_contents_to_filename(downloaded_output_path)

    # First decrypt the file.
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # Now decompress the file.  (Was misspelled ',gz'; same length as '.gz'
    # so behavior is unchanged, but the corrected literal states the intent.)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    shell.run([
        'diff',
        decompressed_file_name,
        os.path.join(self.data_dir, 'output', local_file_name)
    ])
def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Download the exported event-log file for (org, site, date) from S3 and diff it against the expected fixture.

    The remote file is encrypted and gzipped (`.gz.gpg`). It is decrypted with
    the master key when `use_master_key` is True, otherwise with the
    org-specific key, then decompressed and compared via `diff`.
    """
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)

    self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_outputs)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

    # Files won't appear in S3 instantaneously, wait for the files to appear.
    # TODO: exponential backoff
    key = None
    for _index in range(30):
        key = self.s3_client.get_key(remote_url)
        if key is not None:
            break
        time.sleep(2)

    if key is None:
        self.fail('Unable to find expected output file {0}'.format(remote_url))

    downloaded_output_path = os.path.join(self.downloaded_outputs, remote_url.split('/')[-1])
    key.get_contents_to_filename(downloaded_output_path)

    # First decrypt the file.
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # Now decompress the file.  (Was misspelled ',gz'; same length as '.gz'
    # so behavior is unchanged, but the corrected literal states the intent.)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    shell.run(['diff', decompressed_file_name, os.path.join(self.data_dir, 'output', local_file_name)])
def validate_events_obfuscation(self):
    """Compare each obfuscated events file with the expected fixture of the same name."""
    actual_dir = os.path.join(self.temporary_dir, 'events')
    expected_dir = os.path.join(self.data_dir, 'output', 'obfuscation', 'events')
    for gz_name in os.listdir(actual_dir):
        compressed_path = os.path.join(actual_dir, gz_name)
        plain_path = compressed_path[:-len('.gz')]
        fs.decompress_file(compressed_path, plain_path)
        expected_path = os.path.join(expected_dir, gz_name[:-len('.gz')])
        # Parse every line as JSON and compare as unordered collections of events.
        with open(plain_path) as actual_file:
            actual_events = [json.loads(event_line) for event_line in actual_file]
        with open(expected_path) as expected_file:
            expected_events = [json.loads(event_line) for event_line in expected_file]
        self.assertItemsEqual(actual_events, expected_events)
def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Download the exported event-log file for (org, site, date) from S3 and diff it against the expected fixture.

    The remote file is encrypted and gzipped (`.gz.gpg`). It is decrypted with
    the master key when `use_master_key` is True, otherwise with the
    org-specific key, then decompressed and compared via `diff`.
    """
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)

    self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_outputs)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

    downloaded_output_path = get_file_from_key(self.s3_client, remote_url, self.downloaded_outputs)
    if downloaded_output_path is None:
        self.fail(
            'Unable to find expected output file {0}'.format(remote_url))

    # First decrypt the file.
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # Now decompress the file.  (Was misspelled ',gz'; same length as '.gz'
    # so behavior is unchanged, but the corrected literal states the intent.)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    shell.run([
        'diff',
        decompressed_file_name,
        os.path.join(self.data_dir, 'output', local_file_name)
    ])
def download_output_files(self):
    """Fetch every expected output file from the remote test location and decompress it locally.

    Records each decompressed local path on its entry in `self.output_files`
    under the 'downloaded_path' key.
    """
    remote_targets = self.get_targets_from_remote_path(self.test_out)
    self.assertEqual(len(remote_targets), len(self.output_files))

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)
    self.downloaded_output_dir = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_output_dir)

    for entry in self.output_files:
        base_name = self.generate_file_name(entry)
        source_url = url_path_join(self.test_out, entry['course_id'], "events", base_name + '.gz')
        fetched_path = self.download_file_to_local_directory(source_url, self.downloaded_output_dir)
        # Strip the '.gz' suffix to get the decompressed destination path.
        plain_path = fetched_path[:-len('.gz')]
        entry['downloaded_path'] = plain_path
        fs.decompress_file(fetched_path, plain_path)
def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Download the exported event-log file for (org, site, date) and compare it event-by-event with the expected fixture.

    The remote file is encrypted and gzipped (`.gz.gpg`). It is decrypted with
    the master key when `use_master_key` is True, otherwise with the
    org-specific key, then decompressed and checked via assertEventLogEqual.
    """
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)

    self.downloaded_output_dir = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_output_dir)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')
    downloaded_output_path = self.download_file_to_local_directory(
        remote_url, self.downloaded_output_dir)

    # First decrypt the file.
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # Now decompress the file.  (Was misspelled ',gz'; same length as '.gz'
    # so behavior is unchanged, but the corrected literal states the intent.)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    original_filename = os.path.join(self.data_dir, 'output', local_file_name)
    self.assertEventLogEqual(decompressed_file_name, original_filename)
def validate_output_file(self, date, org_id, site, use_master_key=False):
    """Download the exported event-log file for (org, site, date) from S3 and compare it event-by-event with the expected fixture.

    The remote file is encrypted and gzipped (`.gz.gpg`). It is decrypted with
    the master key when `use_master_key` is True, otherwise with the
    org-specific key, then decompressed and checked via assertEventLogEqual.
    """
    if use_master_key:
        key_filename = 'insecure_master_secret.key'
    elif org_id == 'edx':
        key_filename = 'insecure_secret.key'
    else:
        key_filename = 'insecure_secret_2.key'

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)

    self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_outputs)

    local_file_name = '{org}-{site}-events-{date}.log'.format(
        org=org_id,
        site=site,
        date=date,
    )
    year = str(date).split("-")[0]
    remote_url = url_path_join(self.test_out, org_id, site, "events", year, local_file_name + '.gz.gpg')

    downloaded_output_path = get_file_from_key(self.s3_client, remote_url, self.downloaded_outputs)
    if downloaded_output_path is None:
        self.fail('Unable to find expected output file {0}'.format(remote_url))

    # First decrypt the file.
    decrypted_file_name = downloaded_output_path[:-len('.gpg')]
    fs.decrypt_file(downloaded_output_path, decrypted_file_name, key_filename)

    # Now decompress the file.  (Was misspelled ',gz'; same length as '.gz'
    # so behavior is unchanged, but the corrected literal states the intent.)
    decompressed_file_name = decrypted_file_name[:-len('.gz')]
    fs.decompress_file(decrypted_file_name, decompressed_file_name)

    original_filename = os.path.join(self.data_dir, 'output', local_file_name)
    self.assertEventLogEqual(decompressed_file_name, original_filename)
def download_output_files(self):
    """Fetch every expected output file from S3 and decompress it locally.

    Records each decompressed local path on its entry in `self.output_files`
    under the 'downloaded_path' key; fails if any expected file is missing.
    """
    found_sources = list(generate_s3_sources(self.s3_client.s3, self.test_out))
    self.assertEqual(len(found_sources), len(self.output_files))

    self.temporary_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, self.temporary_dir)
    self.downloaded_outputs = os.path.join(self.temporary_dir, 'output')
    os.makedirs(self.downloaded_outputs)

    for entry in self.output_files:
        base_name = self.generate_file_name(entry)
        source_url = url_path_join(self.test_out, entry['course_id'], "events", base_name + '.gz')
        fetched_path = get_file_from_key(self.s3_client, source_url, self.downloaded_outputs)
        if fetched_path is None:
            self.fail('Unable to find expected output file {0}'.format(source_url))
        # Strip the '.gz' suffix to get the decompressed destination path.
        plain_path = fetched_path[:-len('.gz')]
        entry['downloaded_path'] = plain_path
        fs.decompress_file(fetched_path, plain_path)