def setUp(self):
    self.s3_client = S3Client()

    config_json = os.getenv('ACCEPTANCE_TEST_CONFIG')
    try:
        with open(config_json, 'r') as config_json_file:
            self.config = json.load(config_json_file)
    except (IOError, TypeError):
        try:
            self.config = json.loads(config_json)
        except TypeError:
            self.config = {}

    # The name of an existing job flow to run the test on.
    assert 'job_flow_name' in self.config
    # The git URL of the pipeline repository to check this code out from.
    assert 'tasks_repo' in self.config
    # The branch of the pipeline repository to test. Note this can differ from the branch that is
    # currently checked out and running this code.
    assert 'tasks_branch' in self.config
    # Where to store logs generated by the pipeline.
    assert 'tasks_log_path' in self.config
    # The user to connect to the job flow over SSH with.
    assert 'connection_user' in self.config
    # Where the pipeline should output data; this should be a URL pointing to a directory.
    assert 'tasks_output_url' in self.config
    # Allow for parallel execution of the test by specifying a different identifier. Using an
    # identical identifier allows old virtualenvs to be reused etc., which is why a random one is
    # not simply generated with each run.
    assert 'identifier' in self.config
    # A URL to a JSON file that contains most of the connection information for the MySQL database.
    assert 'credentials_file_url' in self.config
    # A URL to a JSON file that contains most of the connection information for the Vertica database.
    assert 'vertica_creds_url' in self.config
    # A URL to a build of the oddjob third-party library.
    assert 'oddjob_jar' in self.config
    # A URL to a MaxMind-compatible geolocation database file.
    assert 'geolocation_data' in self.config

    self.data_dir = os.path.join(os.path.dirname(__file__), 'fixtures')

    url = self.config['tasks_output_url']
    m = hashlib.md5()
    m.update(self.config['identifier'])
    self.identifier = m.hexdigest()
    self.test_root = url_path_join(url, self.identifier, self.__class__.__name__)
    self.test_src = url_path_join(self.test_root, 'src')
    self.test_out = url_path_join(self.test_root, 'out')

    self.catalog_path = 'http://acceptance.test/api/courses/v2'

    database_name = 'test_' + self.identifier
    schema = 'test_' + self.identifier
    import_database_name = 'import_' + database_name
    export_database_name = 'export_' + database_name
    self.warehouse_path = url_path_join(self.test_root, 'warehouse')

    task_config_override = {
        'hive': {
            'database': database_name,
            'warehouse_path': self.warehouse_path
        },
        'map-reduce': {
            'marker': url_path_join(self.test_root, 'marker')
        },
        'manifest': {
            'path': url_path_join(self.test_root, 'manifest'),
            'lib_jar': self.config['oddjob_jar']
        },
        'database-import': {
            'credentials': self.config['credentials_file_url'],
            'destination': self.warehouse_path,
            'database': import_database_name
        },
        'database-export': {
            'credentials': self.config['credentials_file_url'],
            'database': export_database_name
        },
        'vertica-export': {
            'credentials': self.config['vertica_creds_url'],
            'schema': schema
        },
        'course-catalog': {
            'catalog_path': self.catalog_path
        },
        'geolocation': {
            'geolocation_data': self.config['geolocation_data']
        },
        'event-logs': {
            'source': self.test_src
        },
        'course-structure': {
            'api_root_url': 'acceptance.test',
            'access_token': 'acceptance'
        }
    }

    log.info('Running test: %s', self.id())
    log.info('Using executor: %s', self.config['identifier'])
    log.info('Generated Test Identifier: %s', self.identifier)

    self.import_db = db.DatabaseService(self.config, import_database_name)
    self.export_db = db.DatabaseService(self.config, export_database_name)
    self.task = task.TaskService(self.config, task_config_override, self.identifier)
    self.vertica = vertica.VerticaService(self.config, schema)
    self.hive = hive.HiveService(self.task, self.config, database_name)

    self.reset_external_state()
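# Note (illustrative, not from the original source): the loader above accepts
# ACCEPTANCE_TEST_CONFIG either as a path to a JSON file or as an inline JSON string;
# if the variable is unset, self.config falls back to an empty dict and the assertions
# above fail immediately. Hypothetical example values for either form:
#
#   export ACCEPTANCE_TEST_CONFIG=/path/to/acceptance_config.json
#   export ACCEPTANCE_TEST_CONFIG='{"job_flow_name": "...", "tasks_repo": "...", "identifier": "..."}'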
def setUp(self):
    try:
        self.s3_client = ScalableS3Client()
    except Exception:
        self.s3_client = None

    self.config = get_test_config()

    for env_var in ('TASKS_REPO', 'TASKS_BRANCH', 'IDENTIFIER', 'JOB_FLOW_NAME', 'IS_REMOTE'):
        if env_var in os.environ:
            self.config[env_var.lower()] = os.environ[env_var]

    if 'is_remote' in self.config:
        self.config['is_remote'] = self.config['is_remote'].lower() not in ('0', 'false', 'f')
    else:
        self.config['is_remote'] = True

    if self.config['is_remote']:
        # The name of an existing job flow to run the test on.
        assert 'job_flow_name' in self.config or 'host' in self.config
        # The git URL of the pipeline repository to check this code out from.
        assert 'tasks_repo' in self.config
        # The branch of the pipeline repository to test. Note this can differ from the branch that is
        # currently checked out and running this code.
        assert 'tasks_branch' in self.config
        # Where to store logs generated by the pipeline.
        assert 'tasks_log_path' in self.config
        # The user to connect to the job flow over SSH with.
        assert 'connection_user' in self.config

    # Where the pipeline should output data; this should be a URL pointing to a directory.
    assert 'tasks_output_url' in self.config
    # Allow for parallel execution of the test by specifying a different identifier. Using an
    # identical identifier allows old virtualenvs to be reused etc., which is why a random one is
    # not simply generated with each run.
    assert 'identifier' in self.config
    # A URL to a JSON file that contains most of the connection information for the MySQL database.
    assert 'credentials_file_url' in self.config
    # A URL to a build of the oddjob third-party library.
    assert 'oddjob_jar' in self.config
    # A URL to a MaxMind-compatible geolocation database file.
    assert 'geolocation_data' in self.config

    self.data_dir = os.path.join(os.path.dirname(__file__), 'fixtures')

    url = self.config['tasks_output_url']
    m = hashlib.md5()
    m.update(self.config['identifier'])
    self.identifier = m.hexdigest()
    self.test_root = url_path_join(url, self.identifier, self.__class__.__name__)
    self.test_src = url_path_join(self.test_root, 'src')
    self.test_out = url_path_join(self.test_root, 'out')

    # Use a local dir for devstack testing, or s3 for production testing.
    self.report_output_root = self.config.get('report_output_root', url_path_join(self.test_out, 'reports'))

    self.catalog_path = 'http://acceptance.test/api/courses/v2'

    database_name = 'test_' + self.identifier
    schema = 'test_' + self.identifier
    import_database_name = 'acceptance_import_' + database_name
    export_database_name = 'acceptance_export_' + database_name
    otto_database_name = 'acceptance_otto_' + database_name
    elasticsearch_alias = 'alias_test_' + self.identifier
    self.warehouse_path = url_path_join(self.test_root, 'warehouse')
    self.edx_rest_api_cache_root = url_path_join(self.test_src, 'edx-rest-api-cache')

    task_config_override = {
        'hive': {
            'database': database_name,
            'warehouse_path': self.warehouse_path
        },
        'map-reduce': {
            'marker': url_path_join(self.test_root, 'marker')
        },
        'manifest': {
            'path': url_path_join(self.test_root, 'manifest'),
            'lib_jar': self.config['oddjob_jar'],
        },
        'database-import': {
            'credentials': self.config['credentials_file_url'],
            'destination': self.warehouse_path,
            'database': import_database_name
        },
        'database-export': {
            'credentials': self.config['credentials_file_url'],
            'database': export_database_name
        },
        'otto-database-import': {
            'credentials': self.config['credentials_file_url'],
            'database': otto_database_name
        },
        'course-catalog': {
            'catalog_path': self.catalog_path
        },
        'geolocation': {
            'geolocation_data': self.config['geolocation_data']
        },
        'event-logs': {
            'source': as_list_param(self.test_src, escape_quotes=False),
            'pattern': as_list_param(".*tracking.log-(?P<date>\\d{8}).*\\.gz", escape_quotes=False),
        },
        'segment-logs': {
            'source': as_list_param(self.test_src, escape_quotes=False),
            'pattern': as_list_param(".*segment.log-(?P<date>\\d{8}).*\\.gz", escape_quotes=False),
        },
        'course-structure': {
            'api_root_url': 'acceptance.test',
            'access_token': 'acceptance'
        },
        'module-engagement': {
            'alias': elasticsearch_alias
        },
        'elasticsearch': {},
        'problem-response': {
            'report_fields': '["username","problem_id","answer_id","location","question","score","max_score",'
                             '"correct","answer","total_attempts","first_attempt_date","last_attempt_date"]',
            'report_field_list_delimiter': '"|"',
            'report_field_datetime_format': '%Y-%m-%dT%H:%M:%SZ',
            'report_output_root': self.report_output_root,
            'partition_format': '%Y-%m-%dT%H',
        },
        'edx-rest-api': {
            'client_id': 'oauth_id',
            'client_secret': 'oauth_secret',
            'oauth_username': '******',
            'oauth_password': '******',
            'auth_url': 'http://acceptance.test',
        },
        'course-blocks': {
            'api_root_url': 'http://acceptance.test/api/courses/v1/blocks/',
        },
        'course-list': {
            'api_root_url': 'http://acceptance.test/api/courses/v1/courses/',
        },
    }

    if 'elasticsearch_host' in self.config:
        task_config_override['elasticsearch']['host'] = as_list_param(
            self.config['elasticsearch_host'], escape_quotes=False)
    if 'elasticsearch_connection_class' in self.config:
        task_config_override['elasticsearch']['connection_type'] = self.config['elasticsearch_connection_class']
    if 'manifest_input_format' in self.config:
        task_config_override['manifest']['input_format'] = self.config['manifest_input_format']
    if 'hive_version' in self.config:
        task_config_override['hive']['version'] = self.config['hive_version']

    log.info('Running test: %s', self.id())
    log.info('Using executor: %s', self.config['identifier'])
    log.info('Generated Test Identifier: %s', self.identifier)

    self.import_db = db.DatabaseService(self.config, import_database_name)
    self.export_db = db.DatabaseService(self.config, export_database_name)
    self.otto_db = db.DatabaseService(self.config, otto_database_name)
    self.task = task.TaskService(self.config, task_config_override, self.identifier)
    self.hive = hive.HiveService(self.task, self.config, database_name)
    self.elasticsearch = elasticsearch_service.ElasticsearchService(self.config, elasticsearch_alias)

    self.reset_external_state()

    max_diff = os.getenv('MAX_DIFF', None)
    if max_diff is not None:
        if max_diff.lower() == "infinite":
            self.maxDiff = None
        else:
            self.maxDiff = int(max_diff)
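# Usage note (inferred from the code above, not separately documented): MAX_DIFF maps directly
# onto unittest's maxDiff attribute for this test case, e.g.
#   MAX_DIFF=infinite  ->  self.maxDiff = None   (never truncate assertion diffs)
#   MAX_DIFF=20000     ->  self.maxDiff = 20000  (truncate diffs beyond 20000 characters)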
def setUp(self):
    try:
        self.s3_client = S3Client()
    except Exception:
        self.s3_client = None

    self.config = get_test_config()

    for env_var in ('TASKS_REPO', 'TASKS_BRANCH', 'IDENTIFIER', 'JOB_FLOW_NAME'):
        if env_var in os.environ:
            self.config[env_var.lower()] = os.environ[env_var]

    # The name of an existing job flow to run the test on.
    assert 'job_flow_name' in self.config or 'host' in self.config
    # The git URL of the pipeline repository to check this code out from.
    assert 'tasks_repo' in self.config
    # The branch of the pipeline repository to test. Note this can differ from the branch that is
    # currently checked out and running this code.
    assert 'tasks_branch' in self.config
    # Where to store logs generated by the pipeline.
    assert 'tasks_log_path' in self.config
    # The user to connect to the job flow over SSH with.
    assert 'connection_user' in self.config
    # Where the pipeline should output data; this should be a URL pointing to a directory.
    assert 'tasks_output_url' in self.config
    # Allow for parallel execution of the test by specifying a different identifier. Using an
    # identical identifier allows old virtualenvs to be reused etc., which is why a random one is
    # not simply generated with each run.
    assert 'identifier' in self.config
    # A URL to a JSON file that contains most of the connection information for the MySQL database.
    assert 'credentials_file_url' in self.config
    # A URL to a build of the oddjob third-party library.
    assert 'oddjob_jar' in self.config
    # A URL to a MaxMind-compatible geolocation database file.
    assert 'geolocation_data' in self.config

    self.data_dir = os.path.join(os.path.dirname(__file__), 'fixtures')

    url = self.config['tasks_output_url']
    m = hashlib.md5()
    m.update(self.config['identifier'])
    self.identifier = m.hexdigest()
    self.test_root = url_path_join(url, self.identifier, self.__class__.__name__)
    self.test_src = url_path_join(self.test_root, 'src')
    self.test_out = url_path_join(self.test_root, 'out')

    self.catalog_path = 'http://acceptance.test/api/courses/v2'

    database_name = 'test_' + self.identifier
    schema = 'test_' + self.identifier
    import_database_name = 'acceptance_import_' + database_name
    export_database_name = 'acceptance_export_' + database_name
    otto_database_name = 'acceptance_otto_' + database_name
    elasticsearch_alias = 'alias_test_' + self.identifier
    self.warehouse_path = url_path_join(self.test_root, 'warehouse')

    task_config_override = {
        'hive': {
            'database': database_name,
            'warehouse_path': self.warehouse_path
        },
        'map-reduce': {
            'marker': url_path_join(self.test_root, 'marker')
        },
        'manifest': {
            'path': url_path_join(self.test_root, 'manifest'),
            'lib_jar': self.config['oddjob_jar']
        },
        'database-import': {
            'credentials': self.config['credentials_file_url'],
            'destination': self.warehouse_path,
            'database': import_database_name
        },
        'database-export': {
            'credentials': self.config['credentials_file_url'],
            'database': export_database_name
        },
        'otto-database-import': {
            'credentials': self.config['credentials_file_url'],
            'database': otto_database_name
        },
        'course-catalog': {
            'catalog_path': self.catalog_path
        },
        'geolocation': {
            'geolocation_data': self.config['geolocation_data']
        },
        'event-logs': {
            'source': self.test_src
        },
        'course-structure': {
            'api_root_url': 'acceptance.test',
            'access_token': 'acceptance'
        },
        'module-engagement': {
            'alias': elasticsearch_alias
        },
        'elasticsearch': {}
    }

    if 'vertica_creds_url' in self.config:
        task_config_override['vertica-export'] = {
            'credentials': self.config['vertica_creds_url'],
            'schema': schema
        }
    if 'elasticsearch_host' in self.config:
        task_config_override['elasticsearch']['host'] = self.config['elasticsearch_host']
    if 'elasticsearch_connection_class' in self.config:
        task_config_override['elasticsearch']['connection_type'] = self.config['elasticsearch_connection_class']
    if 'manifest_input_format' in self.config:
        task_config_override['manifest']['input_format'] = self.config['manifest_input_format']
    if 'hive_version' in self.config:
        task_config_override['hive']['version'] = self.config['hive_version']

    log.info('Running test: %s', self.id())
    log.info('Using executor: %s', self.config['identifier'])
    log.info('Generated Test Identifier: %s', self.identifier)

    self.import_db = db.DatabaseService(self.config, import_database_name)
    self.export_db = db.DatabaseService(self.config, export_database_name)
    self.otto_db = db.DatabaseService(self.config, otto_database_name)
    self.task = task.TaskService(self.config, task_config_override, self.identifier)
    self.hive = hive.HiveService(self.task, self.config, database_name)
    self.vertica = vertica.VerticaService(self.config, schema)
    self.elasticsearch = elasticsearch_service.ElasticsearchService(self.config, elasticsearch_alias)

    self.reset_external_state()

    max_diff = os.getenv('MAX_DIFF', None)
    if max_diff is not None:
        if max_diff.lower() == "infinite":
            self.maxDiff = None
        else:
            self.maxDiff = int(max_diff)
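# Example (hypothetical values): because the loop at the top of setUp copies selected
# environment variables into self.config under their lower-cased names, individual settings
# from get_test_config() can be overridden per run, e.g.
#   TASKS_BRANCH=my-feature-branch IDENTIFIER=dev-run-1 JOB_FLOW_NAME=analytics-acceptance <test runner>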