def test_collection_of_resource_uuids(self):
    '''
    To ensure that we don't erase crucial data resources in a workspace
    we have a utility function that scans through the executed operations
    of a workspace and returns a list of the "used" resource UUIDs.
    Here, we test that it returns the expected list
    '''
    # an operation with no DataResource inputs/outputs should give
    # back an empty list:
    spec_path = os.path.join(TESTDIR, 'simple_op_test.json')
    op_spec = read_operation_json(spec_path)
    self.assertEqual(
        collect_resource_uuids(op_spec['inputs'], {'some_string': 'abc'}),
        [])

    # an empty output/input dict also yields an empty list:
    self.assertEqual(collect_resource_uuids(op_spec['outputs'], {}), [])

    # now an operation whose outputs ARE DataResources:
    spec_path = os.path.join(TESTDIR, 'valid_workspace_operation.json')
    op_spec = read_operation_json(spec_path)
    mock_outputs = {'norm_counts': 'abc', 'dge_table': 'xyz'}
    self.assertEqual(
        collect_resource_uuids(op_spec['outputs'], mock_outputs),
        ['abc', 'xyz'])

    # a DataResource output that went unused (value None) is fine
    # and simply skipped:
    mock_outputs = {'norm_counts': None, 'dge_table': 'xyz'}
    self.assertEqual(
        collect_resource_uuids(op_spec['outputs'], mock_outputs),
        ['xyz'])

    # any discrepancy between the spec's expected keys and the actual
    # outputs must raise:
    with self.assertRaises(Exception):
        collect_resource_uuids(op_spec['outputs'], {'junk': 'abc'})
def add_dummy_operation(self):
    '''
    Ingests two valid operation specs from the test folder (one
    regular, one workspace-based) so that tests have Operation
    database records and saved operation directories to work with.

    The original implementation duplicated the entire ingestion
    sequence twice; the shared steps live in the local helper below.
    '''
    def _ingest(spec_filename, dirname):
        # Read a valid operation spec and inject the fields that are
        # normally added during ingestion:
        op_spec_file = os.path.join(settings.BASE_DIR,
            'api', 'tests', 'operation_test_files', spec_filename)
        d = read_operation_json(op_spec_file)
        d['id'] = str(uuid.uuid4())
        d['git_hash'] = 'abcd'
        d['repository_url'] = 'https://github.com/some-repo/'
        d['repo_name'] = 'some-repo'
        op_serializer = validate_operation(d)

        # need to make a directory with dummy files to use the
        # `save_operation` function. An already-existing dir is fine.
        dummy_dir_path = os.path.join('/tmp', dirname)
        try:
            os.mkdir(dummy_dir_path)
        except OSError as ex:
            if ex.errno != errno.EEXIST:
                raise Exception('Failed to create directory at {p}'.format(
                    p=dummy_dir_path))

        op = op_serializer.get_instance()
        op_data = OperationSerializer(op).data
        save_operation(op_data, dummy_dir_path, True)
        OperationDbModel.objects.create(id=op.id, name=op.name)

    _ingest('valid_operation.json', 'dummy_op')
    _ingest('valid_workspace_operation.json', 'dummy_op2')
def test_basic_user_inputs(self):
    '''
    This tests that the proper validation happens when comparing the
    user-submitted values and the input specifications.
    Here, the values are all valid.
    '''
    spec_path = os.path.join(TESTDIR, 'sample_for_basic_types.json')
    op_spec = read_operation_json(spec_path)

    # one valid submission per input type declared in the spec file:
    sample_inputs = {
        'int_type': 10,
        'positive_int_type': 3,
        'nonnegative_int_type': 0,
        'bounded_int_type': 2,
        'float_type': 0.2,
        'bounded_float_type': 0.4,
        'positive_float_type': 0.01,
        'nonnegative_float_type': 0.1,
        'string_type': 'abc',
        'boolean_type': True,
        'option_string_type': 'abc'
    }

    for input_key, submitted_value in sample_inputs.items():
        input_spec = op_spec['inputs'][input_key]['spec']
        handler_class = submitted_operation_input_or_output_mapping[
            input_spec['attribute_type']]
        # should construct without raising; the workspace/operation
        # args are not consulted for these basic types, so None is fine.
        handler_class(self.regular_user_1, None, None,
            input_key, submitted_value, input_spec)
def test_feature_inputs(self):
    '''
    Tests that the inputs are properly validated when they
    correspond to an input type of `Feature`
    '''
    spec_path = os.path.join(TESTDIR, 'feature_set_test.json')
    op_spec = read_operation_json(spec_path)
    clazz = submitted_operation_input_or_output_mapping['Feature']
    input_spec = op_spec['inputs']['feature_type']

    feature_with_attrs = {
        'id': 'foo',
        'attributes': {
            'treatment': {'attribute_type': 'String', 'value': 'A'}
        }
    }
    feature_without_attrs = {'id': 'foo'}
    feature_missing_id = {'attributes': {'treatment': 'A'}}

    # both valid payloads should construct cleanly:
    x = clazz(self.regular_user_1, None, None, 'xyz',
        feature_with_attrs, input_spec)
    y = clazz(self.regular_user_1, None, None, 'xyz',
        feature_without_attrs, input_spec)
    self.assertDictEqual(x.get_value(), feature_with_attrs)
    # a missing 'attributes' key is normalized to an empty dict:
    self.assertDictEqual(y.get_value(), {'id': 'foo', 'attributes': {}})

    # the 'id' key is required, so this payload must fail validation:
    with self.assertRaises(ValidationError):
        clazz(self.regular_user_1, None, None, 'xyz',
            feature_missing_id, input_spec)
def test_user_input_validation(self, mock_get_operation_instance_data):
    '''
    Test that we receive back an appropriate object following
    successful validation. All the inputs below are valid
    '''
    f = os.path.join(
        TESTDIR,
        'sample_for_basic_types_no_default.json'
    )
    d = read_operation_json(f)
    mock_get_operation_instance_data.return_value = d

    # some valid user inputs corresponding to the input specifications
    sample_inputs = {
        'int_no_default_type': 10,
        'positive_int_no_default_type': 3,
        'nonnegative_int_no_default_type': 0,
        'bounded_int_no_default_type': 2,
        'float_no_default_type': 0.2,
        'bounded_float_no_default_type': 0.4,
        'positive_float_no_default_type': 0.01,
        'nonnegative_float_no_default_type': 0.1,
        'string_no_default_type': 'abc',
        'boolean_no_default_type': True
    }

    # use queryset.exists() rather than len(queryset) == 0 so we don't
    # pull every Workspace row just to check the table is non-empty:
    if not Workspace.objects.exists():
        raise ImproperlyConfigured('Need at least one Workspace to run this test.')

    validate_operation_inputs(self.regular_user_1,
        sample_inputs, self.db_op, self.workspace)
def test_list_attr_inputs(self, mock_get_operation_instance_data):
    '''
    Test the case where inputs are of a list type (e.g. a list of strings)
    Check that it all validates as expected
    '''
    spec_path = os.path.join(TESTDIR, 'valid_op_with_list_inputs.json')
    op_spec = read_operation_json(spec_path)
    mock_get_operation_instance_data.return_value = op_spec

    url_list = ['https://foo.com/bar', 'https://foo.com/baz']
    string_list = ['abc', 'def']
    inputs = {
        'link_list': url_list,
        'regular_string_list': string_list
    }

    # grab any existing Operation record to validate against:
    op = OperationDbModel.objects.all()[0]
    result = validate_operation_inputs(
        self.regular_user_1, inputs, op, None)

    # the unsubmitted optional input stays None; both lists validate
    # and round-trip (order-insensitive comparison):
    self.assertIsNone(result['optional_input'])
    self.assertCountEqual(result['link_list'].get_value(), url_list)
    self.assertCountEqual(result['regular_string_list'].get_value(), string_list)
def test_read_operation_json(self, mock_read_local_file):
    '''
    Test that a properly formatted file returns a dict as expected.
    '''
    # the original opened self.filepath without ever closing it (leaked
    # file handle); the with-block guarantees cleanup even on failure.
    with open(self.filepath) as fp:
        mock_read_local_file.return_value = fp
        d = read_operation_json(self.filepath)
    self.assertDictEqual(d, self.valid_dict)
def convert(self, input_key, user_input, op_dir, staging_dir):
    '''
    Validate `user_input` against the bounded-float spec declared for
    `input_key` in the operation's spec file (found in `op_dir`), and
    return a dict mapping the input key to the validated value.
    '''
    spec_path = os.path.join(op_dir, settings.OPERATION_SPEC_FILENAME)
    input_spec = read_operation_json(spec_path)['inputs'][input_key]['spec']
    # BoundedFloatAttribute enforces the [min, max] bounds on construction:
    bounded = BoundedFloatAttribute(
        user_input, min=input_spec['min'], max=input_spec['max'])
    return {input_key: bounded.value}
def test_defaults_for_non_required_inputs(self):
    '''
    Certain inputs may not be required by the user. In that case, check that
    the defaults are properly entered as the value
    '''
    f = os.path.join(TESTDIR, 'sample_for_basic_types.json')
    d = read_operation_json(f)

    # create an object for each input spec, submitting None as the value;
    # the spec's default should be adopted and no exception raised.
    # (NOTE: an earlier comment here said "ensure they raise an exception" —
    # that was a copy-paste error; this test exercises the happy path.)
    for key, op_input in d['inputs'].items():
        spec_object = op_input['spec']
        spec_type = spec_object['attribute_type']
        submitted_input_or_output_class = submitted_operation_input_or_output_mapping[
            spec_type]
        # can pass None for the workspace arg since we don't use it when
        # checking the basic types
        submitted_input_or_output_class(self.regular_user_1, None, None,
            key, None, spec_object)
def test_no_default_for_required_param(self, mock_get_operation_instance_data):
    '''
    Test that a missing required parameter triggers a validation error
    '''
    spec_path = os.path.join(TESTDIR, 'required_without_default.json')
    mock_get_operation_instance_data.return_value = read_operation_json(spec_path)

    # the spec declares one optional and one required input; submitting
    # an empty payload omits the required key and must fail validation.
    with self.assertRaises(ValidationError):
        validate_operation_inputs(
            self.regular_user_1, {}, self.db_op, self.workspace)
def test_optional_without_default_becomes_none(self, mock_get_operation_instance_data):
    '''
    Generally, Operations with optional inputs should have defaults. However,
    if that is violated, the "input" should be assigned to be None
    '''
    f = os.path.join(
        TESTDIR,
        'optional_without_default.json'
    )
    d = read_operation_json(f)
    mock_get_operation_instance_data.return_value = d

    # the only input is optional, so an empty payload is technically fine
    # and validation should succeed (no exception expected).
    sample_inputs = {}
    final_inputs = validate_operation_inputs(self.regular_user_1,
        sample_inputs, self.db_op, self.workspace)
    # with no default available, the optional input is filled with None:
    self.assertIsNone(final_inputs['optional_int_type'])
def test_optional_value_overridden(self, mock_get_operation_instance_data):
    '''
    Test that the optional parameter is overridden when given
    '''
    spec_path = os.path.join(TESTDIR, 'required_without_default.json')
    mock_get_operation_instance_data.return_value = read_operation_json(spec_path)

    submitted = {
        'required_int_type': 22,
        'optional_int_type': 33
    }
    final_inputs = validate_operation_inputs(
        self.regular_user_1, submitted, self.db_op, self.workspace)

    # both explicitly submitted values should win out over any defaults:
    self.assertEqual(final_inputs['required_int_type'].submitted_value, 22)
    self.assertEqual(final_inputs['optional_int_type'].submitted_value, 33)
def test_optional_boolean_value_filled_by_default(self, mock_get_operation_instance_data):
    '''
    Test that a missing optional boolean parameter gets the default value
    '''
    f = os.path.join(
        TESTDIR,
        'valid_op_with_default_bool.json'
    )
    d = read_operation_json(f)
    mock_get_operation_instance_data.return_value = d

    # the boolean input is optional and has a default, so an empty payload
    # is acceptable here and the default should be filled in. (An earlier
    # comment copied from the required-param test wrongly called this a
    # problem case.)
    sample_inputs = {}
    final_inputs = validate_operation_inputs(self.regular_user_1,
        sample_inputs, self.db_op, self.workspace)
    self.assertEqual(final_inputs['some_boolean'].submitted_value, False)
    # the hard-coded False above should agree with the default declared
    # in the spec file itself:
    expected_default = d['inputs']['some_boolean']['spec']['default']
    self.assertEqual(
        final_inputs['some_boolean'].submitted_value, expected_default)
def test_bad_basic_user_inputs(self):
    '''
    This tests that the proper validation happens when comparing the
    user-submitted values and the input specifications.
    Here, the user inputs violate the type constraints
    '''
    spec_path = os.path.join(TESTDIR, 'sample_for_basic_types_no_default.json')
    op_spec = read_operation_json(spec_path)

    # one INvalid submission per input type declared in the spec file:
    bad_inputs = {
        'int_no_default_type': 10.5,
        'positive_int_no_default_type': -3,
        'nonnegative_int_no_default_type': -10,
        'bounded_int_no_default_type': 22222,
        'float_no_default_type': 'abc',
        'bounded_float_no_default_type': 10000.4,
        'positive_float_no_default_type': -10.01,
        'nonnegative_float_no_default_type': -0.1,
        'string_no_default_type': '.*',
        'boolean_no_default_type': 'abc',
        'option_string_no_default_type': 'zzz'
    }

    for input_key, bad_value in bad_inputs.items():
        input_spec = op_spec['inputs'][input_key]['spec']
        handler_class = submitted_operation_input_or_output_mapping[
            input_spec['attribute_type']]
        # each bad value must be rejected. We can pass None for the
        # workspace and Operation args since the basic types use neither.
        with self.assertRaises(ValidationError):
            handler_class(self.regular_user_1, None, None,
                input_key, bad_value, input_spec)
def ingest_dir(staging_dir, op_uuid, git_hash, repo_name, repository_url, overwrite=False):
    '''
    Ingest a new Operation from the files staged in `staging_dir`.

    Parses and validates the operation spec file, checks/prepares the
    operation-specific files, saves the operation to its final location,
    and finally marks the corresponding OperationDbModel record as active
    and successfully ingested.

    Raises ValidationError (or the underlying exception) if the spec
    fails validation, and a generic Exception if the database record
    for `op_uuid` cannot be found afterwards.
    '''
    # Parse the JSON file defining this new Operation:
    operation_json_filepath = os.path.join(staging_dir, settings.OPERATION_SPEC_FILENAME)
    j = read_operation_json(operation_json_filepath)

    # extra parameters for an Operation that are not required
    # to be specified by the developer who wrote the `Operation`
    add_required_keys_to_operation(j, id=op_uuid,
        git_hash = git_hash,
        repository_url = repository_url,
        repo_name = repo_name
    )

    # attempt to validate the data for the operation:
    try:
        op_serializer = validate_operation(j)
    except ValidationError as ex:
        logger.info('A validation error was raised when validating'
            ' the information parsed from {path}. Exception was: {ex}.\n '
            'Full info was: {j}'.format(
                path = operation_json_filepath,
                j = json.dumps(j, indent=2),
                ex = ex
            )
        )
        # bare `raise` re-raises the active exception with its
        # original traceback intact (vs. `raise ex`).
        raise
    except Exception as ex:
        logger.info('An unexpected error was raised when validating'
            ' the information parsed from {path}. Exception was: {ex}.\n '
            'Full info was: {j}'.format(
                path = operation_json_filepath,
                j = json.dumps(j, indent=2),
                ex = ex
            )
        )
        raise

    # get an instance of the Operation (the data structure, NOT the database model)
    op = op_serializer.get_instance()
    op_data = op.to_dict()
    # use the module-level `logger` for consistency (the original called
    # the root `logging.info` here, bypassing this module's logger):
    logger.info('After parsing operation spec, we have: {spec}'.format(spec=op_data))

    # check that the required files, etc. are there for the particular run mode:
    check_required_files(op_data, staging_dir)

    # handle any operation-specific resources/files:
    handle_operation_specific_resources(op_data, staging_dir, op_uuid)

    # prepare any elements required for running the operation:
    prepare_operation(op_data, staging_dir, repo_name, git_hash)

    # save the operation in a final location:
    save_operation(op_data, staging_dir, overwrite)

    # update the database instance.
    try:
        o = OperationDbModel.objects.get(id=op.id)
        o.name = op.name
        o.active = True
        o.successful_ingestion = True
        o.workspace_operation = op_data['workspace_operation']
        o.save()
    except OperationDbModel.DoesNotExist:
        logger.error('Could not find the Operation corresponding to'
            ' id={u}'.format(u=op_uuid)
        )
        raise Exception('Encountered issue when trying update an Operation'
            ' database instance after ingesting from repository.'
        )
def test_check_for_resource_operations_case3(self, mock_get_operation_instance_data):
    '''
    When removing a Resource from a Workspace, we need to ensure
    we are not removing a file that has been used in one or more
    ExecutedOperations.

    Below, we check where a file HAS been used, but the analysis failed.
    Hence, it's safe to remove since it was not used to create anything.
    '''
    # locate a Workspace that holds at least one Resource:
    workspace_with_resource = None
    for w in Workspace.objects.all():
        if len(w.resources.all()) > 0:
            workspace_with_resource = w
    if workspace_with_resource is None:
        raise ImproperlyConfigured('Need at least one Workspace that has'
            ' at least a single Resource.'
        )

    # NOTE(review): sibling tests fetch operations via OperationDbModel;
    # confirm `Operation` is the intended model class here.
    ops = Operation.objects.all()
    if len(ops) > 0:
        op = ops[0]
    else:
        raise ImproperlyConfigured('Need at least one Operation'
            ' to use for this test'
        )

    op_data = read_operation_json(
        os.path.join(TESTDIR, 'valid_workspace_operation.json'))
    mock_get_operation_instance_data.return_value = op_data

    executed_op_pk = uuid.uuid4()
    # the op_data above has two outputs, one of which is a DataResource.
    # Assert the spec's input keys match what we mock below so the two
    # stay consistent:
    self.assertCountEqual(
        list(op_data['inputs'].keys()), ['count_matrix','p_val'])
    mock_used_resource = workspace_with_resource.resources.all()[0]
    mock_validated_inputs = {
        'count_matrix': str(mock_used_resource.pk),
        'p_val': 0.01
    }

    # record a FAILED run that referenced the resource:
    WorkspaceExecutedOperation.objects.create(
        id=executed_op_pk,
        owner = self.regular_user_1,
        workspace = workspace_with_resource,
        job_name = 'abc',
        inputs = mock_validated_inputs,
        outputs = {},
        operation = op,
        mode = op_data['mode'],
        status = ExecutedOperation.COMPLETION_ERROR,
        job_failed = True
    )
    was_used = check_for_resource_operations(
        mock_used_resource, workspace_with_resource)
    # the failed run produced nothing, so the resource counts as unused:
    self.assertFalse(was_used)
def test_observation_set_inputs(self):
    '''
    Tests that the inputs are properly validated when they
    correspond to an input type of `ObservationSet`
    '''
    op_spec = read_operation_json(os.path.join(TESTDIR, 'obs_set_test.json'))
    clazz = submitted_operation_input_or_output_mapping['ObservationSet']
    input_spec = op_spec['inputs']['obs_set_type']

    obs_a = {
        'id': 'foo',
        'attributes': {
            'treatment': {'attribute_type': 'String', 'value': 'A'}
        }
    }
    obs_b = {
        'id': 'bar',
        'attributes': {
            'treatment': {'attribute_type': 'String', 'value': 'B'}
        }
    }

    # a well-formed set with multiple=True and two elements is fine:
    obs_set = {'multiple': True, 'elements': [obs_a, obs_b]}
    x = clazz(self.regular_user_1, None, None, 'xyz', obs_set, input_spec)
    val = x.get_value()
    self.assertEqual(val['multiple'], obs_set['multiple'])
    self.assertCountEqual(val['elements'], obs_set['elements'])

    # an empty element set is technically valid:
    x = clazz(self.regular_user_1, None, None, 'xyz',
        {'multiple': True, 'elements': []}, input_spec)
    self.assertCountEqual(x.get_value()['elements'], [])

    # >1 elements coupled with multiple=False makes an invalid ObservationSet:
    with self.assertRaises(ValidationError):
        clazz(self.regular_user_1, None, None, 'xyz',
            {'multiple': False, 'elements': [obs_a, obs_b]}, input_spec)

    # a nested Observation missing the 'attributes' key is acceptable:
    clazz(self.regular_user_1, None, None, 'xyz',
        {'multiple': True, 'elements': [obs_a, {'id': 'baz'}]}, input_spec)

    # but a nested Observation lacking the required 'id' key is not:
    with self.assertRaises(ValidationError):
        clazz(self.regular_user_1, None, None, 'xyz',
            {'multiple': True, 'elements': [obs_a, {}]}, input_spec)
def test_feature_set_inputs(self):
    '''
    Tests that the inputs are properly validated when they
    correspond to an input type of `FeatureSet`
    '''
    op_spec = read_operation_json(
        os.path.join(TESTDIR, 'feature_set_test.json'))
    clazz = submitted_operation_input_or_output_mapping['FeatureSet']
    input_spec = op_spec['inputs']['feature_set_type']

    feature_a = {'id': 'foo', 'attributes': {}}
    feature_b = {'id': 'bar', 'attributes': {}}
    feature_set = {'multiple': True, 'elements': [feature_a, feature_b]}

    # a well-formed set validates and round-trips:
    x = clazz(self.regular_user_1, None, None, 'xyz',
        feature_set, input_spec)
    val = x.get_value()
    self.assertEqual(val['multiple'], feature_set['multiple'])
    self.assertCountEqual(val['elements'], feature_set['elements'])

    # a featureset with zero elements is technically valid:
    x = clazz(self.regular_user_1, None, None, 'xyz',
        {'multiple': True, 'elements': []}, input_spec)
    self.assertCountEqual(x.get_value()['elements'], [])

    # >1 elements coupled with multiple=False makes an invalid FeatureSet:
    with self.assertRaises(ValidationError):
        clazz(self.regular_user_1, None, None, 'xyz',
            {'multiple': False, 'elements': [feature_a, feature_b]},
            input_spec)

    # a nested Feature missing 'attributes' is OK; validation fills in an
    # empty 'attributes' dict, so we compare against the fully normalized
    # feature_set above rather than the submitted payload:
    x = clazz(self.regular_user_1, None, None, 'xyz',
        {'multiple': True, 'elements': [feature_a, {'id': 'bar'}]},
        input_spec)
    val = x.get_value()
    self.assertEqual(val['multiple'], feature_set['multiple'])
    self.assertCountEqual(val['elements'], feature_set['elements'])

    # a nested Feature missing the required 'id' key is invalid:
    with self.assertRaises(ValidationError):
        clazz(self.regular_user_1, None, None, 'xyz',
            {'multiple': True, 'elements': [feature_a, {}]}, input_spec)