def setUp(self):
    """Creates a scratch git repository containing one committed dataset.

    The working directory is changed to a fresh temporary directory, a git
    repository is initialized there with a local committer identity, and a
    single-reaction dataset is written under `data/64/` and committed.
    """
    super().setUp()
    self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)
    os.chdir(self.test_subdirectory)
    # Repository bootstrap: the git commands run in the new working directory.
    bootstrap_commands = (
        ['git', 'init'],
        ['git', 'config', '--local', 'user.email', 'test@ord-schema'],
        ['git', 'config', '--local', 'user.name', 'Test Runner'],
    )
    for command in bootstrap_commands:
        subprocess.run(command, check=True)
    # Build the reaction that makes up the initial data.
    reaction = reaction_pb2.Reaction()
    compound = reaction.inputs['methylamine'].components.add()
    compound.identifiers.add(type='SMILES', value='CN')
    compound.is_limiting = reaction_pb2.Boolean.TRUE
    compound.moles.value = 1
    compound.moles.units = reaction_pb2.Moles.MILLIMOLE
    reaction.outcomes.add().conversion.value = 75
    reaction.provenance.record_created.time.value = '2020-01-01'
    reaction.reaction_id = 'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'
    dataset_id = 'ord_dataset-64b14868c5cd46dd8e75560fd3589a6b'
    dataset = dataset_pb2.Dataset(reactions=[reaction],
                                  dataset_id=dataset_id)
    # Run validation on the initial dataset before it is written to disk.
    validations.validate_message(dataset)
    os.makedirs(os.path.join('data', '64'))
    self.dataset_filename = os.path.join(self.test_subdirectory, 'data',
                                         '64', f'{dataset_id}.pbtxt')
    message_helpers.write_message(dataset, self.dataset_filename)
    # Commit the dataset so the repository has an initial commit.
    commit_commands = (
        ['git', 'add', 'data'],
        ['git', 'commit', '-m', 'Initial commit'],
    )
    for command in commit_commands:
        subprocess.run(command, check=True)
def test_datetimes(self):
    """Checks validation of experiment start versus record creation times."""
    provenance = reaction_pb2.ReactionProvenance()
    provenance.experiment_start.value = '11 am'
    provenance.record_created.time.value = '10 am'
    # With creation at 10 am and start at 11 am an error matching 'after'
    # is expected.
    with self.assertRaisesRegex(validations.ValidationError, 'after'):
        validations.validate_message(provenance)
    # Moving the creation time to 11:15 am should leave nothing to report.
    provenance.record_created.time.value = '11:15 am'
    result = validations.validate_message(provenance)
    self.assertEmpty(result)
def test_data(self):
    """Checks the validation errors raised for incomplete Data messages."""
    data = reaction_pb2.Data()
    # An empty Data message is expected to fail with 'requires one of'.
    with self.assertRaisesRegex(validations.ValidationError,
                                'requires one of'):
        validations.validate_message(data)
    # Setting only bytes_value is expected to fail with 'format is required'.
    data.bytes_value = b'test data'
    with self.assertRaisesRegex(validations.ValidationError,
                                'format is required'):
        validations.validate_message(data)
    # After assigning `value`, validation should report nothing.
    data.value = 'test data'
    result = validations.validate_message(data)
    self.assertEmpty(result)
def test_compound_rdkit_binary(self):
    """Checks that validation appends an RDKIT_BINARY identifier."""
    molecule = Chem.MolFromSmiles('CC(=O)OC1=CC=CC=C1C(=O)O')
    compound = reaction_pb2.Compound()
    smiles_identifier = compound.identifiers.add()
    smiles_identifier.type = smiles_identifier.SMILES
    smiles_identifier.value = Chem.MolToSmiles(molecule)
    # Validation modifies the message in place; the second identifier is
    # expected to carry the RDKit binary form of the molecule.
    validations.validate_message(compound)
    expected_identifier = reaction_pb2.CompoundIdentifier(
        type='RDKIT_BINARY', bytes_value=molecule.ToBinary())
    self.assertEqual(compound.identifiers[1], expected_identifier)
def test_main(self):
    """Runs enumerate_dataset.main and checks the dataset it writes."""
    output_filename = os.path.join(self.test_subdirectory, 'dataset.pbtxt')
    flag_overrides = flagsaver.flagsaver(template=self.template,
                                         spreadsheet=self.spreadsheet,
                                         output=output_filename)
    # Flags are only overridden for the duration of the main() call.
    with flag_overrides:
        enumerate_dataset.main(())
    self.assertTrue(os.path.exists(output_filename))
    written = message_helpers.load_message(output_filename,
                                           dataset_pb2.Dataset)
    self.assertLen(written.reactions, 3)
    validations.validate_message(written, raise_on_error=True)
    self.assertEqual(written, self.expected)
def test_compound_name_resolver(self):
    """Checks that validation adds a SMILES identifier for a NAME."""
    compound = reaction_pb2.Compound()
    name_identifier = compound.identifiers.add()
    name_identifier.type = name_identifier.NAME
    name_identifier.value = 'aspirin'
    # Validation modifies the message in place; the second identifier is
    # expected to be the resolved SMILES.
    validations.validate_message(compound)
    expected_identifier = reaction_pb2.CompoundIdentifier(
        type='SMILES',
        value='CC(=O)OC1=CC=CC=C1C(=O)O',
        details='NAME resolved by PubChem')
    self.assertEqual(compound.identifiers[1], expected_identifier)
def _validate_dataset(filename, dataset):
    """Validates every Reaction in a Dataset and logs the outcome.

    Validation may change the messages; for example, NAME identifiers will
    be resolved to structures.

    Args:
        filename: Text filename; the dataset source. Only its basename is
            used, as a label in log messages.
        dataset: dataset_pb2.Dataset message.

    Returns:
        List of validation error messages collected across all reactions.
    """
    label = os.path.basename(filename)
    total = len(dataset.reactions)
    collected = []
    failures = 0
    for index, reaction in enumerate(dataset.reactions):
        found = validations.validate_message(reaction, raise_on_error=False)
        if not found:
            continue
        failures += 1
        for problem in found:
            collected.append(problem)
            logging.warning('Validation error for %s[%d]: %s', label, index,
                            problem)
    logging.info('Validation summary for %s: %d/%d successful (%d failures)',
                 label, total - failures, total, failures)
    return collected
def _run_validation(self, message, **kwargs):
    """Validates `message` and asserts that validation did not change it.

    Args:
        message: Message to validate.
        **kwargs: Forwarded unchanged to validations.validate_message.

    Returns:
        Whatever validations.validate_message returns.
    """
    # Snapshot the message before validation so it can be compared after.
    snapshot = type(message)()
    snapshot.CopyFrom(message)
    result = validations.validate_message(message, **kwargs)
    self.assertEqual(snapshot, message)
    return result
def validate_reaction(message_name):
    """Validates the serialized protobuf sent in the request body.

    Args:
        message_name: Name passed to message_helpers.create_message to
            build the message that the request body is parsed into.

    Returns:
        JSON string with 'errors' and 'warnings' entries taken from the
        validation output.
    """
    message = message_helpers.create_message(message_name)
    serialized = flask.request.get_data()
    message.ParseFromString(serialized)
    output = validations.validate_message(
        message,
        raise_on_error=False,
        options=validations.ValidationOptions(require_provenance=True))
    response = {'errors': output.errors, 'warnings': output.warnings}
    return json.dumps(response)
def test_reaction_recursive_noraise_on_error(self):
    """Checks the errors returned when raise_on_error is disabled."""
    reaction = reaction_pb2.Reaction()
    # One input with a single empty component and no outcomes.
    reaction.inputs['dummy_input'].components.add()
    reported = validations.validate_message(reaction, raise_on_error=False)
    self.assertEqual(reported, [
        'Compounds must have at least one identifier',
        "Reaction input's components require an amount",
        'Reactions should have at least 1 reaction outcome',
    ])
def validate_reaction(message_name):
    """Validates the serialized protobuf sent in the request body.

    Args:
        message_name: Name passed to message_helpers.create_message to
            build the message that the request body is parsed into.

    Returns:
        JSON string with "errors" and "warnings" lists. Both lists are empty
        when the parsed message equals a default-constructed message;
        otherwise each entry is passed through _adjust_error.
    """
    message = message_helpers.create_message(message_name)
    serialized = flask.request.get_data()
    message.ParseFromString(serialized)
    blank = type(message)()
    if message == blank:
        # Do not try to validate empty messages.
        return json.dumps({"errors": [], "warnings": []})
    output = validations.validate_message(
        message,
        raise_on_error=False,
        options=validations.ValidationOptions(require_provenance=True))
    adjusted_errors = [_adjust_error(item) for item in output.errors]
    adjusted_warnings = [_adjust_error(item) for item in output.warnings]
    return json.dumps({"errors": adjusted_errors,
                       "warnings": adjusted_warnings})
def generate_dataset(template_string, df, validate=True):
    """Generates a Dataset by enumerating a template reaction.

    Args:
        template_string: The contents of a Reaction pbtxt where placeholder
            values to be replaced are defined between dollar signs. For
            example, a SMILES identifier value could be "$product_smiles$".
            Placeholders may only use letters, numbers, and underscores.
        df: Pandas Dataframe where each row corresponds to one reaction and
            column names match placeholders in the template_string. A column
            may be named either "$my_placeholder$" or "my_placeholder". The
            DataFrame passed in is not modified.
        validate: Optional Boolean controlling whether Reaction messages
            should be validated as they are defined. Defaults to True.

    Returns:
        A Dataset message.

    Raises:
        ValueError: If there is no match for a placeholder string in df.
        ValueError: If validate is True and there are validation errors when
            validating an enumerated Reaction message.
    """
    placeholders = set(re.findall(r'\$\w+\$', template_string))
    renames = {}
    for placeholder in placeholders:
        if placeholder in df.columns:
            continue
        # Allow "$my_placeholder$" to match "my_placeholder" in df.
        bare_name = placeholder[1:-1]
        if bare_name not in df.columns:
            raise ValueError(f'Placeholder {placeholder} not found as a'
                             ' column in dataset spreadsheet')
        renames[bare_name] = placeholder
    if renames:
        # Rename on a copy so the caller's DataFrame keeps its column names.
        df = df.rename(columns=renames)
    # pandas rejects a set as a column indexer, so select with a list; sorting
    # makes the column order deterministic.
    columns = sorted(placeholders)
    reactions = []
    for _, substitutions in df[columns].iterrows():
        reaction = _fill_template(template_string, substitutions)
        if validate:
            output = validations.validate_message(reaction,
                                                  raise_on_error=False)
            if output.errors:
                raise ValueError(
                    f'Enumerated Reaction is not valid: {output.errors}')
        reactions.append(reaction)
    return dataset_pb2.Dataset(reactions=reactions)
def test_orcid(self):
    """Checks that a Person with this ORCID validates with empty output."""
    person = reaction_pb2.Person(orcid='0000-0001-2345-678X')
    result = validations.validate_message(person)
    self.assertEmpty(result)
def test_record_id(self):
    """Checks that provenance with this record ID validates cleanly."""
    provenance = reaction_pb2.ReactionProvenance()
    provenance.record_created.time.value = '10 am'
    provenance.record_id = 'ord-c0bbd41f095a44a78b6221135961d809'
    result = validations.validate_message(provenance)
    self.assertEmpty(result)
def test_bad_record_id(self, record_id):
    """Checks that the given record ID is rejected as malformed.

    Args:
        record_id: Record ID string assigned to the provenance message.
    """
    provenance = reaction_pb2.ReactionProvenance()
    provenance.record_created.time.value = '10 am'
    provenance.record_id = record_id
    expect_malformed = self.assertRaisesRegex(validations.ValidationError,
                                              'malformed')
    with expect_malformed:
        validations.validate_message(provenance)
def test_units(self, message):
    """Checks that validating `message` produces empty output.

    Args:
        message: Message to validate.
    """
    result = validations.validate_message(message)
    self.assertEmpty(result)
def test_units_should_fail(self, message, expected_error):
    """Checks that validating `message` raises the expected error.

    Args:
        message: Message to validate.
        expected_error: Regular expression the ValidationError text must
            match.
    """
    expect_failure = self.assertRaisesRegex(validations.ValidationError,
                                            expected_error)
    with expect_failure:
        validations.validate_message(message)
def test_reaction_recursive(self):
    """Builds a Reaction step by step, checking each validation error.

    Each stage first asserts the error raised for the incomplete message,
    then adds the missing piece before moving on to the next check.
    """
    reaction = reaction_pb2.Reaction()

    def expect_error(pattern, **kwargs):
        """Asserts that validating `reaction` raises an error matching pattern."""
        with self.assertRaisesRegex(validations.ValidationError, pattern):
            validations.validate_message(reaction, **kwargs)

    # Reactions must have at least one input.
    expect_error('reaction input', recurse=False)
    reaction_input = reaction.inputs['dummy_input']
    # Reactions must have at least one outcome.
    expect_error('reaction outcome', recurse=False)
    reaction_outcome = reaction.outcomes.add()
    self.assertEmpty(validations.validate_message(reaction, recurse=False))
    # Inputs must have at least one component.
    expect_error('component')
    compound = reaction_input.components.add()
    # Components must have at least one identifier.
    expect_error('identifier')
    compound.identifiers.add(type='CUSTOM')
    # Custom identifiers must have details specified.
    expect_error('details')
    custom_identifier = compound.identifiers[0]
    custom_identifier.details = 'custom_identifier'
    custom_identifier.value = 'custom_value'
    # Components of reaction inputs must have a defined amount.
    expect_error('require an amount')
    compound.mass.value = 1
    compound.mass.units = reaction_pb2.Mass.GRAM
    # Reactions must have defined products or conversion.
    expect_error('products or conversion')
    reaction_outcome.conversion.value = 75
    # If conversions are defined, a limiting reagent flag is required.
    expect_error('is_limiting')
    compound.is_limiting = True
    self.assertEmpty(validations.validate_message(reaction))
    # If an analysis uses an internal standard, a component must have an
    # INTERNAL_STANDARD reaction role.
    analysis = reaction_outcome.analyses['dummy_analysis']
    analysis.uses_internal_standard = True
    expect_error('INTERNAL_STANDARD')
    # Assigning the internal standard role to an input should resolve the
    # error.
    with_input_istd = reaction_pb2.Reaction()
    with_input_istd.CopyFrom(reaction)
    input_compound = with_input_istd.inputs['dummy_input'].components[0]
    input_compound.reaction_role = (
        reaction_pb2.Compound.ReactionRole.INTERNAL_STANDARD)
    self.assertEmpty(validations.validate_message(with_input_istd))
    # Assigning the internal standard role to a workup component should
    # resolve the error as well.
    with_workup_istd = reaction_pb2.Reaction()
    with_workup_istd.CopyFrom(reaction)
    workup_step = with_workup_istd.workup.add()
    standard = workup_step.components.add()
    standard.identifiers.add(type='SMILES', value='CCO')
    standard.mass.value = 1
    standard.mass.units = reaction_pb2.Mass.GRAM
    standard.reaction_role = standard.ReactionRole.INTERNAL_STANDARD
    self.assertEmpty(validations.validate_message(with_workup_istd))
def test_reaction(self):
    """Checks that an empty Reaction fails with a 'reaction input' error."""
    empty_reaction = reaction_pb2.Reaction()
    expect_failure = self.assertRaisesRegex(validations.ValidationError,
                                            'reaction input')
    with expect_failure:
        validations.validate_message(empty_reaction)
def test_orcid_should_fail(self):
    """Checks that a Person with this ORCID fails with an 'Invalid' error."""
    person = reaction_pb2.Person(orcid='abcd-0001-2345-678X')
    expect_failure = self.assertRaisesRegex(validations.ValidationError,
                                            'Invalid')
    with expect_failure:
        validations.validate_message(person)