Ejemplo n.º 1
0
 def setUp(self):
     """Builds a scratch git repository seeded with one committed dataset.

     A fresh temporary directory is created under the test tmpdir and made
     the current working directory. A git repository is initialized there
     with a repository-local committer identity, and a single-reaction
     dataset is validated, written under `data/64/`, and committed.
     """
     super().setUp()
     self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)
     os.chdir(self.test_subdirectory)
     # Create the repository and configure a repository-local identity.
     for git_args in (
             ['init'],
             ['config', '--local', 'user.email', 'test@ord-schema'],
             ['config', '--local', 'user.name', 'Test Runner'],
     ):
         subprocess.run(['git', *git_args], check=True)
     # Build the seed reaction.
     reaction = reaction_pb2.Reaction()
     component = reaction.inputs['methylamine'].components.add()
     component.identifiers.add(type='SMILES', value='CN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 1
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     outcome = reaction.outcomes.add()
     outcome.conversion.value = 75
     reaction.provenance.record_created.time.value = '2020-01-01'
     reaction.reaction_id = 'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'
     dataset_id = 'ord_dataset-64b14868c5cd46dd8e75560fd3589a6b'
     dataset = dataset_pb2.Dataset(reactions=[reaction],
                                   dataset_id=dataset_id)
     # The seed dataset must itself pass validation.
     validations.validate_message(dataset)
     # Write the dataset into the repository and commit it.
     data_directory = os.path.join('data', '64')
     os.makedirs(data_directory)
     self.dataset_filename = os.path.join(self.test_subdirectory,
                                          data_directory,
                                          f'{dataset_id}.pbtxt')
     message_helpers.write_message(dataset, self.dataset_filename)
     subprocess.run(['git', 'add', 'data'], check=True)
     subprocess.run(['git', 'commit', '-m', 'Initial commit'], check=True)
Ejemplo n.º 2
0
 def test_datetimes(self):
     """Checks the ordering check between record and experiment times."""
     provenance = reaction_pb2.ReactionProvenance()
     provenance.experiment_start.value = '11 am'
     provenance.record_created.time.value = '10 am'
     # A 10 am record with an 11 am experiment start is expected to fail.
     with self.assertRaisesRegex(validations.ValidationError, 'after'):
         validations.validate_message(provenance)
     # Moving the record time to 11:15 am is expected to pass.
     provenance.record_created.time.value = '11:15 am'
     result = validations.validate_message(provenance)
     self.assertEmpty(result)
Ejemplo n.º 3
0
 def test_data(self):
     """Checks the payload and format requirements of Data messages."""
     data = reaction_pb2.Data()
     # An empty message is expected to fail.
     with self.assertRaisesRegex(validations.ValidationError,
                                 'requires one of'):
         validations.validate_message(data)
     # With a bytes payload set, a format error is expected.
     data.bytes_value = b'test data'
     with self.assertRaisesRegex(validations.ValidationError,
                                 'format is required'):
         validations.validate_message(data)
     # After setting `value`, validation is expected to pass.
     data.value = 'test data'
     result = validations.validate_message(data)
     self.assertEmpty(result)
Ejemplo n.º 4
0
 def test_compound_rdkit_binary(self):
     """Checks that validation appends an RDKIT_BINARY identifier."""
     mol = Chem.MolFromSmiles('CC(=O)OC1=CC=CC=C1C(=O)O')
     compound = reaction_pb2.Compound()
     identifier = compound.identifiers.add()
     identifier.type = identifier.SMILES
     identifier.value = Chem.MolToSmiles(mol)
     # Validation modifies the message in place.
     validations.validate_message(compound)
     expected = reaction_pb2.CompoundIdentifier(type='RDKIT_BINARY',
                                                bytes_value=mol.ToBinary())
     self.assertEqual(compound.identifiers[1], expected)
Ejemplo n.º 5
0
 def test_main(self):
     """Runs the enumeration entry point and checks the written dataset."""
     output_filename = os.path.join(self.test_subdirectory, 'dataset.pbtxt')
     flag_overrides = {
         'template': self.template,
         'spreadsheet': self.spreadsheet,
         'output': output_filename,
     }
     with flagsaver.flagsaver(**flag_overrides):
         enumerate_dataset.main(())
     # The script must have produced the requested output file.
     self.assertTrue(os.path.exists(output_filename))
     loaded = message_helpers.load_message(output_filename,
                                           dataset_pb2.Dataset)
     self.assertLen(loaded.reactions, 3)
     validations.validate_message(loaded, raise_on_error=True)
     self.assertEqual(loaded, self.expected)
Ejemplo n.º 6
0
 def test_compound_name_resolver(self):
     """Checks that validating a NAME identifier appends a SMILES one."""
     compound = reaction_pb2.Compound()
     identifier = compound.identifiers.add()
     identifier.type = identifier.NAME
     identifier.value = 'aspirin'
     # Validation modifies the message in place.
     validations.validate_message(compound)
     expected = reaction_pb2.CompoundIdentifier(
         type='SMILES',
         value='CC(=O)OC1=CC=CC=C1C(=O)O',
         details='NAME resolved by PubChem')
     self.assertEqual(compound.identifiers[1], expected)
Ejemplo n.º 7
0
def _validate_dataset(filename, dataset):
    """Runs validation on every Reaction in a Dataset and collects errors.

    Validation may modify the messages; for example, NAME identifiers will
    be resolved to structures.

    Each error is logged as a warning tagged with the file's basename and
    the index of the offending reaction, and a summary line is logged once
    all reactions have been processed.

    Args:
        filename: Text filename; the dataset source. Only its basename is
            used, to label log messages.
        dataset: dataset_pb2.Dataset message.

    Returns:
        List of validation error messages across all reactions.
    """
    label = os.path.basename(filename)
    collected = []
    failures = 0
    for index, reaction in enumerate(dataset.reactions):
        found = validations.validate_message(reaction, raise_on_error=False)
        # A reaction counts as failed once, however many errors it has.
        failures += 1 if found else 0
        for problem in found:
            collected.append(problem)
            logging.warning('Validation error for %s[%d]: %s', label, index,
                            problem)
    total = len(dataset.reactions)
    logging.info('Validation summary for %s: %d/%d successful (%d failures)',
                 label, total - failures, total, failures)
    return collected
Ejemplo n.º 8
0
 def _run_validation(self, message, **kwargs):
     """Validates `message` and asserts that validation did not alter it.

     Args:
         message: Protocol buffer message to validate.
         **kwargs: Forwarded unchanged to validations.validate_message.

     Returns:
         Whatever validations.validate_message returns.
     """
     # Snapshot the message so it can be compared after validation.
     snapshot = type(message)()
     snapshot.CopyFrom(message)
     result = validations.validate_message(message, **kwargs)
     self.assertEqual(snapshot, message)
     return result
Ejemplo n.º 9
0
def validate_reaction(message_name):
    """Validates a serialized protobuf taken from the request body.

    Args:
        message_name: Name passed to message_helpers.create_message to build
            the empty message that the request body is parsed into.

    Returns:
        JSON string with "errors" and "warnings" entries taken from the
        validation output.
    """
    message = message_helpers.create_message(message_name)
    message.ParseFromString(flask.request.get_data())
    result = validations.validate_message(
        message,
        raise_on_error=False,
        options=validations.ValidationOptions(require_provenance=True))
    payload = {'errors': result.errors, 'warnings': result.warnings}
    return json.dumps(payload)
Ejemplo n.º 10
0
 def test_reaction_recursive_noraise_on_error(self):
     """Checks the errors returned when raise_on_error is False."""
     reaction = reaction_pb2.Reaction()
     # A single input holding one empty component.
     reaction.inputs['dummy_input'].components.add()
     expected = [
         'Compounds must have at least one identifier',
         "Reaction input's components require an amount",
         'Reactions should have at least 1 reaction outcome',
     ]
     errors = validations.validate_message(reaction, raise_on_error=False)
     self.assertEqual(errors, expected)
Ejemplo n.º 11
0
def validate_reaction(message_name):
    """Validates a serialized protobuf taken from the request body.

    Messages equal to a freshly constructed message of the same type are not
    validated; an empty result is returned for them instead.

    Args:
        message_name: Name passed to message_helpers.create_message to build
            the empty message that the request body is parsed into.

    Returns:
        JSON string with "errors" and "warnings" lists, each entry passed
        through _adjust_error.
    """
    message = message_helpers.create_message(message_name)
    message.ParseFromString(flask.request.get_data())
    blank = type(message)()
    if message == blank:
        # Do not try to validate empty messages.
        return json.dumps({"errors": [], "warnings": []})
    result = validations.validate_message(
        message,
        raise_on_error=False,
        options=validations.ValidationOptions(require_provenance=True))
    payload = {
        "errors": [_adjust_error(error) for error in result.errors],
        "warnings": [_adjust_error(warning) for warning in result.warnings],
    }
    return json.dumps(payload)
Ejemplo n.º 12
0
def generate_dataset(template_string, df, validate=True):
    """Generates a Dataset by enumerating a template reaction.

    Args:
        template_string: The contents of a Reaction pbtxt where placeholder
            values to be replaced are defined between dollar signs. For example,
            a SMILES identifier value could be "$product_smiles$". Placeholders
            may only use letters, numbers, and underscores.
        df: Pandas Dataframe where each row corresponds to one reaction and
            column names match placeholders in the template_string. A column
            named without the dollar signs (e.g. "product_smiles") is also
            accepted; it is renamed in place to the "$product_smiles$" form,
            so the caller's DataFrame is modified.
        validate: Optional Boolean controlling whether Reaction messages should
            be validated as they are defined. Defaults to True.

    Returns:
        A Dataset message.

    Raises:
        ValueError: If there is no match for a placeholder string in df.
        ValueError: If validate is True and there are validation errors when
            validating an enumerated Reaction message.

    """
    # De-duplicate while keeping first-appearance order. This must be a list:
    # pandas does not accept a set as a column indexer, and a set would make
    # the column order nondeterministic.
    placeholders = list(dict.fromkeys(re.findall(r'\$\w+\$',
                                                 template_string)))
    for placeholder in placeholders:
        if placeholder not in df.columns:
            # Allow "$my_placeholder$" to match "my_placeholder" in df.
            if placeholder[1:-1] not in df.columns:
                raise ValueError(f'Placeholder {placeholder} not found as a'
                                 ' column in dataset spreadsheet')
            df.rename(columns={placeholder[1:-1]: placeholder}, inplace=True)

    reactions = []
    # Restrict each row to the placeholder columns before filling the template.
    for _, substitutions in df[placeholders].iterrows():
        reaction = _fill_template(template_string, substitutions)
        if validate:
            output = validations.validate_message(reaction,
                                                  raise_on_error=False)
            if output.errors:
                raise ValueError(
                    f'Enumerated Reaction is not valid: {output.errors}')
        reactions.append(reaction)

    return dataset_pb2.Dataset(reactions=reactions)
Ejemplo n.º 13
0
 def test_orcid(self):
     """Checks that this ORCID value passes validation."""
     person = reaction_pb2.Person(orcid='0000-0001-2345-678X')
     result = validations.validate_message(person)
     self.assertEmpty(result)
Ejemplo n.º 14
0
 def test_record_id(self):
     """Checks that this record ID passes provenance validation."""
     provenance = reaction_pb2.ReactionProvenance()
     provenance.record_created.time.value = '10 am'
     provenance.record_id = 'ord-c0bbd41f095a44a78b6221135961d809'
     result = validations.validate_message(provenance)
     self.assertEmpty(result)
Ejemplo n.º 15
0
 def test_bad_record_id(self, record_id):
     """Checks that `record_id` is rejected as malformed.

     Args:
         record_id: Record ID string that validation is expected to reject.
     """
     provenance = reaction_pb2.ReactionProvenance()
     provenance.record_created.time.value = '10 am'
     provenance.record_id = record_id
     expect_malformed = self.assertRaisesRegex(validations.ValidationError,
                                               'malformed')
     with expect_malformed:
         validations.validate_message(provenance)
Ejemplo n.º 16
0
 def test_units(self, message):
     """Checks that `message` validates with an empty result.

     Args:
         message: Message that is expected to pass validation.
     """
     result = validations.validate_message(message)
     self.assertEmpty(result)
Ejemplo n.º 17
0
 def test_units_should_fail(self, message, expected_error):
     """Checks that validating `message` raises the expected error.

     Args:
         message: Message that is expected to fail validation.
         expected_error: Regular expression the error text must match.
     """
     expect_failure = self.assertRaisesRegex(validations.ValidationError,
                                             expected_error)
     with expect_failure:
         validations.validate_message(message)
Ejemplo n.º 18
0
    def test_reaction_recursive(self):
        """Builds a Reaction step by step, checking each expected error.

        The message starts empty and is filled in one field at a time. Before
        each addition, validation is expected to raise an error naming the
        missing piece; after the final addition, validation is expected to
        return an empty result.
        """
        reaction = reaction_pb2.Reaction()
        # Non-recursive check: at least one input is required.
        with self.assertRaisesRegex(validations.ValidationError,
                                    'reaction input'):
            validations.validate_message(reaction, recurse=False)
        reaction_input = reaction.inputs['dummy_input']
        # Non-recursive check: at least one outcome is required.
        with self.assertRaisesRegex(validations.ValidationError,
                                    'reaction outcome'):
            validations.validate_message(reaction, recurse=False)
        outcome = reaction.outcomes.add()
        shallow_result = validations.validate_message(reaction, recurse=False)
        self.assertEmpty(shallow_result)
        # Recursive check: every input needs at least one component.
        with self.assertRaisesRegex(validations.ValidationError, 'component'):
            validations.validate_message(reaction)
        component = reaction_input.components.add()
        # Every component needs at least one identifier.
        with self.assertRaisesRegex(validations.ValidationError, 'identifier'):
            validations.validate_message(reaction)
        component.identifiers.add(type='CUSTOM')
        # A CUSTOM identifier needs details.
        with self.assertRaisesRegex(validations.ValidationError, 'details'):
            validations.validate_message(reaction)
        component.identifiers[0].details = 'custom_identifier'
        component.identifiers[0].value = 'custom_value'
        # Input components need an amount.
        with self.assertRaisesRegex(validations.ValidationError,
                                    'require an amount'):
            validations.validate_message(reaction)
        component.mass.value = 1
        component.mass.units = reaction_pb2.Mass.GRAM
        # Outcomes need products or a conversion.
        with self.assertRaisesRegex(validations.ValidationError,
                                    'products or conversion'):
            validations.validate_message(reaction)
        outcome.conversion.value = 75
        # A defined conversion requires a limiting reagent to be flagged.
        with self.assertRaisesRegex(validations.ValidationError,
                                    'is_limiting'):
            validations.validate_message(reaction)
        component.is_limiting = True
        self.assertEmpty(validations.validate_message(reaction))

        # An analysis that uses an internal standard requires some component
        # to carry the INTERNAL_STANDARD reaction role.
        outcome.analyses['dummy_analysis'].uses_internal_standard = True
        with self.assertRaisesRegex(validations.ValidationError,
                                    'INTERNAL_STANDARD'):
            validations.validate_message(reaction)
        # Fix 1: give the role to the existing input component (on a copy).
        with_input_istd = reaction_pb2.Reaction()
        with_input_istd.CopyFrom(reaction)
        copied_component = with_input_istd.inputs['dummy_input'].components[0]
        copied_component.reaction_role = (
            reaction_pb2.Compound.ReactionRole.INTERNAL_STANDARD)
        self.assertEmpty(validations.validate_message(with_input_istd))
        # Fix 2: add a workup component with the role (on another copy).
        with_workup_istd = reaction_pb2.Reaction()
        with_workup_istd.CopyFrom(reaction)
        workup_step = with_workup_istd.workup.add()
        standard = workup_step.components.add()
        standard.identifiers.add(type='SMILES', value='CCO')
        standard.mass.value = 1
        standard.mass.units = reaction_pb2.Mass.GRAM
        standard.reaction_role = standard.ReactionRole.INTERNAL_STANDARD
        self.assertEmpty(validations.validate_message(with_workup_istd))
Ejemplo n.º 19
0
 def test_reaction(self):
     """Checks that an empty Reaction fails with a reaction input error."""
     empty_reaction = reaction_pb2.Reaction()
     expect_failure = self.assertRaisesRegex(validations.ValidationError,
                                             'reaction input')
     with expect_failure:
         validations.validate_message(empty_reaction)
Ejemplo n.º 20
0
 def test_orcid_should_fail(self):
     """Checks that this ORCID value is rejected as invalid."""
     person = reaction_pb2.Person(orcid='abcd-0001-2345-678X')
     expect_invalid = self.assertRaisesRegex(validations.ValidationError,
                                             'Invalid')
     with expect_invalid:
         validations.validate_message(person)