def test_valid_templating(self):
    """Checks that placeholders are filled in from spreadsheet columns.

    The same expected dataset must be produced whether or not the column
    names are wrapped in "$".
    """
    smiles_column = ['CCO', 'CCCO', 'CCCCO']
    conversion_column = [75, 50, 30]
    # Turn the fixture template into one with two placeholders.
    template_string = self.template_string.replace(
        'value: "CCO"', 'value: "$my_smiles$"').replace(
            'value: 75', 'value: $conversion$')

    # Column names that include the "$" delimiters.
    df = pd.DataFrame.from_dict({
        '$my_smiles$': smiles_column,
        '$conversion$': conversion_column,
    })
    dataset = templating.generate_dataset(template_string, df)

    # One expected reaction per spreadsheet row, each a copy of the valid
    # fixture reaction with the two templated fields overwritten.
    expected_reactions = []
    for smiles, conversion in zip(smiles_column, conversion_column):
        expected = reaction_pb2.Reaction()
        expected.CopyFrom(self.valid_reaction)
        expected.inputs['in'].components[0].identifiers[0].value = smiles
        expected.outcomes[0].conversion.value = conversion
        expected_reactions.append(expected)
    expected_dataset = dataset_pb2.Dataset(reactions=expected_reactions)
    self.assertEqual(dataset, expected_dataset)

    # Test without "$" in column names
    df = pd.DataFrame.from_dict({
        'my_smiles': smiles_column,
        'conversion': conversion_column,
    })
    dataset = templating.generate_dataset(template_string, df)
    self.assertEqual(dataset, expected_dataset)
def test_bad_placeholders(self):
    """Checks that a placeholder without a matching column is rejected.

    The template contains both $my_smiles$ and $conversion$, but the
    spreadsheet only provides the former.
    """
    template_string = self.template_string.replace(
        'value: "CCO"', 'value: "$my_smiles$"').replace(
            'value: 75', 'value: $conversion$')
    smiles_only = pd.DataFrame.from_dict({
        '$my_smiles$': ['CCO', 'CCCO', 'CCCCO'],
    })
    with self.assertRaisesRegex(ValueError, r'\$conversion\$ not found'):
        templating.generate_dataset(template_string, smiles_only)
def enumerate_dataset():
    """Creates a new dataset based on a template reaction and a spreadsheet.

    Three pieces of information are expected to be POSTed in a json object:
        spreadsheet_name: the original filename of the uploaded spreadsheet.
        spreadsheet_data: a base64-encoded string containing the contents of
            the spreadsheet. It may optionally carry a data URL prefix
            ("data:<mediatype>;base64,"), which is stripped before decoding.
        template_string: a string containing a text-formatted Reaction proto,
            i.e., the contents of a pbtxt file.

    A new dataset is created from the template and spreadsheet using
    ord_schema.templating.generate_dataset (with validation disabled) and
    stored under the name '<spreadsheet basename>_dataset'.

    Returns:
        The string 'ok' on success.

    Any error raised while handling the request aborts it with a 406 response
    whose body is the error message.
    """
    try:
        data = flask.request.get_json(force=True)
        basename, suffix = os.path.splitext(data['spreadsheet_name'])
        encoded = data['spreadsheet_data']
        if encoded.startswith('data:'):
            # Remove the data URL prefix; see
            # https://developer.mozilla.org/en-US/docs/Web/API/FileReader/readAsDataURL.
            match = re.fullmatch('data:.*?;base64,(.*)', encoded)
            if match is None:
                # fullmatch returns None for a malformed data URL; report
                # that explicitly instead of failing on match.group().
                raise ValueError(
                    'spreadsheet_data is not a valid base64 data URL')
            encoded = match.group(1)
        spreadsheet_data = io.BytesIO(base64.b64decode(encoded))
        dataframe = templating.read_spreadsheet(spreadsheet_data,
                                                suffix=suffix)
        dataset = templating.generate_dataset(data['template_string'],
                                              dataframe,
                                              validate=False)
        put_dataset(f'{basename}_dataset', dataset)
        return 'ok'
    except Exception as error:  # pylint: disable=broad-except
        # Top-level request boundary: surface every failure to the client.
        flask.abort(flask.make_response(str(error), 406))
def test_invalid_templating(self):
    """Checks that validation failures raise unless validation is disabled.

    With validate=False the dataset is still generated and must match the
    expected reactions exactly.
    """
    smiles_column = ['CCO', 'CCCO', 'CCCCO']
    # Presumably the negative precision in the last row is what fails
    # validation -- the check itself lives in the templating module.
    precision_column = [75, 50, -5]
    template_string = self.template_string.replace(
        'value: "CCO"', 'value: "$my_smiles$"').replace(
            'precision: 99', 'precision: $precision$')
    df = pd.DataFrame.from_dict({
        '$my_smiles$': smiles_column,
        '$precision$': precision_column,
    })

    # Expected output: one copy of the valid fixture reaction per row with
    # the templated fields overwritten.
    expected_reactions = []
    for smiles, precision in zip(smiles_column, precision_column):
        expected = reaction_pb2.Reaction()
        expected.CopyFrom(self.valid_reaction)
        expected.inputs['in'].components[0].identifiers[0].value = smiles
        expected.outcomes[0].conversion.precision = precision
        expected_reactions.append(expected)
    expected_dataset = dataset_pb2.Dataset(reactions=expected_reactions)

    # Validation is on by default and must reject the dataset.
    with self.assertRaisesRegex(ValueError,
                                'Enumerated Reaction is not valid'):
        templating.generate_dataset(template_string, df)
    # Skipping validation yields the dataset as enumerated.
    dataset = templating.generate_dataset(template_string,
                                          df,
                                          validate=False)
    self.assertEqual(dataset, expected_dataset)
def main(argv):
    """Generates a Dataset from a template and a spreadsheet and writes it.

    The template is read from FLAGS.template and the spreadsheet from
    FLAGS.spreadsheet. The result is written to FLAGS.output when that flag
    is set, and otherwise to '<spreadsheet path without extension>_dataset.pbtxt'.

    Args:
        argv: Unused; only consumed by app.run().
    """
    del argv  # Only used by app.run().
    with open(FLAGS.template) as template_file:
        template_string = template_file.read()
    df = templating.read_spreadsheet(FLAGS.spreadsheet)
    logging.info('generating new Dataset from %s and %s', FLAGS.template,
                 FLAGS.spreadsheet)
    dataset = templating.generate_dataset(template_string,
                                          df,
                                          validate=FLAGS.validate)
    output_filename = FLAGS.output
    if not output_filename:
        # Default: place the output next to the spreadsheet.
        stem = os.path.splitext(FLAGS.spreadsheet)[0]
        output_filename = f'{stem}_dataset.pbtxt'
    logging.info('writing new Dataset to %s', output_filename)
    message_helpers.write_message(dataset, output_filename)
def test_missing_values(self):  # pylint: disable=too-many-locals
    """Checks how empty spreadsheet cells affect the generated reactions.

    The expected dataset drops the parts of the template whose placeholder
    has no value in a given row.
    """
    # Build a template reaction: input 'one' has two components, input
    # 'two' has one, and there is a single outcome.
    template = reaction_pb2.Reaction()
    first_input = template.inputs['one']
    ethanol = first_input.components.add()
    ethanol.identifiers.add(value='CCO', type='SMILES')
    ethanol.amount.mass.value = 1.2
    ethanol.amount.mass.units = reaction_pb2.Mass.GRAM
    benzene = first_input.components.add()
    benzene.is_limiting = True
    benzene.identifiers.add(value='c1ccccc1', type='SMILES')
    benzene.amount.volume.value = 3.4
    benzene.amount.volume.units = reaction_pb2.Volume.LITER
    second_input = template.inputs['two']
    peroxide = second_input.components.add()
    peroxide.identifiers.add(value='COO', type='SMILES')
    peroxide.amount.mass.value = 5.6
    peroxide.amount.mass.units = reaction_pb2.Mass.GRAM
    template.outcomes.add().conversion.value = 75
    # Replace one SMILES and one mass with placeholders.
    template_string = text_format.MessageToString(template).replace(
        'value: "CCO"', 'value: "$smiles$"').replace('value: 5.6',
                                                     'value: $mass$')

    # Build a spreadsheet and test for proper edits.
    filename = os.path.join(self.test_subdirectory, 'missing.csv')
    with open(filename, 'w') as f:
        f.write('smiles,mass\n'
                'CN,\n'  # Missing mass.
                ',1.5\n')  # Missing SMILES.
    df = pd.read_csv(filename)
    dataset = templating.generate_dataset(template_string, df)

    # Row 1: SMILES filled in, mass missing.
    missing_mass = reaction_pb2.Reaction()
    missing_mass.CopyFrom(template)
    missing_mass.inputs['one'].components[0].identifiers[0].value = 'CN'
    del missing_mass.inputs['two']  # No components after amount removal.
    # Row 2: mass filled in, SMILES missing.
    missing_smiles = reaction_pb2.Reaction()
    missing_smiles.CopyFrom(template)
    del missing_smiles.inputs['one'].components[0]  # No identifiers.
    missing_smiles.inputs['two'].components[0].amount.mass.value = 1.5
    expected_dataset = dataset_pb2.Dataset(
        reactions=[missing_mass, missing_smiles])
    self.assertEqual(dataset, expected_dataset)