Example #1
 def test_modify_dataset(self):
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     # Modify the existing reaction...
     dataset.reactions[0].inputs['methylamine'].components[
         0].moles.value = 2
     # ...and add a new reaction.
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     dataset.reactions.add().CopyFrom(reaction)
     message_helpers.write_message(dataset, self.dataset_filename)
     filenames = self._run_main()
     self.assertCountEqual([self.dataset_filename], filenames)
     # Check for preservation of dataset and record IDs.
     updated_dataset = message_helpers.load_message(self.dataset_filename,
                                                    dataset_pb2.Dataset)
     self.assertLen(updated_dataset.reactions, 2)
     self.assertEqual(dataset.dataset_id, updated_dataset.dataset_id)
     self.assertEqual(dataset.reactions[0].reaction_id,
                      updated_dataset.reactions[0].reaction_id)
     self.assertNotEmpty(updated_dataset.reactions[1].reaction_id)
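Note: this snippet and Example #3 below appear to target different revisions of the reaction schema. Here the quantity sits directly on the component (component.moles) and is_limiting is an enum (reaction_pb2.Boolean.TRUE); in Example #3 the quantity is nested under component.amount.moles and is_limiting takes a plain bool.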
Example #2
 def test_bad_json(self):
     with tempfile.NamedTemporaryFile(mode='w+') as f:
         message = test_pb2.RepeatedScalar(values=[1.2, 3.4])
         f.write(json_format.MessageToJson(message))
         f.flush()
         with self.assertRaisesRegex(ValueError, 'no field named "values"'):
             message_helpers.load_message(f.name, test_pb2.Nested, 'json')
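For contrast, a minimal happy-path sketch under the same assumptions (the same test_pb2 protos and the explicit 'json' format argument used above); the method name and assertion are illustrative:
 def test_good_json(self):
     with tempfile.NamedTemporaryFile(mode='w+') as f:
         message = test_pb2.RepeatedScalar(values=[1.2, 3.4])
         f.write(json_format.MessageToJson(message))
         f.flush()
         # Loading with the matching message type round-trips cleanly.
         loaded = message_helpers.load_message(f.name,
                                               test_pb2.RepeatedScalar,
                                               'json')
         self.assertEqual(message, loaded)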
Example #3
 def test_modify_dataset(self):
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     # Modify the existing reaction...
     reaction1 = dataset.reactions[0]
     reaction1.inputs['methylamine'].components[0].amount.moles.value = 2
     # ...and add a new reaction.
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     component.amount.moles.value = 2
     component.amount.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     reaction.provenance.record_created.time.value = '2020-01-01'
     reaction.provenance.record_created.person.username = '******'
     reaction.provenance.record_created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test'
     dataset.reactions.add().CopyFrom(reaction)
     message_helpers.write_message(dataset, self.dataset_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {'test'})
     self.assertEmpty(removed)
     self.assertEqual(changed, {'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'})
     self.assertCountEqual([self.dataset_filename], filenames)
     # Check for preservation of dataset and record IDs.
     updated_dataset = message_helpers.load_message(self.dataset_filename,
                                                    dataset_pb2.Dataset)
     self.assertLen(updated_dataset.reactions, 2)
     self.assertEqual(dataset.dataset_id, updated_dataset.dataset_id)
     self.assertEqual(dataset.reactions[0].reaction_id,
                      updated_dataset.reactions[0].reaction_id)
     self.assertNotEmpty(updated_dataset.reactions[1].reaction_id)
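The _run() helper used here (and in Examples #8, #11, and #21) presumably wraps the pipeline's run() from Example #28 and additionally collects the written filenames, yielding the (added, removed, changed, filenames) tuple that these tests unpack.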
Example #4
 def test_bad_pbtxt(self):
     with tempfile.NamedTemporaryFile(mode='w+', suffix='.pbtxt') as f:
         message = test_pb2.RepeatedScalar(values=[1.2, 3.4])
         f.write(text_format.MessageToString(message))
         f.flush()
         with self.assertRaisesRegex(ValueError, 'no field named "values"'):
             message_helpers.load_message(f.name, test_pb2.Nested)
Example #5
def test_download_dataset(client, filename, expected, tmp_path):
    response = client.get(f"/dataset/{filename}/download",
                          follow_redirects=True)
    assert response.status_code == expected
    if response.status_code == 200:
        # Make sure it parses.
        filename = (tmp_path / "dataset.pb").as_posix()
        with open(filename, "wb") as f:
            f.write(response.data)
        message_helpers.load_message(filename, dataset_pb2.Dataset)
Example #6
 def test_download_dataset_with_kind(self, file_name, kind, expected):
     response = self.client.get(f'/dataset/{file_name}/download/{kind}',
                                follow_redirects=True)
     self.assertEqual(response.status_code, expected)
     if response.status_code == 200:
         # Make sure it parses.
         filename = os.path.join(self.test_directory, f'dataset.{kind}')
         with open(filename, 'wb') as f:
             f.write(response.data)
         message_helpers.load_message(filename, dataset_pb2.Dataset)
Example #7
 def test_bad_binary(self):
     with tempfile.NamedTemporaryFile(suffix='.pb') as f:
         message = test_pb2.RepeatedScalar(values=[1.2, 3.4])
         f.write(message.SerializeToString())
         f.flush()
         # NOTE(kearnes): The decoder is not perfect; for example, it will
         # not be able to distinguish between messages with the same tags
         # and types (e.g. test_pb2.Scalar and test_pb2.RepeatedScalar).
         with self.assertRaisesRegex(ValueError, 'Error parsing message'):
             message_helpers.load_message(f.name, test_pb2.Nested)
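The caveat in the NOTE is inherent to the wire format: serialized protocol buffers carry only field numbers and wire types, not message type names, so any schema that is structurally compatible with the bytes will parse without error. The ValueError is raised here only because test_pb2.Nested happens to be structurally incompatible with the serialized RepeatedScalar.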
Example #8
 def test_add_sharded_dataset(self):
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     component.amount.moles.value = 2
     component.amount.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     reaction.provenance.record_created.time.value = '2020-01-02'
     reaction.provenance.record_created.person.username = '******'
     reaction.provenance.record_created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test1'
     dataset1 = dataset_pb2.Dataset(reactions=[reaction])
     dataset1_filename = os.path.join(self.test_subdirectory, 'test1.pbtxt')
     message_helpers.write_message(dataset1, dataset1_filename)
     reaction.provenance.record_created.time.value = '2020-01-03'
     reaction.provenance.record_created.person.username = '******'
     reaction.provenance.record_created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test2'
     dataset2 = dataset_pb2.Dataset(reactions=[reaction])
     dataset2_filename = os.path.join(self.test_subdirectory, 'test2.pbtxt')
     message_helpers.write_message(dataset2, dataset2_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {'test1', 'test2'})
     self.assertEmpty(removed)
     self.assertEmpty(changed)
     self.assertLen(filenames, 2)
     filenames.pop(filenames.index(self.dataset_filename))
     self.assertLen(filenames, 1)
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     self.assertLen(dataset.reactions, 2)
     self.assertFalse(os.path.exists(dataset1_filename))
     self.assertFalse(os.path.exists(dataset2_filename))
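In other words, the sharding path merges the two new single-reaction datasets into one combined dataset file (the entry left in filenames after popping self.dataset_filename) and deletes the original per-shard inputs.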
Example #9
def main(argv):
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input, recursive=True)
    logging.info('Found %d datasets', len(filenames))
    dois = collections.defaultdict(list)
    for filename in filenames:
        logging.info('Checking %s', filename)
        dataset = message_helpers.load_message(filename, dataset_pb2.Dataset)
        dataset_id = os.path.splitext(os.path.basename(filename))[0]
        assert dataset.dataset_id == dataset_id
        doi_set = set()
        for reaction in dataset.reactions:
            # Some poorly-validated DOI entries start with 'doi:'...
            match = re.fullmatch(r'(?:(?:doi)|(?:DOI))?:?\s*(.*)',
                                 reaction.provenance.doi)
            doi_set.add(match.group(1))
        for doi in doi_set:
            dois[doi].append(dataset_id)
    for doi in sorted(dois):
        print(f'* [{doi}](https://doi.org/{doi})')
        for dataset in sorted(dois[doi]):
            url = urllib.parse.urljoin(
                _PREFIX,
                message_helpers.id_filename(dataset) + '.pbtxt')
            print(f'  * [{dataset}]({url})')
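A standalone sanity check of the DOI-normalizing pattern above; the helper name and sample strings are illustrative:
def _check_doi_pattern():
    pattern = r'(?:(?:doi)|(?:DOI))?:?\s*(.*)'
    for raw in ('10.1000/xyz123',
                'doi:10.1000/xyz123',
                'DOI: 10.1000/xyz123'):
        # Each variant normalizes to the bare DOI.
        assert re.fullmatch(pattern, raw).group(1) == '10.1000/xyz123'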
Example #10
def main(argv):
    del argv  # Only used by app.run().
    filenames = sorted(_get_filenames())
    if not filenames:
        logging.info('nothing to do')
        return  # Nothing to do.
    datasets = {}
    for filename in filenames:
        datasets[filename] = message_helpers.load_message(
            filename, dataset_pb2.Dataset, FLAGS.input_format)
    if FLAGS.validate:
        validate(datasets)
    if not FLAGS.update:
        logging.info('nothing else to do; use --update for more')
        return  # Nothing else to do.
    for dataset in datasets.values():
        for reaction in dataset.reactions:
            update_reaction(reaction)
    combined = _combine_datasets(datasets)
    if FLAGS.output:
        output_filename = FLAGS.output
    else:
        output_filename = _get_output_filename(combined.dataset_id)
    os.makedirs(os.path.dirname(output_filename), exist_ok=True)
    if FLAGS.cleanup:
        cleanup(filenames, output_filename)
    logging.info('writing combined Dataset to %s', output_filename)
    message_helpers.write_message(combined, output_filename,
                                  FLAGS.input_format)
Example #11
 def test_add_dataset(self):
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = True
     component.amount.moles.value = 2
     component.amount.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     reaction.provenance.record_created.time.value = '2020-01-01'
     reaction.provenance.record_created.person.username = '******'
     reaction.provenance.record_created.person.email = '*****@*****.**'
     reaction.reaction_id = 'test'
     dataset = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(dataset, dataset_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {'test'})
     self.assertEmpty(removed)
     self.assertEmpty(changed)
     self.assertLen(filenames, 2)
     self.assertFalse(os.path.exists(dataset_filename))
     # Check for assignment of dataset and reaction IDs.
     filenames.pop(filenames.index(self.dataset_filename))
     self.assertLen(filenames, 1)
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     self.assertNotEmpty(dataset.dataset_id)
     self.assertLen(dataset.reactions, 1)
     self.assertNotEmpty(dataset.reactions[0].reaction_id)
     # Check for binary output.
     root, ext = os.path.splitext(filenames[0])
     self.assertEqual(ext, '.pbtxt')
     self.assertTrue(os.path.exists(root + '.pb'))
Example #12
 def test_add_dataset_with_large_data(self):
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     image = reaction.observations.add().image
     image.bytes_value = b'test data value'
     image.format = 'png'
     dataset = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(dataset, dataset_filename)
     filenames = self._run_main(min_size=0.0)
     self.assertLen(filenames, 2)
     filenames.pop(filenames.index(self.dataset_filename))
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     relative_path = (
         'data/36/ord_data-'
         '36443a1839bf1160087422b7468a93c7b97dac7eea423bfac189208a15823139'
         '.png')
     expected = ('https://github.com/Open-Reaction-Database/'
                 'ord-submissions-test/tree/' + relative_path)
     self.assertEqual(dataset.reactions[0].observations[0].image.url,
                      expected)
     with open(os.path.join(self.test_subdirectory, relative_path),
               'rb') as f:
         self.assertEqual(b'test data value', f.read())
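The shard directory ('36') matches the first two characters of the 64-character hex digest embedded in the filename, which suggests the offloaded path is derived from a SHA-256 of the raw bytes. A sketch of that presumed layout (the hashlib derivation is an assumption, not taken from the snippet):
import hashlib

def _presumed_data_path(data, suffix):
    # Assumption: offloaded filenames embed sha256(data) with a
    # two-character shard prefix.
    digest = hashlib.sha256(data).hexdigest()
    return f'data/{digest[:2]}/ord_data-{digest}.{suffix}'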
Example #13
 def test_resolver(self):
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='NAME', value='ethylamine')
     component.is_limiting = True
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     dataset = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(dataset, dataset_filename)
     filenames = self._run_main()
     self.assertLen(filenames, 2)
     self.assertFalse(os.path.exists(dataset_filename))
     filenames.pop(filenames.index(self.dataset_filename))
     self.assertLen(filenames, 1)
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     self.assertLen(dataset.reactions, 1)
     identifiers = (dataset.reactions[0].inputs['ethylamine'].components[0].
                    identifiers)
     self.assertLen(identifiers, 3)
     self.assertEqual(
         identifiers[1],
         reaction_pb2.CompoundIdentifier(
             type='SMILES', value='CCN',
             details='NAME resolved by PubChem'))
     self.assertEqual(identifiers[2].type,
                      reaction_pb2.CompoundIdentifier.RDKIT_BINARY)
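Reading the assertions together: identifiers[0] is the submitted NAME, identifiers[1] is a SMILES added by PubChem name resolution, and identifiers[2] is an RDKIT_BINARY structure generated during the update, so resolution appends to the submitted identifiers rather than replacing them.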
Example #14
 def test_add_dataset_with_existing_reaction_ids(self):
     reaction = reaction_pb2.Reaction()
     ethylamine = reaction.inputs['ethylamine']
     component = ethylamine.components.add()
     component.identifiers.add(type='SMILES', value='CCN')
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.moles.value = 2
     component.moles.units = reaction_pb2.Moles.MILLIMOLE
     reaction.outcomes.add().conversion.value = 25
     reaction_id = 'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'
     reaction.reaction_id = reaction_id
     reaction.provenance.record_created.time.value = '2020-01-01 11 am'
     dataset = dataset_pb2.Dataset(reactions=[reaction])
     dataset_filename = os.path.join(self.test_subdirectory, 'test.pbtxt')
     message_helpers.write_message(dataset, dataset_filename)
     filenames = self._run_main()
     self.assertLen(filenames, 2)
     self.assertFalse(os.path.exists(dataset_filename))
     filenames.pop(filenames.index(self.dataset_filename))
     self.assertLen(filenames, 1)
     dataset = message_helpers.load_message(filenames[0],
                                            dataset_pb2.Dataset)
     # Check that existing record IDs for added datasets are not overridden.
     self.assertEqual(dataset.reactions[0].reaction_id, reaction_id)
     self.assertLen(dataset.reactions[0].provenance.record_modified, 0)
Example #15
 def test_round_trip(self, message_format):
     for message in self.messages:
         with tempfile.NamedTemporaryFile(suffix=message_format.value) as f:
             message_helpers.write_message(message, f.name)
             f.flush()
             self.assertEqual(
                 message,
                 message_helpers.load_message(f.name, type(message)))
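The parameterization over message_format.value implies that write_message and load_message infer the wire format from the filename suffix when no explicit format is passed, consistent with the '.pbtxt' and '.pb' suffixes exercised in Examples #4 and #7.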
Example #16
 def test_modify_dataset_with_validation_errors(self):
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     dataset.reactions[0].inputs['methylamine'].components[
         0].moles.value = (-2)
     message_helpers.write_message(dataset, self.dataset_filename)
     with self.assertRaisesRegex(ValueError, 'must be non-negative'):
         self._run_main()
Example #17
 def test_main_with_updates(self):
     output = os.path.join(self.test_subdirectory, 'output.pbtxt')
     with flagsaver.flagsaver(input_pattern=self.dataset1_filename,
                              update=True,
                              output=output):
         process_dataset.main(())
     self.assertTrue(os.path.exists(output))
     dataset = message_helpers.load_message(output, dataset_pb2.Dataset)
     self.assertLen(dataset.reactions, 1)
     self.assertStartsWith(dataset.reactions[0].reaction_id, 'ord-')
Example #18
def main(argv):
    del argv  # Only used by app.run().
    dataset = message_helpers.load_message(FLAGS.pb, dataset_pb2.Dataset)
    pb_data = text_format.MessageToString(dataset)
    with open(FLAGS.pbtxt) as f:
        pbtxt_data = f.read()
    if pb_data != pbtxt_data:
        diff = difflib.context_diff(pb_data.splitlines(),
                                    pbtxt_data.splitlines())
        raise ValueError(f'Datasets differ:\n{pprint.pformat(list(diff))}')
Example #19
def migrate_one(user_id, name, conn):
    """Slurp one named dataset from the db/ directory into Postgres."""
    dataset = message_helpers.load_message(f'db/{user_id}/{name}',
                                           dataset_pb2.Dataset)
    serialized = dataset.SerializeToString().hex()
    query = psycopg2.sql.SQL(
        'INSERT INTO datasets VALUES (%s, %s, %s) '
        'ON CONFLICT (user_id, name) DO UPDATE SET serialized=%s')
    with conn.cursor() as cursor:
        cursor.execute(query, [user_id, name[:-6], serialized, serialized])
Example #20
def main(argv):
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input, recursive=True)
    logging.info('Found %d datasets', len(filenames))
    datasets = {}
    for filename in filenames:
        logging.info('Validating %s', filename)
        datasets[filename] = message_helpers.load_message(
            filename, dataset_pb2.Dataset)
    validations.validate_datasets(datasets)
Example #21
 def test_modify_reaction_id(self):
     dataset = message_helpers.load_message(self.dataset_filename,
                                            dataset_pb2.Dataset)
     dataset.reactions[0].reaction_id = 'test_rename'
     message_helpers.write_message(dataset, self.dataset_filename)
     added, removed, changed, filenames = self._run()
     self.assertEqual(added, {'test_rename'})
     self.assertEqual(removed, {'ord-10aed8b5dffe41fab09f5b2cc9c58ad9'})
     self.assertEmpty(changed)
     self.assertCountEqual([self.dataset_filename], filenames)
Example #22
def migrate_one(user_id, name, conn):
    """Slurp one named dataset from the db/ directory into Postgres."""
    dataset = message_helpers.load_message(
        os.path.join(os.path.dirname(__file__), "..", "db", user_id, name),
        dataset_pb2.Dataset)
    serialized = dataset.SerializeToString().hex()
    query = psycopg2.sql.SQL(
        "INSERT INTO datasets VALUES (%s, %s, %s) "
        "ON CONFLICT (user_id, name) DO UPDATE SET serialized=%s")
    with conn.cursor() as cursor:
        cursor.execute(query, [user_id, name[:-6], serialized, serialized])
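Since name[:-6] apparently strips a '.pbtxt' suffix before storage, a hypothetical read-side counterpart would look up the stored stem and reverse the hex encoding (the function name is illustrative; the table layout is assumed from the INSERT above):
def fetch_one(user_id, stem, conn):
    """Hypothetical inverse of migrate_one for the same table layout."""
    query = psycopg2.sql.SQL(
        "SELECT serialized FROM datasets WHERE user_id=%s AND name=%s")
    with conn.cursor() as cursor:
        cursor.execute(query, [user_id, stem])
        row = cursor.fetchone()
    dataset = dataset_pb2.Dataset()
    dataset.ParseFromString(bytes.fromhex(row[0]))
    return dataset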
Example #23
 def test_main(self):
     output_filename = os.path.join(self.test_subdirectory, 'dataset.pbtxt')
     with flagsaver.flagsaver(template=self.template,
                              spreadsheet=self.spreadsheet,
                              output=output_filename):
         enumerate_dataset.main(())
     self.assertTrue(os.path.exists(output_filename))
     dataset = message_helpers.load_message(output_filename,
                                            dataset_pb2.Dataset)
     self.assertLen(dataset.reactions, 3)
     validations.validate_message(dataset, raise_on_error=True)
     self.assertEqual(dataset, self.expected)
Example #24
def main(argv):
    del argv  # Only used by app.run().
    reaction = message_helpers.load_message(FLAGS.input, reaction_pb2.Reaction)
    if FLAGS.output_type == 'html':
        text = generate_text.generate_html(reaction)
    elif FLAGS.output_type == 'text':
        text = generate_text.generate_text(reaction)
    else:
        raise ValueError(f'unsupported output_type: {FLAGS.output_type}')
    if FLAGS.output:
        with open(FLAGS.output, 'w') as f:
            f.write(text)
    else:
        print(text)
Example #25
def main(argv):
    del argv  # Only used by app.run()
    reaction = message_helpers.load_message(FLAGS.input_file,
                                            reaction_pb2.Reaction)

    if FLAGS.type == 'html':
        text = generate_html(reaction)
    elif FLAGS.type == 'text':
        text = generate_text(reaction)
    else:
        # Guard against an unexpected flag value; without this branch,
        # `text` would be unbound below.
        raise ValueError(f'unsupported type: {FLAGS.type}')

    if FLAGS.output:
        with open(FLAGS.output, 'w') as fid:
            fid.write(text)
    else:
        print(text)
Example #26
 def test_simple(self):
     input_pattern = os.path.join(self.test_subdirectory,
                                  'reaction-1.pbtxt')
     output_filename = os.path.join(self.test_subdirectory, 'dataset.pbtxt')
     with flagsaver.flagsaver(input=input_pattern,
                              name='test dataset',
                              description='this is a test dataset',
                              output=output_filename):
         build_dataset.main(())
     self.assertTrue(os.path.exists(output_filename))
     dataset = message_helpers.load_message(output_filename,
                                            dataset_pb2.Dataset)
     self.assertEqual(dataset.name, 'test dataset')
     self.assertEqual(dataset.description, 'this is a test dataset')
     self.assertLen(dataset.reactions, 1)
Example #27
def main(argv):
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input)
    logging.info('Found %d datasets', len(filenames))
    records = []
    for filename in filenames:
        dataset = message_helpers.load_message(filename, dataset_pb2.Dataset)
        for reaction in dataset.reactions:
            record_dict = get_database_json(reaction)
            record_dict['_dataset_id'] = dataset.dataset_id
            record_dict['_serialized'] = encode_bytes(
                reaction.SerializeToString())
            records.append(json.dumps(record_dict))
    with open(FLAGS.output, 'w') as f:
        for record in records:
            f.write(f'{record}\n')
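Each output line is a self-contained JSON record for one reaction (JSON Lines), carrying the parent _dataset_id and an encoded copy of the serialized proto so that a downstream loader can rebuild the Reaction without re-reading the source datasets.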
Example #28
def run():
    """Main function that returns added/removed reaction ID sets.

    This function should be called directly by tests to get access to the
    return values. If main() returns something other than None it will break
    shell error code logic downstream.

    Returns:
        added: Set of added reaction IDs.
        removed: Set of deleted reaction IDs.
        changed: Set of changed reaction IDs.
    """
    inputs = sorted(_get_inputs())
    if not inputs:
        logging.info('nothing to do')
        return set(), set(), set()  # Nothing to do.
    datasets = {}
    for file_status in inputs:
        if file_status.status == 'D':
            continue  # Nothing to do for deleted files.
        datasets[file_status.filename] = message_helpers.load_message(
            file_status.filename, dataset_pb2.Dataset)
    if FLAGS.validate:
        # Note: this does not check if IDs are malformed.
        validations.validate_datasets(datasets, FLAGS.write_errors)
    if FLAGS.base:
        added, removed, changed = get_change_stats(datasets,
                                                   inputs,
                                                   base=FLAGS.base)
        logging.info('Summary: +%d -%d Δ%d reaction IDs', len(added),
                     len(removed), len(changed))
        if (added or removed or changed) and FLAGS.issue and FLAGS.token:
            client = github.Github(FLAGS.token)
            repo = client.get_repo(os.environ['GITHUB_REPOSITORY'])
            issue = repo.get_issue(FLAGS.issue)
            issue.create_comment(
                f'Summary: +{len(added)} -{len(removed)} Δ{len(changed)} '
                'reaction IDs')
    else:
        added, removed, changed = None, None, None
    if FLAGS.update:
        _run_updates(inputs, datasets)
    else:
        logging.info('nothing else to do; use --update for more')
    return added, removed, changed
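A minimal main() wrapper consistent with the docstring's warning, mirroring the del argv pattern used elsewhere on this page; it discards run()'s return value so that app.run() sees None and exits cleanly:
def main(argv):
    del argv  # Only used by app.run().
    run()  # Discard the return values; see the docstring above.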
Example #29
def main(argv):
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input, recursive=True)
    logging.info('Found %d Reaction protos', len(filenames))
    reactions = []
    for filename in filenames:
        reactions.append(
            message_helpers.load_message(filename, reaction_pb2.Reaction))
    if not FLAGS.name:
        logging.warning('Consider setting the dataset name with --name')
    if not FLAGS.description:
        logging.warning(
            'Consider setting the dataset description with --description')
    dataset = dataset_pb2.Dataset(name=FLAGS.name,
                                  description=FLAGS.description,
                                  reactions=reactions)
    if FLAGS.validate:
        validations.validate_datasets({'_COMBINED': dataset})
    message_helpers.write_message(dataset, FLAGS.output)
Example #30
def main(argv):
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input)
    logging.info('Found %d datasets', len(filenames))
    with Tables() as tables:
        for filename in filenames:
            logging.info(filename)
            dataset = message_helpers.load_message(filename,
                                                   dataset_pb2.Dataset)
            for reaction in dataset.reactions:
                process_reaction(reaction, tables)
    if FLAGS.database:
        logging.info('Creating Postgres database')
        create_database()
    if FLAGS.cleanup:
        logging.info('Removing intermediate CSV files')
        for filename in glob.glob(os.path.join(FLAGS.output, '*.csv')):
            logging.info(filename)
            os.remove(filename)