Example #1
def main(argv):
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input, recursive=True)
    logging.info('Found %d datasets', len(filenames))
    datasets = {}
    for filename in filenames:
        logging.info('Validating %s', filename)
        datasets[filename] = message_helpers.load_message(
            filename, dataset_pb2.Dataset)
    validations.validate_datasets(datasets)
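
These snippets appear to come from the Open Reaction Database (ord-schema) tooling and rely on module-level imports, absl flag definitions, and an app.run() entry point that the excerpts omit. A minimal sketch of that scaffolding for Example #1, assuming ord-schema's import paths (the help string is an assumption):

import glob

from absl import app
from absl import flags
from absl import logging

from ord_schema import message_helpers
from ord_schema import validations
from ord_schema.proto import dataset_pb2

FLAGS = flags.FLAGS

flags.DEFINE_string('input', None, 'Glob pattern matching input Dataset protos.')

if __name__ == '__main__':
    flags.mark_flag_as_required('input')
    app.run(main)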
Example #2
def _run_updates(inputs, datasets):
    """Updates the submission files.

    Args:
        inputs: List of FileStatus objects.
        datasets: Dict mapping filenames to Dataset messages.
    """
    for dataset in datasets.values():
        # Set reaction_ids, resolve names, fix cross-references, etc.
        updates.update_dataset(dataset)
        # Offload large Data values.
        data_filenames = data_storage.extract_data(dataset,
                                                   FLAGS.root,
                                                   min_size=FLAGS.min_size,
                                                   max_size=FLAGS.max_size)
        if data_filenames:
            args = ['git', 'add'] + list(data_filenames)
            logging.info('Running command: %s', ' '.join(args))
            subprocess.run(args, check=True)
    combined = _combine_datasets(datasets)
    # Final validation to make sure we didn't break anything.
    options = validations.ValidationOptions(validate_ids=True,
                                            require_provenance=True)
    validations.validate_datasets({'_COMBINED': combined},
                                  FLAGS.write_errors,
                                  options=options)
    if FLAGS.output:
        output_filename = FLAGS.output
    else:
        _, suffix = os.path.splitext(inputs[0].filename)
        output_filename = os.path.join(
            FLAGS.root,
            message_helpers.id_filename(f'{combined.dataset_id}{suffix}'))
    # dirname() is empty for bare filenames; fall back to the current dir.
    os.makedirs(os.path.dirname(output_filename) or '.', exist_ok=True)
    if FLAGS.cleanup:
        cleanup(inputs, output_filename)
    logging.info('writing combined Dataset to %s', output_filename)
    message_helpers.write_message(combined, output_filename)
    # Write a binary version for fast read/write.
    root, ext = os.path.splitext(output_filename)
    if FLAGS.write_binary and ext != '.pb':
        binary_filename = root + '.pb'
        logging.info('writing combined Dataset (binary) to %s',
                     binary_filename)
        message_helpers.write_message(combined, binary_filename)
        args = ['git', 'add', binary_filename]
        logging.info('Running command: %s', ' '.join(args))
        subprocess.run(args, check=True)
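
cleanup() is defined outside this excerpt. Judging from the call site, it plausibly removes the input files that were just merged into the combined output; a hypothetical sketch (the exact behavior is an assumption):

def cleanup(inputs, output_filename):
    """Removes input files that were merged into the combined output.

    NOTE: hypothetical sketch; the real implementation is not shown above.
    """
    for file_status in inputs:
        if file_status.status == 'D':
            continue  # Already deleted.
        if file_status.filename == output_filename:
            continue  # Never remove the file we are about to (re)write.
        args = ['git', 'rm', file_status.filename]
        logging.info('Running command: %s', ' '.join(args))
        subprocess.run(args, check=True)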
Example #3
def run():
    """Main function that returns added/removed reaction ID sets.

    This function should be called directly by tests to get access to the
    return values. If main() returns something other than None it will break
    shell error code logic downstream.

    Returns:
        added: Set of added reaction IDs.
        removed: Set of deleted reaction IDs.
        changed: Set of changed reaction IDs.
    """
    inputs = sorted(_get_inputs())
    if not inputs:
        logging.info('nothing to do')
        return set(), set(), set()  # Nothing to do.
    datasets = {}
    for file_status in inputs:
        if file_status.status == 'D':
            continue  # Nothing to do for deleted files.
        datasets[file_status.filename] = message_helpers.load_message(
            file_status.filename, dataset_pb2.Dataset)
    if FLAGS.validate:
        # Note: this does not check if IDs are malformed.
        validations.validate_datasets(datasets, FLAGS.write_errors)
    if FLAGS.base:
        added, removed, changed = get_change_stats(datasets,
                                                   inputs,
                                                   base=FLAGS.base)
        logging.info('Summary: +%d -%d Δ%d reaction IDs', len(added),
                     len(removed), len(changed))
        if (added or removed or changed) and FLAGS.issue and FLAGS.token:
            client = github.Github(FLAGS.token)
            repo = client.get_repo(os.environ['GITHUB_REPOSITORY'])
            issue = repo.get_issue(FLAGS.issue)
            issue.create_comment(
                f'Summary: +{len(added)} -{len(removed)} Δ{len(changed)} '
                'reaction IDs')
    else:
        added, removed, changed = None, None, None
    if FLAGS.update:
        _run_updates(inputs, datasets)
    else:
        logging.info('nothing else to do; use --update for more')
    return added, removed, changed
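
_get_inputs() and the FileStatus objects it returns are also not shown. From the attribute accesses above (filename, status, sortability, and 'D' marking deletions), they plausibly wrap "git diff --name-status" output; a hypothetical sketch:

import dataclasses
import subprocess


@dataclasses.dataclass(order=True, frozen=True)
class FileStatus:
    """A changed file; status is a one-letter git code ('A', 'M', 'D', ...)."""
    filename: str
    status: str


def _get_inputs():
    """Lists files changed relative to FLAGS.base.

    NOTE: hypothetical sketch; the real implementation is not shown above.
    """
    output = subprocess.run(
        ['git', 'diff', '--name-status', FLAGS.base or 'HEAD'],
        check=True, capture_output=True, text=True).stdout
    inputs = []
    for line in output.splitlines():
        status, filename = line.split(maxsplit=1)
        inputs.append(FileStatus(filename=filename, status=status[0]))
    return inputs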
Example #4
def main(argv):
    del argv  # Only used by app.run().
    filenames = glob.glob(FLAGS.input, recursive=True)
    logging.info('Found %d Reaction protos', len(filenames))
    reactions = []
    for filename in filenames:
        reactions.append(
            message_helpers.load_message(filename, reaction_pb2.Reaction))
    if not FLAGS.name:
        logging.warning('Consider setting the dataset name with --name')
    if not FLAGS.description:
        logging.warning(
            'Consider setting the dataset description with --description')
    dataset = dataset_pb2.Dataset(name=FLAGS.name,
                                  description=FLAGS.description,
                                  reactions=reactions)
    if FLAGS.validate:
        validations.validate_datasets({'_COMBINED': dataset})
    message_helpers.write_message(dataset, FLAGS.output)
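
One detail worth noting in Example #4: protobuf constructors copy submessages, so reactions=reactions deep-copies each Reaction into the new Dataset rather than aliasing the Python list. The constructor call is equivalent to:

dataset = dataset_pb2.Dataset(name=FLAGS.name, description=FLAGS.description)
dataset.reactions.extend(reactions)  # Copies each Reaction into the Dataset.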
Example #5
def main(argv):
    del argv  # Only used by app.run().
    inputs = sorted(_get_inputs())
    if not inputs:
        logging.info('nothing to do')
        return  # Nothing to do.
    datasets = {}
    for file_status in inputs:
        datasets[file_status.filename] = message_helpers.load_message(
            file_status.filename, dataset_pb2.Dataset)
    if FLAGS.validate:
        validations.validate_datasets(datasets, FLAGS.write_errors)
    if not FLAGS.update:
        logging.info('nothing else to do; use --update for more')
        return  # Nothing else to do.
    for dataset in datasets.values():
        for reaction in dataset.reactions:
            updates.update_reaction(reaction)
        # Offload large Data values.
        data_filenames = data_storage.extract_data(dataset,
                                                   FLAGS.root,
                                                   min_size=FLAGS.min_size,
                                                   max_size=FLAGS.max_size)
        if data_filenames:
            # list() in case extract_data() returns a set (cf. Example #2).
            args = ['git', 'add'] + list(data_filenames)
            logging.info('Running command: %s', ' '.join(args))
            subprocess.run(args, check=True)
    combined = _combine_datasets(datasets)
    # Final validation to make sure we didn't break anything.
    validations.validate_datasets({'_COMBINED': combined}, FLAGS.write_errors)
    if FLAGS.output:
        output_filename = FLAGS.output
    else:
        _, suffix = os.path.splitext(inputs[0].filename)
        output_filename = os.path.join(
            FLAGS.root,
            message_helpers.id_filename(f'{combined.dataset_id}{suffix}'))
    # dirname() is empty for bare filenames; fall back to the current dir.
    os.makedirs(os.path.dirname(output_filename) or '.', exist_ok=True)
    if FLAGS.cleanup:
        cleanup(inputs, output_filename)
    logging.info('writing combined Dataset to %s', output_filename)
    message_helpers.write_message(combined, output_filename)
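
_combine_datasets() is defined outside these excerpts as well. Given that the result carries the merged reactions and a dataset_id used to build the output filename, a plausible sketch (the ID scheme here is a placeholder assumption):

import uuid


def _combine_datasets(datasets):
    """Merges a dict of Datasets (keyed by filename) into a single Dataset.

    NOTE: hypothetical sketch; the real implementation is not shown above,
    and the dataset_id handling in particular is a placeholder.
    """
    combined = dataset_pb2.Dataset()
    for filename in sorted(datasets):
        combined.reactions.extend(datasets[filename].reactions)
    if len(datasets) == 1:
        combined.dataset_id = next(iter(datasets.values())).dataset_id
    else:
        combined.dataset_id = f'ord_dataset-{uuid.uuid4().hex}'  # Placeholder.
    return combined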