Code example #1
File: build-db.py | Project: danbjorn/til
import hashlib

import markdown
import yamldown
from sqlite_utils import Database


def build(paths, dbname, table):
    """
    Load markdown files into a SQLite database

    Based on https://github.com/simonw/markdown-to-sqlite, modified to use markdown
    extensions.
    """
    db = Database(dbname)
    md = markdown.Markdown(
        extensions=["fenced_code", "codehilite"],
        extension_configs={"codehilite": {"guess_lang": "False"}},
    )
    docs = []
    for path in paths:
        with open(path) as fp:
            metadata, text = yamldown.load(fp)
        html = md.convert(text)
        doc = {
            "_id": hashlib.sha1(str(path).encode("utf8")).hexdigest(),
            "_path": str(path),
            "text": text,
            "html": html,
            **(metadata or {}),
        }
        docs.append(doc)
    db[table].upsert_all(docs, pk="_id")
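For context, a minimal sketch of how build might be invoked; the directory, database name, and table name here are hypothetical, not taken from the project:

import pathlib

# Index every markdown file under til/ into a "til" table in til.db.
paths = pathlib.Path("til").glob("**/*.md")
build(paths, "til.db", "til")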
Code example #2
File: metadata.py | Project: valearna/ontobio
import os

import click
import yamldown


def get_yamldown_metadata(yamldown_dir, meta_id) -> dict:
    yamldown_md_path = os.path.join(yamldown_dir, "{}.md".format(meta_id))
    try:
        with open(yamldown_md_path, "r") as gorule_data:
            return yamldown.load(gorule_data)[0]
    except Exception as e:
        raise click.ClickException("Could not find or read {}: {}".format(yamldown_md_path))
Code example #3
def read_md_file(self) -> Tuple[Dict, str]:
    yml = {}
    md = ""
    if os.path.exists(self.file_path):
        with codecs.open(self.file_path, "r", 'utf-8') as file:
            (yml, md) = yamldown.load(file)
            logging.info((yml, md))
            if yml is None:
                yml = {}
    return (yml, md)
Code example #4
import click
import yamldown


def load_yamldown(path):
    """
    Loads a YAML file at path and returns it as a dictionary.
    """
    try:
        with open(path, "r") as f:
            return yamldown.load(f)[0]

    except Exception as e:
        raise click.ClickException(str(e))
Code example #5
File: validate.py | Project: diatomsRcool/ontobio
def gorule_metadata(metadata, rule_id) -> dict:
    gorule_yamldown = os.path.join(metadata, "rules", "{}.md".format(rule_id))
    try:
        with open(gorule_yamldown, "r") as gorule_data:
            click.echo("Found {rule} at {path}".format(rule=rule_id,
                                                       path=gorule_yamldown))
            return yamldown.load(gorule_data)[0]
    except Exception as e:
        raise click.ClickException("Could not find or read {}: {}".format(
            gorule_yamldown, str(e)))
Code example #6
def _read_yml_md_file(self) -> Tuple[Dict, str]:
    yml = {}
    md = ""
    if os.path.exists(self.file_path):
        with codecs.open(self.file_path, "r", 'utf-8') as file:
            if file.readline().strip() == "---":
                file.seek(0)
                (yml, md) = yamldown.load(file)
            else:
                file.seek(0)
                md = file.read()
            # logging.info((yml, md))
            if yml is None:
                yml = {}
    return (yml, md)
Code example #7
def _read_yml_md_file(self) -> Tuple[Dict, str]:
    metadata = {}
    content = ""
    if os.path.exists(self.file_path):
        with codecs.open(self.file_path, "r", 'utf-8') as file:
            if file.readline().strip() == "---":
                file.seek(0)
                (metadata, content) = yamldown.load(file)
            else:
                file.seek(0)
                content = file.read()
            # logging.info((metadata, content))
            if metadata is None:
                metadata = {}
    return (metadata, content)
Code example #8
def load_yamldown(path):
    """
    Loads a YAML file at path and returns it as a dictionary.
    """
    try:
        with open(path, "r") as f:
            load = yamldown.load(f)[0]
            if load is None:
                raise click.ClickException(
                    "No rule present at {}".format(path))

            return load

    except Exception as e:
        raise click.ClickException(str(e))
Code example #9
    def get_data_in_issue(issue):
        """ Get the YAML-structured data in an issue

        Args:
           issue (:obj:`dict`): properties of the GitHub issue for the submission

        Returns:
            :obj:`object`: YAML-structured data in an issue
        """
        body = io.StringIO(issue['body'].replace('\r', ''))

        # hack to make yamldown work with Python 3.9
        if not hasattr(yaml, 'FullLoader'):
            yaml.FullLoader = yaml.Loader

        data, _ = yamldown.load(body)

        return data
Code example #10
import io

import yaml
import yamldown


def get_simulator_submission_from_gh_issue_body(body):
    """ Get a simulator submission from the YAML-structured data in an issue

    Args:
       body (:obj:`str`): body of a GitHub issue for the submission of a simulator

    Returns:
        :obj:`SimulatorSubmission`: simulator submission
    """
    body_stream = io.StringIO(body.replace('\r', ''))

    # hack to make yamldown work with Python 3.9
    if not hasattr(yaml, 'FullLoader'):
        yaml.FullLoader = yaml.Loader

    data, _ = yamldown.load(body_stream)

    return get_simulator_submission_from_gh_issue_body_data(data)
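A detail shared by the two examples above: GitHub delivers issue bodies with CRLF line endings, and the replace('\r', '') normalizes them so yamldown can detect the frontmatter fences. A small self-contained check, with an invented issue body:

import io

import yamldown

body = "---\r\nname: my-simulator\r\nversion: '1.0'\r\n---\r\nExtra notes."
data, _ = yamldown.load(io.StringIO(body.replace('\r', '')))
print(data)  # {'name': 'my-simulator', 'version': '1.0'}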
Code example #11
def from_markdown_file(file_path, ignore_comments=True):
    sentences = []
    import codecs
    import regex
    import yamldown
    with codecs.open(file_path, "r", 'utf-8') as in_file_obj:
        (yml, md) = yamldown.load(in_file_obj)
        if "title" in yml:
            sentences.append(yml["title"])

        ## Treat headings as sentences.
        md = regex.sub(r"^#(.+)\s*\n", r"\1рее ", md, flags=regex.MULTILINE)

        if ignore_comments:
            ## Ignore comments.
            md = regex.sub("\+\+\+\(.+?\)\+\+\+", "", md)

        # TODO: Process image alternate texts and captions?
        sentences.extend(from_plain_text(md))
    return sentences
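A quick check of the heading rewrite above, assuming the \1 backreference and MULTILINE-flag fixes; a plain space stands in for the original delimiter string, and the sample text is invented:

import regex

sample = "# Title\nBody text.\n## Section\nMore text.\n"
print(regex.sub(r"^#(.+)\s*\n", r"\1 ", sample, flags=regex.MULTILINE))
# Each heading line is folded into the sentence that follows it:
#  Title Body text.
# # Section More text.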
Code example #12
def compile_file(jinja_env, filename, source_dir, destination_dir, path_list):
    path = '/'.join(path_list)
    name_extension = os.path.splitext(filename)

    if name_extension[1] == '.md':
        output_filename = f'{name_extension[0]}.html'
    else:
        output_filename = filename

    try:
        with open(os.path.join(source_dir, path, filename)) as stream:
            metadata, source_code = yamldown.load(stream)
    except UnicodeDecodeError:
        metadata = None

    if metadata:
        if name_extension[1] == '.md':
            source_code = md(source_code)

        stage1 = jinja_env.from_string(stage1_template).render(
            page=metadata,
            extends=metadata.get('template'),
            source_code=source_code)
        stage2 = jinja_env.from_string(stage1).render(page=metadata)

        with open(os.path.join(destination_dir, path, output_filename),
                  'w+') as wstream:
            wstream.write(stage2)
    else:
        path_so_far = destination_dir
        for part in path_list:
            path_so_far = os.path.join(path_so_far, part)
            if not os.path.exists(path_so_far):
                os.mkdir(path_so_far)
        with open(os.path.join(source_dir, path, filename),
                  'rb') as src_stream:
            with open(os.path.join(destination_dir, path, output_filename),
                      'wb+') as dest_stream:
                data = src_stream.read(512)
                while data != b'':
                    dest_stream.write(data)
                    data = src_stream.read(512)
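For readers unfamiliar with the two-stage render in compile_file above: the first pass generates template source that extends whatever the frontmatter's template key names, and the second pass renders that generated template so the parent template sees the page metadata. The project's stage1_template is not shown, so the one below is only a guess at its general shape, with hypothetical template names:

import jinja2

env = jinja2.Environment(loader=jinja2.DictLoader({
    "base.html": "<title>{{ page.title }}</title>{% block body %}{% endblock %}",
}))

# Stage 1 emits template *source*: an extends clause plus the page body.
# The raw blocks keep the generated tags literal during the first render.
stage1_template = (
    '{% if extends %}{% raw %}{% extends page.template %}{% endraw %}{% endif %}'
    '{% raw %}{% block body %}{% endraw %}{{ source_code }}'
    '{% raw %}{% endblock %}{% endraw %}'
)
metadata = {"title": "Hello", "template": "base.html"}
stage1 = env.from_string(stage1_template).render(
    page=metadata, extends=metadata.get("template"), source_code="<p>Hi</p>")

# Stage 2 renders the generated template; base.html now sees page.title.
print(env.from_string(stage1).render(page=metadata))
# <title>Hello</title><p>Hi</p>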
Code example #13
def main(report, template, date):
    report_json = json.load(report)

    header = sorted([{"id": dataset["id"]} for dataset in report_json], key=lambda h: h["id"])
    # click.echo(json.dumps(header, indent=4))

    rules_directory = os.path.normpath(os.path.join(os.path.dirname(this_script), "../metadata/rules"))

    rules_descriptions = dict()
    for rule_path in glob.glob(os.path.join(rules_directory, "gorule*.md")):
        with open(rule_path) as rule_file:
            rule = yamldown.load(rule_file)[0]
            rule_id = rule["id"].lower().replace(":", "-")
            rules_descriptions[rule_id] = rule["title"]


    rule_by_dataset = dict()
    # {
    #     "gorule-0000005": {
    #         "mgi": 30,
    #         "sgd": 25,
    #         "blah": 45
    #     },
    #     "gorule-0000006": {
    #         "mgi": 20,
    #         "sgd": 11
    #     }
    # }

    # [
    #     {
    #         "rule": "gorule-0000005",
    #         "dataset": [
    #             {
    #                 "id": "mgi",
    #                 "messages": 20
    #             }
    #         ]
    #     }
    # ]
    ###################################
    # {
    #     "gorule-0000005": {
    #         "rule": "gorule-0000005",
    #         "mgi": 20,
    #         "sgd": 11,
    #         "wb": 300
    #     },
    #     "other": {
    #         "rule": "other",
    #         "mgi": 25,
    #         "sgd": 25,
    #         "wb": 33
    #     }
    # }

    bootstrap_context_mapping = {
        "warning": "warning",
        "error": "danger",
        "info": "primary"
    }

    for dataset in report_json:
        for rule, messages in dataset["messages"].items():
            if rule not in rule_by_dataset:
                level = messages[0]["level"].lower() if len(messages) > 0 else "info"
                rule_by_dataset[rule] = {
                    dataset["id"]: len(messages),
                    "level": level,
                    "rule": rule
                }
            else:
                rule_by_dataset[rule][dataset["id"]] = len(messages)
                rule_by_dataset[rule]["level"] = messages[0]["level"].lower() if len(messages) > 0 else "info"

    # Add empty cells in as 0s
    for h in header:
        for rule, amounts in rule_by_dataset.items():
            if h["id"] not in amounts:
                amounts[h["id"]] = 0

    # click.echo(json.dumps(rule_by_dataset, indent=4))
    rows = sorted(rule_by_dataset.values(), key=lambda n: n["rule"])
    # print(json.dumps(rows[0:4], indent=4))

    cells = []
    for row in rows:
        contents = []
        level = bootstrap_context_mapping[row["level"]]
        for key, val in row.items():
            if key == "rule":
                continue

            if key == "level":
                continue

            v = {
                "dataset": key,
                "amount": val,
                "has-zero-messages": val==0,
                "level": level if val > 0 else "primary"
            }
            contents.append(v)

        contents = sorted(contents, key=lambda d: d["dataset"])
        cell = {
            "rule": row["rule"],
            "title": rules_descriptions.get(row["rule"], ""),
            "messages": contents,
            "is-other": row["rule"] == "other"
        }
        cells.append(cell)

    # print(json.dumps(cells[0:4], indent=4))

    rendered = pystache.render(template.read(), {"header": header, "rules": cells, "date": date})

    print(rendered)
Code example #14
def main():
    """The main runner for our script."""

    ## Deal with incoming.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d', '--directory',
                        help='The directory of the GO refs')
    parser.add_argument('-j', '--json',
                        help='JSON output file')
    parser.add_argument('-s', '--stanza',
                        help='Stanza-based output file')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='More verbose output')
    args = parser.parse_args()

    if args.verbose:
        LOG.setLevel(logging.INFO)
        LOG.info('Verbose: on')

    ## Ensure directories and outputs.
    if not args.directory:
        die_screaming('need a directory argument')
    LOG.info('Will operate in: ' + args.directory)
    ## Ensure output file.
    if not args.json and not args.stanza:
        die_screaming('need an output file argument, --json or --stanza')
    if args.json:
        LOG.info('Will output JSON to: ' + args.json)
    if args.stanza:
        LOG.info('Will output stanza to: ' + args.stanza)

    ## Main data hold.
    reference_data = []

    ## Get files out of target directory, flipping the frontmatter
    ## into JSON.
    LOG.info('Globbing GO ref YAMLs in data directory: ' + args.directory + '/go*-*.md')
    src_filenames = glob.glob(args.directory + '/go*-*.md')
    for src_filename in src_filenames:

        LOG.info('GO ref filename: ' + src_filename)

        with open(src_filename, "r") as f:
            yml, md = yamldown.load(f)

            ## Break the md into the title, abstract, and comments.
            mdj_text = pypandoc.convert_text(md, 'json', format='markdown')
            mdj = json.loads(mdj_text)
            title = 'n/a'
            abstract = 'n/a'
            comments = 'n/a'
            next_block_type = None
            ## A workaround for the change in JSON format in pandoc in
            ## 1.18; Ubuntu 16.04 uses 1.16.0.2 and 18.04 uses
            ## 1.19.2.4.
            blocks = None
            if isinstance(mdj, list):
                blocks = mdj[1]
            else:
                blocks = mdj['blocks']
            for block in blocks:
                ## If is a header and has something there in the
                ## header.
                #LOG.info(json.dumps(block))
                if block.get('t', False) == "Header":
                    if block.get('c', False) and len(block['c']) >= 2:

                        ## Capture the title.
                        header_text = wtflist2str(block['c'][2])
                        #LOG.info('header text: ' + header_text)

                        if header_text.casefold() == "comments" or header_text.casefold() == "comment":
                            next_block_type = "comments"
                            #LOG.info("<<next: comments>>")
                        else:
                            ## Otherwise, we're going to assume this
                            ## is an abstract.
                            title = header_text
                            next_block_type = "abstract"
                            #LOG.info("<<next: abstract>>")

                    else:
                        raise Exception("Unknown HEADER")

                elif block['t'] == "Para":
                    if block.get('c', False) and len(block['c']) > 0:

                        ## Capture the title.
                        para_text = wtflist2str(block['c'])

                        if next_block_type == "comments":
                            comments = para_text
                            #LOG.info('comments text: ' + para_text)
                        elif next_block_type == "abstract":
                            abstract = para_text
                            #LOG.info('abstract text: ' + para_text)

                    else:
                        raise Exception("Unknown PARA")

                else:
                    raise Exception("Unknown ENTITY")

            yml['abstract'] = abstract
            yml['comments'] = comments
            yml['title'] = title
            reference_data.append(yml)

    ## Sort by id.
    reference_data = sorted(reference_data, key=lambda k: k['id'])

    ## Final JSON writeout.
    if args.json:
        with open(args.json, 'w+') as fhandle:
            fhandle.write(json.dumps(reference_data, sort_keys=True, indent=4))

    ## Final stanza writeout.
    if args.stanza:
        with open(args.stanza, 'w+') as fhandle:

            file_cache = []
            for ref in reference_data:
                stanza_cache = []

                if ref.get('id', False):
                    stanza_cache.append('go_ref_id: ' + ref.get('id'))

                alt_ids = ref.get('alt_id', [])
                for alt_id in alt_ids:
                    stanza_cache.append('alt_id: ' + alt_id)

                if ref.get('title', False):
                    stanza_cache.append('title: ' + ref.get('title'))

                if ref.get('authors', False):
                    stanza_cache.append('authors: ' + ref.get('authors'))

                if ref.get('year', False):
                    stanza_cache.append('year: ' + str(ref.get('year')))

                external_accessions = ref.get('external_accession', [])
                for external_accession in external_accessions:
                    stanza_cache.append('external_accession: ' + external_accession)

                if ref.get('abstract', False):
                    stanza_cache.append('abstract: ' + ref.get('abstract'))

                if ref.get('comments', False):
                    stanza_cache.append('comment: ' + ref.get('comments'))

                file_cache.append("\n".join(stanza_cache))

            ## NOTE: `header` is not defined in this function; it is
            ## presumably a module-level stanza-file header string.
            fhandle.write(header + "\n\n".join(file_cache))
Code example #15
def main(report, template, date, suppress_rule_tag):
    report_json = json.load(report)

    header = sorted([{"id": dataset["id"]} for dataset in report_json], key=lambda h: h["id"])
    # click.echo(json.dumps(header, indent=4))

    rules_directory = os.path.normpath(os.path.join(os.path.dirname(this_script), "../metadata/rules"))

    rules_descriptions = dict()
    # Rule Descriptions is a map of rule ID to a {"title": rule title, "tags": list of possible rule tags}
    for rule_path in glob.glob(os.path.join(rules_directory, "gorule*.md")):
        with open(rule_path) as rule_file:
            rule = yamldown.load(rule_file)[0]
            rule_id = rule["id"].lower().replace(":", "-")
            rules_descriptions[rule_id] = {
                "title": rule["title"],
                "tags": rule.get("tags", [])
            }


    rule_by_dataset = dict()
    # {
    #     "gorule-0000005": {
    #         "mgi": 30,
    #         "sgd": 25,
    #         "blah": 45
    #     },
    #     "gorule-0000006": {
    #         "mgi": 20,
    #         "sgd": 11
    #     }
    # }

    # [
    #     {
    #         "rule": "gorule-0000005",
    #         "dataset": [
    #             {
    #                 "id": "mgi",
    #                 "messages": 20
    #             }
    #         ]
    #     }
    # ]
    ###################################
    # {
    #     "gorule-0000005": {
    #         "rule": "gorule-0000005",
    #         "mgi": 20,
    #         "sgd": 11,
    #         "wb": 300
    #     },
    #     "other": {
    #         "rule": "other",
    #         "mgi": 25,
    #         "sgd": 25,
    #         "wb": 33
    #     }
    # }

    bootstrap_context_mapping = {
        "warning": "warning",
        "error": "danger",
        "info": "primary"
    }

    for dataset in report_json:
        for rule, messages in dataset["messages"].items():
            if any(tag in rules_descriptions.get(rule, {}).get("tags", []) for tag in suppress_rule_tag):
                # For any that is passed in to be suppressed, if it is a tag in the rule, then skip the rule.
                continue

            if rule not in rule_by_dataset:
                level = messages[0]["level"].lower() if len(messages) > 0 else "info"
                rule_by_dataset[rule] = {
                    dataset["id"]: len(messages),
                    "level": level,
                    "rule": rule
                }
            else:
                rule_by_dataset[rule][dataset["id"]] = len(messages)
                rule_by_dataset[rule]["level"] = messages[0]["level"].lower() if len(messages) > 0 else "info"

    # Add empty cells in as 0s
    for h in header:
        for rule, amounts in rule_by_dataset.items():
            if h["id"] not in amounts:
                amounts[h["id"]] = 0

    # click.echo(json.dumps(rule_by_dataset, indent=4))
    rows = sorted(rule_by_dataset.values(), key=lambda n: n["rule"])
    # print(json.dumps(rows[0:4], indent=4))

    cells = []
    for row in rows:
        contents = []
        level = bootstrap_context_mapping[row["level"]]
        for key, val in row.items():
            if key == "rule":
                continue

            if key == "level":
                continue

            v = {
                "dataset": key,
                "amount": val,
                "has-zero-messages": val==0,
                "level": level if val > 0 else "primary"
            }
            contents.append(v)

        contents = sorted(contents, key=lambda d: d["dataset"])
        cell = {
            "rule": row["rule"],
            "title": rules_descriptions.get(row["rule"], {}).get("title", ""),
            "messages": contents,
            "is-other": row["rule"] == "other"
        }
        cells.append(cell)

    # print(json.dumps(cells[0:4], indent=4))

    rendered = pystache.render(template.read(), {"header": header, "rules": cells, "date": date})

    print(rendered)
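The suppress_rule_tag option in this variant skips any rule whose metadata carries one of the suppressed tags. The check in isolation, with illustrative rule IDs and tags:

rules_descriptions = {
    "gorule-0000005": {"title": "Example rule", "tags": ["silent"]},
    "gorule-0000006": {"title": "Another rule", "tags": []},
}
suppress_rule_tag = ["silent"]

for rule in ["gorule-0000005", "gorule-0000006", "other"]:
    suppressed = any(
        tag in rules_descriptions.get(rule, {}).get("tags", [])
        for tag in suppress_rule_tag
    )
    print(rule, "suppressed" if suppressed else "kept")
# gorule-0000005 suppressed; the other two are kept.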
Code example #16
def main(report, template, date, suppress_rule_tag):
    # Make the input json look more like the "combined report" from reports-page-gen.py
    report_json = json.load(report)
    report_json["id"] = "gocam"
    report_json = [report_json]

    # header:
    # [
    #     {"id": "mgi"},
    #     {"id": "goa_chicken"}
    #     ...
    # ]
    header = sorted([{
        "id": dataset["id"]
    } for dataset in report_json],
                    key=lambda h: h["id"])
    # click.echo(json.dumps(header, indent=4))

    rules_directory = os.path.normpath(
        os.path.join(os.path.dirname(this_script), "../metadata/rules"))

    rules_descriptions = dict()
    # Rule Descriptions is a map of rule ID to a {"title": rule title, "tags": list of possible rule tags}
    for rule_path in glob.glob(os.path.join(rules_directory, "gorule*.md")):
        with open(rule_path) as rule_file:
            rule = yamldown.load(rule_file)[0]
            rule_id = rule["id"].lower().replace(":", "-")

            rules_descriptions[rule_id] = {
                "title": rule["title"],
                "tags": rule.get("tags", [])
            }

    rule_by_dataset = dict()
    # {
    #     "gorule-0000005": {
    #         "mgi": 30,
    #         "sgd": 25,
    #         "blah": 45
    #     },
    #     "gorule-0000006": {
    #         "mgi": 20,
    #         "sgd": 11
    #     }
    # }

    # [
    #     {
    #         "rule": "gorule-0000005",
    #         "dataset": [
    #             {
    #                 "id": "mgi",
    #                 "messages": 20
    #             }
    #         ]
    #     }
    # ]
    ###################################
    # {
    #     "gorule-0000005": {
    #         "rule": "gorule-0000005",
    #         "mgi": 20,
    #         "sgd": 11,
    #         "wb": 300
    #     },
    #     "other": {
    #         "rule": "other",
    #         "mgi": 25,
    #         "sgd": 25,
    #         "wb": 33
    #     }
    # }

    bootstrap_context_mapping = {
        "warning": "warning",
        "error": "danger",
        "info": "primary"
    }

    for dataset in report_json:
        # Rule: rule ID, messages: List of each message from parsing
        for rule, messages in dataset["messages"].items():
            if any([
                    tag in rules_descriptions.get(rule, {}).get("tags", [])
                    for tag in suppress_rule_tag
            ]):
                # For any that is passed in to be suppressed, if it is a tag in the rule, then skip the rule.
                continue

            # If we haven't added the rule, then add the messages, level, and rule ID to the value (keyed to the rule ID)
            if rule not in rule_by_dataset:
                level = messages[0]["level"].lower(
                ) if len(messages) > 0 else "info"
                rule_by_dataset[rule] = {
                    dataset["id"]: len(messages),
                    "level": level,
                    "rule": rule
                }
            else:
                rule_by_dataset[rule][dataset["id"]] = len(messages)
                # We can only increase `level`. If level is info, but messages are warn or error, than we reset.
                # If level is warning, then only error will replace, since it's "higher".
                if rule_by_dataset[rule]["level"] == "info" and len(
                        messages) > 0 and messages[0]["level"].lower() in [
                            "error", "warning"
                        ]:
                    rule_by_dataset[rule]["level"] = messages[0][
                        "level"].lower()
                elif rule_by_dataset[rule]["level"] == "warning" and len(
                        messages) > 0 and messages[0]["level"].lower(
                        ) == "error":
                    rule_by_dataset[rule]["level"] = "error"

    # Add empty cells in as 0s
    for h in header:
        # h: {"id": "mgi"}
        for rule, amounts in rule_by_dataset.items():
            # rule: "gorule-0000006", amounts: {"mgi": 20, "sgd": 11, ...}
            # If the header name (the dataset name) is not accounted in the amounts dict, add it as 0
            if h["id"] not in amounts:
                amounts[h["id"]] = 0

    # Sort the list of rules -> {set of dataset:number of messages} by rule title (alphabet)
    rows = sorted(rule_by_dataset.values(), key=lambda n: n["rule"])

    # Each "cell" is actually a row in the table.
    # Each `v` below is a cell contents along the row
    cells = []
    for row in rows:
        contents = []
        level = bootstrap_context_mapping[row["level"]]
        for key, val in row.items():
            if key == "rule":
                continue

            if key == "level":
                continue

            v = {
                "dataset": key,
                "amount": val,
                "has-zero-messages": val == 0,
                "level": level if val > 0 else "primary"
            }
            contents.append(v)

        contents = sorted(contents, key=lambda d: d["dataset"])
        cell = {
            "rule": row["rule"],
            "title": rules_descriptions.get(row["rule"], {}).get("title", ""),
            "messages": contents,
            "is-other": row["rule"] == "other"
        }
        cells.append(cell)

    # print(json.dumps(cells[0:4], indent=4))

    rendered = pystache.render(template.read(), {
        "header": header,
        "rules": cells,
        "date": date
    })

    print(rendered)
Code example #17
def main():
    """The main runner for our script."""

    ## Deal with incoming.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d',
                        '--directory',
                        help='The directory of the GO refs')
    parser.add_argument('-j', '--json', help='JSON output file')
    parser.add_argument('-s', '--stanza', help='Stanza-based output file')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='More verbose output')
    args = parser.parse_args()

    if args.verbose:
        LOG.setLevel(logging.INFO)
        LOG.info('Verbose: on')

    ## Ensure directories and outputs.
    if not args.directory:
        die_screaming('need a directory argument')
    LOG.info('Will operate in: ' + args.directory)
    ## Ensure output file.
    if not args.json and not args.stanza:
        die_screaming('need an output file argument, --json or --stanza')
    if args.json:
        LOG.info('Will output JSON to: ' + args.json)
    if args.stanza:
        LOG.info('Will output stanza to: ' + args.stanza)

    ## Main data hold.
    reference_data = []

    ## Get files out of target directory, flipping the frontmatter
    ## into JSON.
    LOG.info('Globbing GO ref YAMLs in data directory: ' + args.directory +
             '/go*-*.md')
    src_filenames = glob.glob(args.directory + '/go*-*.md')
    for src_filename in src_filenames:

        LOG.info('GO ref filename: ' + src_filename)

        with open(src_filename, "r") as f:
            yml, md = yamldown.load(f)

            ## Break the md into the title, abstract, and comments.
            mdj_text = pypandoc.convert_text(md, 'json', format='markdown')
            mdj = json.loads(mdj_text)
            title = 'n/a'
            abstract = 'n/a'
            comments = 'n/a'
            next_block_type = None
            ## A workaround for the change in JSON format in pandoc in
            ## 1.18; Ubuntu 16.04 uses 1.16.0.2 and 18.04 uses
            ## 1.19.2.4.
            blocks = None
            if isinstance(mdj, list):
                blocks = mdj[1]
            else:
                blocks = mdj['blocks']
            for block in blocks:
                ## If is a header and has something there in the
                ## header.
                #LOG.info(json.dumps(block))
                if block.get('t', False) == "Header":
                    if block.get('c', False) and len(block['c']) >= 2:

                        ## Capture the title.
                        header_text = wtflist2str(block['c'][2])
                        #LOG.info('header text: ' + header_text)

                        if header_text.casefold(
                        ) == "comments" or header_text.casefold() == "comment":
                            next_block_type = "comments"
                            #LOG.info("<<next: comments>>")
                        else:
                            ## Otherwise, we're going to assume this
                            ## is an abstract.
                            title = header_text
                            next_block_type = "abstract"
                            #LOG.info("<<next: abstract>>")

                    else:
                        raise Exception("Unknown HEADER")

                elif block['t'] == "Para":
                    if block.get('c', False) and len(block['c']) > 0:

                        ## Capture the title.
                        para_text = wtflist2str(block['c'])

                        if next_block_type == "comments":
                            comments = para_text
                            #LOG.info('comments text: ' + para_text)
                        elif next_block_type == "abstract":
                            abstract = para_text
                            #LOG.info('abstract text: ' + para_text)

                    else:
                        raise Exception("Unknown PARA")

                else:
                    raise Exception("Unknown ENTITY")

            yml['abstract'] = abstract
            yml['comments'] = comments
            yml['title'] = title
            reference_data.append(yml)

    ## Sort by id.
    reference_data = sorted(reference_data, key=lambda k: k['id'])

    ## Final JSON writeout.
    if args.json:
        with open(args.json, 'w+') as fhandle:
            fhandle.write(json.dumps(reference_data, sort_keys=True, indent=4))

    ## Final stanza writeout.
    if args.stanza:
        with open(args.stanza, 'w+') as fhandle:

            file_cache = []
            for ref in reference_data:
                stanza_cache = []

                if ref.get('id', False):
                    stanza_cache.append('go_ref_id: ' + ref.get('id'))

                alt_ids = ref.get('alt_id', [])
                for alt_id in alt_ids:
                    stanza_cache.append('alt_id: ' + alt_id)

                if ref.get('title', False):
                    stanza_cache.append('title: ' + ref.get('title'))

                if ref.get('authors', False):
                    stanza_cache.append('authors: ' + ref.get('authors'))

                if ref.get('year', False):
                    stanza_cache.append('year: ' + str(ref.get('year')))

                external_accessions = ref.get('external_accession', [])
                for external_accession in external_accessions:
                    stanza_cache.append('external_accession: ' +
                                        external_accession)

                if ref.get('abstract', False):
                    stanza_cache.append('abstract: ' + ref.get('abstract'))

                if ref.get('comments', False):
                    stanza_cache.append('comment: ' + ref.get('comments'))

                file_cache.append("\n".join(stanza_cache))

            ## NOTE: `header` is not defined in this function; it is
            ## presumably a module-level stanza-file header string.
            fhandle.write(header + "\n\n".join(file_cache))