Python extract_nodes Examples, utils.extract_nodes Python Examples

Example #1

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def conflict(soup):
    """Collect "fn" nodes whose fn-type is "conflict" or "COI-statement".

    Both lookups go through extract_nodes; the "COI-statement" results are
    appended to the "conflict" results with ``+=``.
    """
    matched = extract_nodes(soup, "fn", attr="fn-type", value="conflict")
    coi_statements = extract_nodes(
        soup, "fn", attr="fn-type", value="COI-statement")
    matched += coi_statements
    return matched

Example #2

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def abstract(soup, abstract_type=None):
    """Look up "abstract" nodes, optionally restricted by abstract-type.

    A falsy abstract_type means no attribute restriction is passed to
    extract_nodes.
    """
    if not abstract_type:
        return extract_nodes(soup, "abstract")
    return extract_nodes(
        soup, "abstract", attr="abstract-type", value=abstract_type)

Example #3

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def custom_meta(soup, meta_name=None):
    """Look up "custom-meta" nodes, optionally keeping only one meta-name.

    When meta_name is not None, a node is kept if the string contents of
    its "meta-name" child (as picked by first()) equal meta_name.
    """
    found = extract_nodes(soup, "custom-meta")
    if meta_name is None:
        return found

    def has_requested_name(tag):
        name_tag = first(extract_nodes(tag, "meta-name"))
        return node_contents_str(name_tag) == meta_name

    return filter(has_requested_name, found)

Example #4

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def authors(soup, contrib_type="author"):
    """Look up "contrib" nodes, by default those with contrib-type "author".

    Pass a falsy contrib_type to look up "contrib" nodes without any
    attribute restriction.
    """
    if not contrib_type:
        return extract_nodes(soup, "contrib")
    return extract_nodes(
        soup, "contrib", attr="contrib-type", value=contrib_type)

Example #5

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def fn_group(soup, content_type=None):
    """Look up "fn-group" nodes, optionally restricted by content-type.

    A falsy content_type means no attribute restriction is passed to
    extract_nodes.
    """
    if not content_type:
        return extract_nodes(soup, "fn-group")
    return extract_nodes(
        soup, "fn-group", attr="content-type", value=content_type)

Example #6

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def pub_id(soup, pub_id_type=None):
    """Look up "pub-id" nodes, optionally restricted by pub-id-type.

    A falsy pub_id_type means no attribute restriction is passed to
    extract_nodes.
    """
    if not pub_id_type:
        return extract_nodes(soup, "pub-id")
    return extract_nodes(
        soup, "pub-id", attr="pub-id-type", value=pub_id_type)

Example #7

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def ext_link(soup, ext_link_type=None):
    """Look up "ext-link" nodes, optionally restricted by ext-link-type.

    A falsy ext_link_type means no attribute restriction is passed to
    extract_nodes.
    """
    if not ext_link_type:
        return extract_nodes(soup, "ext-link")
    return extract_nodes(
        soup, "ext-link", attr="ext-link-type", value=ext_link_type)

Example #8

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def journal_id(soup):
    """Return firstnn() of the "journal-id" nodes typed "publisher-id"."""
    publisher_id_tags = extract_nodes(
        soup, "journal-id", attr="journal-id-type", value="publisher-id")
    # the first non-nil tag
    return firstnn(publisher_id_tags)

Example #9

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def research_organism_keywords(soup):
    """Return the "kwd" children of the research-organism keyword group.

    The group is whatever first() picks from the "kwd-group" nodes whose
    kwd-group-type is "research-organism".

    Returns None when that group is falsy or has no "kwd" children,
    otherwise a list of the matching children.
    """
    group = first(
        extract_nodes(soup,
                      "kwd-group",
                      attr="kwd-group-type",
                      value="research-organism"))
    if not group:
        return None
    # A list comprehension rather than filter(): on Python 3 filter() returns
    # an iterator that is always truthy, so the "or None" fallback would
    # never trigger. On Python 2 the result is the same list as before.
    keywords = [child for child in group if child.name == "kwd"]
    return keywords or None

Example #10

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def author_keywords(soup):
    """Collect "kwd" children of author keyword groups into one list.

    A "kwd-group" qualifies when its kwd-group-type is "author-keywords"
    or when it has no kwd-group-type at all.
    """
    collected = []
    for group in extract_nodes(soup, "kwd-group"):
        group_type = group.get("kwd-group-type")
        # A few articles have kwd-group with no kwd-group-type, so account
        # for those
        if group_type == "author-keywords" or group_type is None:
            collected.extend(
                child for child in group if child.name == "kwd")
    return collected

Example #11

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def article_contributors(soup):
    """Filter contributor nodes of the article-meta to contrib-group children.

    Looks up "contrib" and "on-behalf-of" nodes under the value returned by
    article_meta(soup) and keeps those whose parent is named
    "contrib-group". Returns None when article_meta(soup) is falsy.
    """
    meta_tag = article_meta(soup)
    if not meta_tag:
        return None

    def in_contrib_group(tag):
        return tag.parent.name == "contrib-group"

    candidates = extract_nodes(meta_tag, ["contrib", "on-behalf-of"])
    return filter(in_contrib_group, candidates)

Example #12

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def author_keywords(soup):
    """Collect "kwd" children of author keyword groups into one list.

    A "kwd-group" qualifies when its kwd-group-type is "author-keywords"
    or when it has no kwd-group-type at all.
    """
    collected = []
    for group in extract_nodes(soup, "kwd-group"):
        group_type = group.get("kwd-group-type")
        # A few articles have kwd-group with no kwd-group-type, so account
        # for those
        if group_type == "author-keywords" or group_type is None:
            collected.extend(
                child for child in group if child.name == "kwd")
    return collected

Example #13

0

Show file

File: rawJATS.py Project: code56/elife-tools

def full_subject_area(soup, subject_group_type=None):
    """Filter "subj-group" nodes by position and, optionally, by type.

    Only nodes whose parent is "article-categories" and whose grandparent
    is "article-meta" are kept. When subject_group_type is truthy, the
    result is further narrowed to nodes whose subj-group-type attribute
    equals it.
    """
    subject_group_tags = extract_nodes(soup, "subj-group")
    subject_group_tags = filter(
        lambda tag: (tag.parent.name == "article-categories"
                     and tag.parent.parent.name == "article-meta"),
        subject_group_tags)

    if subject_group_type:
        # Fix: the comparison used to sit inside tag.get(...) and filter()
        # was called without an iterable, which raises TypeError.
        subject_group_tags = filter(
            lambda tag: tag.get("subj-group-type") == subject_group_type,
            subject_group_tags)

    return subject_group_tags

Example #14

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def subject_area(soup, subject_group_type=None):
    """Filter "subject" nodes by ancestry or, when a type is given, by type.

    Without subject_group_type, a node is kept when its parent chain is
    subj-group > article-categories > article-meta. With a truthy
    subject_group_type, a node is kept when its parent's subj-group-type
    attribute equals it.
    """
    all_subjects = extract_nodes(soup, "subject")

    def under_article_meta(tag):
        group = tag.parent
        return (group.name == "subj-group"
                and group.parent.name == "article-categories"
                and group.parent.parent.name == "article-meta")

    def has_requested_type(tag):
        return tag.parent.get("subj-group-type") == subject_group_type

    selected = filter(under_article_meta, all_subjects)
    if subject_group_type:
        # NOTE(review): this filters the full tag list again, so the
        # ancestry check above is not applied when a type is given --
        # confirm that this is intended.
        selected = filter(has_requested_type, all_subjects)
    return selected

Example #15

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def full_subject_area(soup, subject_group_type=None):
    """Filter "subj-group" nodes by position and, optionally, by type.

    Only nodes whose parent is "article-categories" and whose grandparent
    is "article-meta" are kept. When subject_group_type is truthy, the
    result is further narrowed to nodes whose subj-group-type attribute
    equals it.
    """
    subject_group_tags = extract_nodes(soup, "subj-group")
    subject_group_tags = filter(
        lambda tag: (tag.parent.name == "article-categories"
                     and tag.parent.parent.name == "article-meta"),
        subject_group_tags)

    if subject_group_type:
        # Fix: the comparison used to sit inside tag.get(...) and filter()
        # was called without an iterable, which raises TypeError.
        subject_group_tags = filter(
            lambda tag: tag.get("subj-group-type") == subject_group_type,
            subject_group_tags)

    return subject_group_tags

Example #16

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def subject_area(soup, subject_group_type=None):
    """Filter "subject" nodes by ancestry or, when a type is given, by type.

    Without subject_group_type, a node is kept when its parent chain is
    subj-group > article-categories > article-meta. With a truthy
    subject_group_type, a node is kept when its parent's subj-group-type
    attribute equals it.
    """
    all_subjects = extract_nodes(soup, "subject")

    def under_article_meta(tag):
        group = tag.parent
        return (group.name == "subj-group"
                and group.parent.name == "article-categories"
                and group.parent.parent.name == "article-meta")

    def has_requested_type(tag):
        return tag.parent.get("subj-group-type") == subject_group_type

    selected = filter(under_article_meta, all_subjects)
    if subject_group_type:
        # NOTE(review): this filters the full tag list again, so the
        # ancestry check above is not applied when a type is given --
        # confirm that this is intended.
        selected = filter(has_requested_type, all_subjects)
    return selected

Example #17

0

Show file

File: vbmap_vis.py Project: aartamonau/vbmap_utils

def simulate(vbmap):
    """Draw one master-vbucket histogram per map from simulate_failovers().

    Opens a new pylab figure and lays the charts out on a square grid of
    subplots. Each chart histograms the non-None entries returned by
    extract_masters() for that map, with one bin per node of the original
    vbmap.
    """
    pylab.figure()

    node_names = extract_nodes(vbmap['map'])
    node_total = len(node_names)

    failover_maps = simulate_failovers(vbmap)

    # Smallest square grid with room for every chart.
    grid_side = int(math.ceil(math.sqrt(len(failover_maps))))

    for chart_index, failover_map in enumerate(failover_maps, 1):
        pylab.subplot(grid_side, grid_side, chart_index)
        live_masters = [
            master for master in extract_masters(failover_map)
            if master is not None
        ]

        # Ticks sit at the centre of each unit-wide bin.
        pylab.xticks([pos + 0.5 for pos in xrange(node_total)], node_names)
        pylab.hist(live_masters, bins=xrange(node_total + 1))
        pylab.xlabel("Nodes")
        pylab.ylabel("Number of vbuckets")
        pylab.legend()

Example #18

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def award_group(soup):
    """Return the result of looking up "award-group" nodes in soup."""
    award_group_tags = extract_nodes(soup, "award-group")
    return award_group_tags

Example #19

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def list_item(soup):
    """Return the result of looking up "list-item" nodes in soup."""
    list_item_tags = extract_nodes(soup, "list-item")
    return list_item_tags

Example #20

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def fig_group(soup):
    """Return the result of looking up "fig-group" nodes in soup."""
    fig_group_tags = extract_nodes(soup, "fig-group")
    return fig_group_tags

Example #21

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def year(soup):
    """Return first() of the "year" nodes looked up in soup."""
    year_tags = extract_nodes(soup, "year")
    return first(year_tags)

Example #22

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def publisher_id(soup):
    """Return first() of the publisher-id article-ids under article-meta.

    Candidates are "article-id" nodes with pub-id-type "publisher-id";
    only those whose parent is named "article-meta" are considered.
    """
    def directly_in_article_meta(tag):
        return tag.parent.name == "article-meta"

    candidates = extract_nodes(
        soup, "article-id", attr="pub-id-type", value="publisher-id")
    # the first article-id tag whose parent is article-meta
    return first(filter(directly_in_article_meta, candidates))

Example #23

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def month(soup):
    """Return first() of the "month" nodes looked up in soup."""
    month_tags = extract_nodes(soup, "month")
    return first(month_tags)

Example #24

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def history_date(soup, date_type):
    """Return first() of the "date" nodes of date_type inside "history".

    Candidates are "date" nodes whose date-type equals date_type; only
    those whose parent is named "history" are considered.
    """
    def directly_in_history(tag):
        return tag.parent.name == "history"

    candidates = extract_nodes(
        soup, "date", attr="date-type", value=date_type)
    return first(filter(directly_in_history, candidates))

Example #25

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def copyright_year(soup):
    """Return first() of the "copyright-year" nodes under permissions(soup)."""
    permissions_tag = permissions(soup)
    year_tags = extract_nodes(permissions_tag, "copyright-year")
    return first(year_tags)

Example #26

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def copyright_statement(soup):
    """Return first() of the "copyright-statement" nodes under permissions(soup)."""
    permissions_tag = permissions(soup)
    statement_tags = extract_nodes(permissions_tag, "copyright-statement")
    return first(statement_tags)

Example #27

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def article_title(soup):
    """Return first() of the "article-title" nodes looked up in soup."""
    title_tags = extract_nodes(soup, "article-title")
    return first(title_tags)

Example #28

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def licence_p(soup):
    """Return first() of the "license-p" nodes under licence(soup)."""
    licence_tag = licence(soup)
    paragraph_tags = extract_nodes(licence_tag, "license-p")
    return first(paragraph_tags)

Example #29

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def licence(soup):
    """Return first() of the "license" nodes under permissions(soup)."""
    permissions_tag = permissions(soup)
    licence_tags = extract_nodes(permissions_tag, "license")
    return first(licence_tags)

Example #30

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def permissions(soup):
    """Return first() of the "permissions" nodes looked up in soup."""
    permissions_tags = extract_nodes(soup, "permissions")
    # a better selector might be "article-meta.permissions"
    return first(permissions_tags)

Example #31

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def conflict(soup):
    """Return the result of looking up "fn" nodes with fn-type "conflict"."""
    conflict_tags = extract_nodes(
        soup, "fn", attr="fn-type", value="conflict")
    return conflict_tags

Example #32

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def string_name(soup):
    """Return the result of looking up "string-name" nodes in soup."""
    string_name_tags = extract_nodes(soup, "string-name")
    return string_name_tags

Example #33

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def acknowledgements(soup):
    """Return first() of the "ack" nodes looked up in soup."""
    ack_tags = extract_nodes(soup, "ack")
    return first(ack_tags)

Example #34

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def copyright_holder(soup):
    """Return first() of the "copyright-holder" nodes under permissions(soup)."""
    permissions_tag = permissions(soup)
    holder_tags = extract_nodes(permissions_tag, "copyright-holder")
    return first(holder_tags)

Example #35

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def funding_statement(soup):
    """Return first() of the "funding-statement" nodes looked up in soup."""
    statement_tags = extract_nodes(soup, "funding-statement")
    return first(statement_tags)

Example #36

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def research_organism_keywords(soup):
    """Return the "kwd" children of the research-organism keyword group.

    The group is whatever first() picks from the "kwd-group" nodes whose
    kwd-group-type is "research-organism".

    Returns None when that group is falsy or has no "kwd" children,
    otherwise a list of the matching children.
    """
    group = first(
        extract_nodes(soup,
                      "kwd-group",
                      attr="kwd-group-type",
                      value="research-organism"))
    if not group:
        return None
    # A list comprehension rather than filter(): on Python 3 filter() returns
    # an iterator that is always truthy, so the "or None" fallback would
    # never trigger. On Python 2 the result is the same list as before.
    keywords = [child for child in group if child.name == "kwd"]
    return keywords or None

Example #37

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def journal_id(soup):
    """Return firstnn() of the "journal-id" nodes typed "hwp"."""
    hwp_id_tags = extract_nodes(
        soup, "journal-id", attr="journal-id-type", value="hwp")
    # the first non-nil tag
    return firstnn(hwp_id_tags)

Example #38

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def history_date(soup, date_type):
    """Return first() of the "date" nodes of date_type inside "history".

    Candidates are "date" nodes whose date-type equals date_type; only
    those whose parent is named "history" are considered.
    """
    def directly_in_history(tag):
        return tag.parent.name == "history"

    candidates = extract_nodes(
        soup, "date", attr="date-type", value=date_type)
    return first(filter(directly_in_history, candidates))

Example #39

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def doi(soup):
    """Return first() of the doi article-ids directly under article-meta.

    Candidates are "article-id" nodes with pub-id-type "doi"; only those
    whose parent is named "article-meta" are considered.
    """
    def directly_in_article_meta(tag):
        return tag.parent.name == "article-meta"

    candidates = extract_nodes(
        soup, "article-id", attr="pub-id-type", value="doi")
    # the first article-id tag whose parent is article-meta
    return first(filter(directly_in_article_meta, candidates))

Example #40

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def pub_date_collection(soup, pub_type):
    """Return first() of the "pub-date" nodes whose pub-type is pub_type."""
    pub_date_tags = extract_nodes(
        soup, "pub-date", attr="pub-type", value=pub_type)
    return first(pub_date_tags)

Example #41

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def fig(soup):
    """Return the result of looking up "fig" nodes in soup."""
    fig_tags = extract_nodes(soup, "fig")
    return fig_tags

Example #42

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def pub_date(soup, date_type):
    """Return first() of the "pub-date" nodes whose date-type is date_type."""
    pub_date_tags = extract_nodes(
        soup, "pub-date", attr="date-type", value=date_type)
    return first(pub_date_tags)

Example #43

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def list(soup):
    """Return the result of looking up "list" nodes in soup."""
    # NOTE: this function's name shadows the built-in ``list`` in the
    # module that defines it; the name is kept because callers use it.
    list_tags = extract_nodes(soup, "list")
    return list_tags

Example #44

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def journal_issn(soup, pub_format):
    """Return first() of the "issn" nodes whose publication-format is pub_format."""
    issn_tags = extract_nodes(
        soup, "issn", attr="publication-format", value=pub_format)
    return first(issn_tags)

Example #45

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def funding_group(soup):
    """Return the result of looking up "funding-group" nodes in soup."""
    funding_group_tags = extract_nodes(soup, "funding-group")
    return funding_group_tags

Example #46

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def article_type(soup):
    """Return the 'article-type' attribute of the "article" node.

    The node is whatever first() picks from the "article" lookup. Returns
    None when first() yields None, instead of failing with AttributeError
    on the attribute lookup.
    """
    article_tag = first(extract_nodes(soup, "article"))
    if article_tag is None:
        # presumably first() returns None when nothing matched -- confirm
        # against the helper; the guard is a no-op otherwise
        return None
    # returns raw data, just that the data doesn't contain any BS nodes
    return article_tag.get('article-type')

Example #47

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def principal_award_recipient(soup):
    """Return the result of looking up "principal-award-recipient" nodes."""
    recipient_tags = extract_nodes(soup, "principal-award-recipient")
    return recipient_tags

Example #48

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def pub_date(soup, date_type):
    """Return first() of the "pub-date" nodes whose date-type is date_type."""
    pub_date_tags = extract_nodes(
        soup, "pub-date", attr="date-type", value=date_type)
    return first(pub_date_tags)

Example #49

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def journal_title(soup):
    """Return first() of the "journal-title" nodes looked up in soup."""
    title_tags = extract_nodes(soup, "journal-title")
    return first(title_tags)

Example #50

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def keyword_group(soup):
    """Return the result of looking up "kwd-group" nodes in soup."""
    keyword_group_tags = extract_nodes(soup, "kwd-group")
    return keyword_group_tags

Example #51

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def publisher(soup):
    """Return first() of the "publisher-name" nodes looked up in soup."""
    publisher_name_tags = extract_nodes(soup, "publisher-name")
    return first(publisher_name_tags)

Example #52

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def article_type(soup):
    """Return the 'article-type' attribute of the "article" node.

    The node is whatever first() picks from the "article" lookup. Returns
    None when first() yields None, instead of failing with AttributeError
    on the attribute lookup.
    """
    article_tag = first(extract_nodes(soup, "article"))
    if article_tag is None:
        # presumably first() returns None when nothing matched -- confirm
        # against the helper; the guard is a no-op otherwise
        return None
    # returns raw data, just that the data doesn't contain any BS nodes
    return article_tag.get('article-type')

Example #53

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def article_meta(soup):
    """Return first() of the "article-meta" nodes looked up in soup."""
    article_meta_tags = extract_nodes(soup, "article-meta")
    return first(article_meta_tags)

Example #54

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def publisher(soup):
    """Return first() of the "publisher-name" nodes looked up in soup."""
    publisher_name_tags = extract_nodes(soup, "publisher-name")
    return first(publisher_name_tags)

Example #55

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def pub_date_collection(soup, pub_type):
    """Return first() of the "pub-date" nodes whose pub-type is pub_type."""
    pub_date_tags = extract_nodes(
        soup, "pub-date", attr="pub-type", value=pub_type)
    return first(pub_date_tags)

Example #56

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def journal_issn(soup, pub_format):
    """Return first() of the "issn" nodes whose publication-format is pub_format."""
    issn_tags = extract_nodes(
        soup, "issn", attr="publication-format", value=pub_format)
    return first(issn_tags)

Example #57

0

Show file

File: rawJATS.py Project: gnott/elife-tools

def day(soup):
    """Return first() of the "day" nodes looked up in soup."""
    day_tags = extract_nodes(soup, "day")
    return first(day_tags)

Example #58

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def journal_title(soup):
    """Return first() of the "journal-title" nodes looked up in soup."""
    title_tags = extract_nodes(soup, "journal-title")
    return first(title_tags)

Example #59

0

Show file

File: vbmap_vis.py Project: aartamonau/vbmap_utils

def main():
    """Plot vbucket distribution charts for the vbmap loaded from argv[1].

    Builds two pylab figures, each with two stacked subplots:

    * figure 1: vbuckets per node (master and each replica series), then
      all replica vbuckets per node;
    * figure 2: vbuckets per tag, then the per-tag replication counts
      for each node.

    Afterwards calls simulate(vbmap), which opens its own figure, and
    finally pylab.show().
    """
    vbmap = load_vbmap(sys.argv[1])

    masters = extract_masters(vbmap['map'])
    replicas = extract_replicas(vbmap['map'])

    nodes = extract_nodes(vbmap['map'])
    nodes_count = len(nodes)

    # Maps each node to its position in `nodes`.
    nodes_dict = dict((n, i) for i, n in enumerate(nodes))

    # `tags` is indexed by node below (tags[n]); tags_list holds the
    # distinct tag values in sorted order.
    tags = extract_tags(vbmap, nodes)
    tags_list = sorted(set(tags.values()))
    tags_count = len(tags_list)

    # Figure 1, top: one series for masters plus one per replica.
    pylab.figure()
    pylab.subplot(211)
    # Ticks sit at the centre of each unit-wide bin.
    pylab.xticks([i + 0.5 for i in xrange(nodes_count)], nodes)

    plots = [hist(masters, nodes_dict)] + \
            [hist(r, nodes_dict) for r in replicas]
    labels = ['master'] + ['replica %d' % i for i in xrange(len(replicas))]

    pylab.hist(plots, bins=xrange(nodes_count + 1), label=labels)
    pylab.title("Number of vbuckets per node")
    pylab.xlabel("Nodes")
    pylab.ylabel("Number of vbuckets")
    pylab.legend()

    # Figure 1, bottom: every replica series flattened into one.
    pylab.subplot(212)
    pylab.xticks([i + 0.5 for i in xrange(nodes_count)], nodes)

    all_replicas = list(chain(*replicas))
    pylab.hist(hist(all_replicas, nodes_dict), bins=xrange(nodes_count + 1),
               label='all replicas', rwidth=0.5)
    pylab.title("Number of replica vbuckets per node")
    pylab.xlabel("Nodes")
    pylab.ylabel("Number of vbuckets")
    pylab.legend()

    # Figure 2, top: the same series as figure 1, bucketed by tag.
    pylab.figure()
    pylab.subplot(211)

    plots = [[tags[n] for n in masters]] + \
            [[tags[n] for n in r] for r in replicas]

    pylab.hist(plots, bins=xrange(tags_count + 1), label=labels)

    pylab.xticks([i + 0.5 for i in xrange(tags_count)], tags_list)
    pylab.title("Number of vbuckets per tag")
    pylab.xlabel("Tags")
    pylab.ylabel("Number of vbuckets")
    pylab.legend()

    # Figure 2, bottom: replication counts, one series per tag.
    pylab.subplot(212)
    pylab.xticks([i + 0.5 for i in xrange(nodes_count)], nodes)
    pylab.title("Number of nodes each node replicates to per tag")
    pylab.xlabel("Nodes")
    pylab.ylabel("Number of replica nodes")

    tags_repcounts = tag_replication_counts(vbmap['map'], nodes, tags_list, tags)

    plots = []
    for tag_counts in tags_repcounts:
        plot = []

        # Repeat each node index `count` times so pylab.hist rebuilds the
        # per-node counts.
        for node, count in enumerate(tag_counts):
            plot.extend([node] * count)
        plots.append(plot)

    # Fix: label with tags_list. map(str, tags) iterated the keys of the
    # tags dict (nodes), which does not line up with the series here.
    # presumably tags_repcounts has one entry per tag in tags_list order --
    # confirm against tag_replication_counts
    pylab.hist(plots, bins=xrange(nodes_count + 1), label=map(str, tags_list))
    pylab.legend()

    simulate(vbmap)

    pylab.show()

Example #60

0

Show file

File: rawJATS.py Project: jhroot/elife-tools

def day(soup):
    """Return first() of the "day" nodes looked up in soup."""
    day_tags = extract_nodes(soup, "day")
    return first(day_tags)