Esempio n. 1
0
def conflict(soup):
    """Collect the conflict-of-interest footnote nodes from ``soup``.

    Two lookups are combined, in this order: "fn" nodes whose ``fn-type``
    is "conflict", followed by "fn" nodes whose ``fn-type`` is
    "COI-statement".
    """
    matches = extract_nodes(soup, "fn", attr="fn-type", value="conflict")
    # Append the second spelling of the footnote type to the same result.
    matches += extract_nodes(
        soup, "fn", attr="fn-type", value="COI-statement")
    return matches
Esempio n. 2
0
def abstract(soup, abstract_type=None):
    """Return the "abstract" nodes extracted from ``soup``.

    When ``abstract_type`` is truthy the lookup is restricted to nodes
    whose ``abstract-type`` attribute equals it; otherwise every
    "abstract" node is requested.
    """
    if not abstract_type:
        return extract_nodes(soup, "abstract")
    return extract_nodes(
        soup, "abstract", attr="abstract-type", value=abstract_type)
Esempio n. 3
0
def custom_meta(soup, meta_name=None):
    """Return the "custom-meta" nodes extracted from ``soup``.

    When ``meta_name`` is not None, the nodes are filtered down to those
    whose first "meta-name" child, rendered with ``node_contents_str``,
    equals ``meta_name``.
    """
    matches = extract_nodes(soup, "custom-meta")
    if meta_name is None:
        return matches

    def has_requested_name(tag):
        name_tag = first(extract_nodes(tag, "meta-name"))
        return node_contents_str(name_tag) == meta_name

    return filter(has_requested_name, matches)
Esempio n. 4
0
def authors(soup, contrib_type="author"):
    """Return the "contrib" nodes extracted from ``soup``.

    A truthy ``contrib_type`` (default "author") restricts the lookup to
    nodes whose ``contrib-type`` attribute equals it; a falsy value
    requests every "contrib" node.
    """
    if not contrib_type:
        return extract_nodes(soup, "contrib")
    return extract_nodes(
        soup, "contrib", attr="contrib-type", value=contrib_type)
Esempio n. 5
0
def fn_group(soup, content_type=None):
    """Return the "fn-group" nodes extracted from ``soup``.

    A truthy ``content_type`` restricts the lookup to nodes whose
    ``content-type`` attribute equals it; otherwise every "fn-group"
    node is requested.
    """
    if not content_type:
        return extract_nodes(soup, "fn-group")
    return extract_nodes(
        soup, "fn-group", attr="content-type", value=content_type)
Esempio n. 6
0
def pub_id(soup, pub_id_type=None):
    """Return the "pub-id" nodes extracted from ``soup``.

    A truthy ``pub_id_type`` restricts the lookup to nodes whose
    ``pub-id-type`` attribute equals it; otherwise every "pub-id" node
    is requested.
    """
    if not pub_id_type:
        return extract_nodes(soup, "pub-id")
    return extract_nodes(
        soup, "pub-id", attr="pub-id-type", value=pub_id_type)
Esempio n. 7
0
def ext_link(soup, ext_link_type=None):
    """Return the "ext-link" nodes extracted from ``soup``.

    A truthy ``ext_link_type`` restricts the lookup to nodes whose
    ``ext-link-type`` attribute equals it; otherwise every "ext-link"
    node is requested.
    """
    if not ext_link_type:
        return extract_nodes(soup, "ext-link")
    return extract_nodes(
        soup, "ext-link", attr="ext-link-type", value=ext_link_type)
Esempio n. 8
0
def journal_id(soup):
    """Return the first non-nil "journal-id" node of type "publisher-id".

    The candidates are the "journal-id" nodes whose ``journal-id-type``
    attribute is "publisher-id"; ``firstnn`` picks the result.
    """
    candidates = extract_nodes(
        soup, "journal-id", attr="journal-id-type", value="publisher-id")
    return firstnn(candidates)
Esempio n. 9
0
def research_organism_keywords(soup):
    """Return the "kwd" children of the research-organism keyword group.

    The group is the first "kwd-group" node whose ``kwd-group-type``
    attribute is "research-organism".

    Returns:
        A list of the group's direct children named "kwd", or ``None``
        when the group is missing or contains no such child.
    """
    group_tag = first(
        extract_nodes(soup,
                      "kwd-group",
                      attr="kwd-group-type",
                      value="research-organism"))
    if not group_tag:
        return None
    # Build a real list rather than calling filter(): on Python 3 a filter
    # object is always truthy, so the "or None" fallback could never fire
    # for an empty result.  On Python 2 the outcome is unchanged.
    keyword_tags = [child for child in group_tag if child.name == "kwd"]
    return keyword_tags or None
Esempio n. 10
0
def author_keywords(soup):
    """Return a list of "kwd" children from the author keyword groups.

    A "kwd-group" node contributes its direct "kwd" children when its
    ``kwd-group-type`` attribute is "author-keywords" or is absent; a few
    articles have a kwd-group with no kwd-group-type, so those are
    included too.
    """
    collected = []
    for group in extract_nodes(soup, "kwd-group"):
        group_type = group.get("kwd-group-type")
        if group_type is None or group_type == "author-keywords":
            collected += [child for child in group if child.name == "kwd"]
    return collected
Esempio n. 11
0
def article_contributors(soup):
    """Return contributor nodes that sit directly inside a contrib-group.

    The search is limited to the node returned by ``article_meta(soup)``.
    Within it, "contrib" and "on-behalf-of" nodes are extracted and kept
    only when their parent is named "contrib-group".  ``None`` is
    returned when ``article_meta(soup)`` gives a falsy result.
    """
    meta_tag = article_meta(soup)
    if not meta_tag:
        return None

    def in_contrib_group(tag):
        return tag.parent.name == "contrib-group"

    candidates = extract_nodes(meta_tag, ["contrib", "on-behalf-of"])
    return filter(in_contrib_group, candidates)
Esempio n. 12
0
def author_keywords(soup):
    """Gather the "kwd" children of every author keyword group in ``soup``.

    Groups qualify when ``kwd-group-type`` is "author-keywords" or when
    the attribute is missing altogether (a few articles omit it).  The
    result is a single flat list.
    """
    keyword_tags = []
    for group in extract_nodes(soup, "kwd-group"):
        group_type = group.get("kwd-group-type")
        if group_type is not None and group_type != "author-keywords":
            # Some other kind of keyword group: skip it.
            continue
        keyword_tags += [child for child in group if child.name == "kwd"]
    return keyword_tags
Esempio n. 13
0
def full_subject_area(soup, subject_group_type=None):
    """Return the "subj-group" nodes found under article-meta.

    A "subj-group" node is kept only when its parent is named
    "article-categories" and its grandparent is named "article-meta".

    Args:
        soup: the parsed document to search.
        subject_group_type: when truthy, additionally keep only groups
            whose ``subj-group-type`` attribute equals this value.

    Returns:
        Whatever ``filter`` returns for the matching nodes (a list on
        Python 2, a lazy iterator on Python 3).
    """
    def under_article_meta(tag):
        return (tag.parent.name == "article-categories"
                and tag.parent.parent.name == "article-meta")

    subject_group_tags = filter(under_article_meta,
                                extract_nodes(soup, "subj-group"))

    if subject_group_type:
        # The original compared inside tag.get(...) and passed no iterable
        # to filter(), which raised TypeError whenever a type was given.
        def has_group_type(tag):
            return tag.get("subj-group-type") == subject_group_type

        subject_group_tags = filter(has_group_type, subject_group_tags)

    return subject_group_tags
Esempio n. 14
0
def subject_area(soup, subject_group_type = None):
    """Return the "subject" nodes that belong to the article's categories.

    A "subject" node is kept only when its ancestors are, from nearest
    to farthest, "subj-group", "article-categories" and "article-meta".

    Args:
        soup: the parsed document to search.
        subject_group_type: when truthy, additionally keep only subjects
            whose parent's ``subj-group-type`` attribute equals this value.

    Returns:
        Whatever ``filter`` returns for the matching nodes (a list on
        Python 2, a lazy iterator on Python 3).
    """
    def in_article_categories(tag):
        group = tag.parent
        return (group.name == "subj-group"
                and group.parent.name == "article-categories"
                and group.parent.parent.name == "article-meta")

    subject_area_tags = filter(in_article_categories,
                               extract_nodes(soup, "subject"))

    if subject_group_type:
        # Narrow the already-filtered nodes.  The original re-filtered the
        # raw "subject" list here, silently dropping the ancestor check
        # whenever a group type was requested.
        def has_group_type(tag):
            return tag.parent.get("subj-group-type") == subject_group_type

        subject_area_tags = filter(has_group_type, subject_area_tags)
    return subject_area_tags
Esempio n. 15
0
def full_subject_area(soup, subject_group_type=None):
    """Return the "subj-group" nodes located under article-meta.

    Only "subj-group" nodes whose parent is "article-categories" and
    whose grandparent is "article-meta" are kept.

    Args:
        soup: the parsed document to search.
        subject_group_type: when truthy, the result is further limited to
            groups whose ``subj-group-type`` attribute equals this value.

    Returns:
        Whatever ``filter`` returns for the matching nodes (a list on
        Python 2, a lazy iterator on Python 3).
    """
    def under_article_meta(tag):
        return (tag.parent.name == "article-categories"
                and tag.parent.parent.name == "article-meta")

    subject_group_tags = filter(under_article_meta,
                                extract_nodes(soup, "subj-group"))

    if subject_group_type:
        # Fix: compare the attribute value outside tag.get(...) and give
        # filter() its iterable; the original call raised TypeError.
        def has_group_type(tag):
            return tag.get("subj-group-type") == subject_group_type

        subject_group_tags = filter(has_group_type, subject_group_tags)

    return subject_group_tags
Esempio n. 16
0
def subject_area(soup, subject_group_type=None):
    """Return the "subject" nodes listed in the article's categories.

    Only "subject" nodes nested as subj-group > article-categories >
    article-meta (nearest ancestor first) are kept.

    Args:
        soup: the parsed document to search.
        subject_group_type: when truthy, the result is further limited to
            subjects whose parent's ``subj-group-type`` equals this value.

    Returns:
        Whatever ``filter`` returns for the matching nodes (a list on
        Python 2, a lazy iterator on Python 3).
    """
    def in_article_categories(tag):
        group = tag.parent
        return (group.name == "subj-group"
                and group.parent.name == "article-categories"
                and group.parent.parent.name == "article-meta")

    subject_area_tags = filter(in_article_categories,
                               extract_nodes(soup, "subject"))

    if subject_group_type:
        # Apply the type filter on top of the ancestor filter.  The
        # original filtered the unfiltered "subject" list again, so the
        # ancestor check was lost whenever a group type was supplied.
        def has_group_type(tag):
            return tag.parent.get("subj-group-type") == subject_group_type

        subject_area_tags = filter(has_group_type, subject_area_tags)
    return subject_area_tags
Esempio n. 17
0
def simulate(vbmap):
    """Plot one master-distribution histogram per simulated failover map.

    A new pylab figure is opened and laid out as a square grid with one
    subplot for every map returned by ``simulate_failovers(vbmap)``.
    Each subplot is a histogram of that map's non-None masters, with one
    bin per node taken from ``vbmap['map']``.
    """
    pylab.figure()

    node_names = extract_nodes(vbmap['map'])
    node_total = len(node_names)

    failover_maps = simulate_failovers(vbmap)

    # Smallest square grid that has room for every chart.
    side = int(math.ceil(math.sqrt(len(failover_maps))))

    for position, failover_map in enumerate(failover_maps, 1):
        pylab.subplot(side, side, position)
        active_masters = [
            master for master in extract_masters(failover_map)
            if master is not None
        ]

        # Tick labels are placed at the middle of each unit-wide bin.
        pylab.xticks([i + 0.5 for i in xrange(node_total)], node_names)
        pylab.hist(active_masters, bins=xrange(node_total + 1))
        pylab.xlabel("Nodes")
        pylab.ylabel("Number of vbuckets")
        pylab.legend()
Esempio n. 18
0
def award_group(soup):
    """Return the "award-group" nodes extracted from ``soup``."""
    award_group_tags = extract_nodes(soup, "award-group")
    return award_group_tags
Esempio n. 19
0
def list_item(soup):
    """Return the "list-item" nodes extracted from ``soup``."""
    list_item_tags = extract_nodes(soup, "list-item")
    return list_item_tags
Esempio n. 20
0
def fig_group(soup):
    """Return the "fig-group" nodes extracted from ``soup``."""
    fig_group_tags = extract_nodes(soup, "fig-group")
    return fig_group_tags
Esempio n. 21
0
def year(soup):
    """Return the first of the "year" nodes extracted from ``soup``."""
    year_tags = extract_nodes(soup, "year")
    return first(year_tags)
Esempio n. 22
0
def publisher_id(soup):
    """Return the first publisher-id "article-id" node under article-meta.

    Candidates are "article-id" nodes whose ``pub-id-type`` attribute is
    "publisher-id"; only those whose parent is named "article-meta" are
    considered.
    """
    def under_article_meta(tag):
        return tag.parent.name == "article-meta"

    candidates = extract_nodes(
        soup, "article-id", attr="pub-id-type", value="publisher-id")
    return first(filter(under_article_meta, candidates))
Esempio n. 23
0
def month(soup):
    """Return the first of the "month" nodes extracted from ``soup``."""
    month_tags = extract_nodes(soup, "month")
    return first(month_tags)
Esempio n. 24
0
def history_date(soup, date_type):
    """Return the first "date" node of ``date_type`` inside a history node.

    Candidates are "date" nodes whose ``date-type`` attribute equals
    ``date_type``; only those whose parent is named "history" are
    considered.
    """
    def inside_history(tag):
        return tag.parent.name == "history"

    candidates = extract_nodes(
        soup, "date", attr="date-type", value=date_type)
    return first(filter(inside_history, candidates))
Esempio n. 25
0
def copyright_year(soup):
    """Return the first "copyright-year" node under ``permissions(soup)``."""
    permissions_tag = permissions(soup)
    year_tags = extract_nodes(permissions_tag, "copyright-year")
    return first(year_tags)
Esempio n. 26
0
def copyright_statement(soup):
    """Return the first "copyright-statement" node under ``permissions(soup)``."""
    permissions_tag = permissions(soup)
    statement_tags = extract_nodes(permissions_tag, "copyright-statement")
    return first(statement_tags)
Esempio n. 27
0
def article_title(soup):
    """Return the first of the "article-title" nodes extracted from ``soup``."""
    title_tags = extract_nodes(soup, "article-title")
    return first(title_tags)
Esempio n. 28
0
def licence_p(soup):
    """Return the first "license-p" node under ``licence(soup)``."""
    licence_tag = licence(soup)
    paragraph_tags = extract_nodes(licence_tag, "license-p")
    return first(paragraph_tags)
Esempio n. 29
0
def licence(soup):
    """Return the first "license" node under ``permissions(soup)``."""
    permissions_tag = permissions(soup)
    licence_tags = extract_nodes(permissions_tag, "license")
    return first(licence_tags)
Esempio n. 30
0
def permissions(soup):
    """Return the first of the "permissions" nodes extracted from ``soup``."""
    # A more precise selector might be "article-meta.permissions".
    permissions_tags = extract_nodes(soup, "permissions")
    return first(permissions_tags)
Esempio n. 31
0
def conflict(soup):
    """Return the "fn" nodes in ``soup`` whose ``fn-type`` is "conflict"."""
    conflict_tags = extract_nodes(
        soup, "fn", attr="fn-type", value="conflict")
    return conflict_tags
Esempio n. 32
0
def string_name(soup):
    """Return the "string-name" nodes extracted from ``soup``."""
    string_name_tags = extract_nodes(soup, "string-name")
    return string_name_tags
Esempio n. 33
0
def acknowledgements(soup):
    """Return the first of the "ack" nodes extracted from ``soup``."""
    ack_tags = extract_nodes(soup, "ack")
    return first(ack_tags)
Esempio n. 34
0
def copyright_holder(soup):
    """Return the first "copyright-holder" node under ``permissions(soup)``."""
    permissions_tag = permissions(soup)
    holder_tags = extract_nodes(permissions_tag, "copyright-holder")
    return first(holder_tags)
Esempio n. 35
0
def funding_statement(soup):
    """Return the first of the "funding-statement" nodes extracted from ``soup``."""
    statement_tags = extract_nodes(soup, "funding-statement")
    return first(statement_tags)
Esempio n. 36
0
def research_organism_keywords(soup):
    """Return the "kwd" children of the research-organism keyword group.

    The group is the first "kwd-group" node whose ``kwd-group-type``
    attribute is "research-organism".

    Returns:
        A list of the group's direct children named "kwd", or ``None``
        when the group is missing or holds no such child.
    """
    group_tag = first(extract_nodes(
        soup, "kwd-group", attr="kwd-group-type", value="research-organism"))
    if not group_tag:
        return None
    # A list is built explicitly because a Python 3 filter object is always
    # truthy, which made the "or None" fallback unreachable for an empty
    # result.  Python 2 behaviour is unchanged.
    keyword_tags = [child for child in group_tag if child.name == "kwd"]
    return keyword_tags or None
Esempio n. 37
0
def journal_id(soup):
    """Return the first non-nil "journal-id" node of type "hwp".

    The candidates are the "journal-id" nodes whose ``journal-id-type``
    attribute is "hwp"; ``firstnn`` picks the result.
    """
    candidates = extract_nodes(
        soup, "journal-id", attr="journal-id-type", value="hwp")
    return firstnn(candidates)
Esempio n. 38
0
def history_date(soup, date_type):
    """Return the first history "date" node whose type is ``date_type``.

    "date" nodes are looked up by their ``date-type`` attribute and then
    limited to those whose parent is named "history".
    """
    def inside_history(tag):
        return tag.parent.name == "history"

    typed_dates = extract_nodes(
        soup, "date", attr="date-type", value=date_type)
    return first(filter(inside_history, typed_dates))
Esempio n. 39
0
def doi(soup):
    """Return the first DOI "article-id" node under article-meta.

    Candidates are "article-id" nodes whose ``pub-id-type`` attribute is
    "doi"; only those whose parent is named "article-meta" are
    considered.
    """
    def under_article_meta(tag):
        return tag.parent.name == "article-meta"

    candidates = extract_nodes(
        soup, "article-id", attr="pub-id-type", value="doi")
    return first(filter(under_article_meta, candidates))
Esempio n. 40
0
def pub_date_collection(soup, pub_type):
    """Return the first "pub-date" node whose ``pub-type`` is ``pub_type``."""
    matches = extract_nodes(
        soup, "pub-date", attr="pub-type", value=pub_type)
    return first(matches)
Esempio n. 41
0
def fig(soup):
    """Return the "fig" nodes extracted from ``soup``."""
    fig_tags = extract_nodes(soup, "fig")
    return fig_tags
Esempio n. 42
0
def pub_date(soup, date_type):
    """Return the first "pub-date" node whose ``date-type`` is ``date_type``."""
    matches = extract_nodes(
        soup, "pub-date", attr="date-type", value=date_type)
    return first(matches)
Esempio n. 43
0
def list(soup):
    """Return the "list" nodes extracted from ``soup``.

    NOTE(review): when defined at module level this name hides the
    builtin ``list`` for any later code in the same module; it is kept
    because callers refer to the function by this name.
    """
    list_tags = extract_nodes(soup, "list")
    return list_tags
Esempio n. 44
0
def journal_issn(soup, pub_format):
    """Return the first "issn" node whose ``publication-format`` is ``pub_format``."""
    matches = extract_nodes(
        soup, "issn", attr="publication-format", value=pub_format)
    return first(matches)
Esempio n. 45
0
def funding_group(soup):
    """Return the "funding-group" nodes extracted from ``soup``."""
    funding_group_tags = extract_nodes(soup, "funding-group")
    return funding_group_tags
Esempio n. 46
0
def article_type(soup):
    """Return the ``article-type`` attribute of the first "article" node.

    The value is raw data read from the node's attributes, not a
    BeautifulSoup node.

    Returns:
        The attribute value as given by the node's ``get`` method, or
        ``None`` when no "article" node is available.
    """
    article_tag = first(extract_nodes(soup, "article"))
    if article_tag is None:
        # Assumes first() yields None for an empty result, as the other
        # accessors here treat it; without this guard a document with no
        # article node raised AttributeError on .get().
        return None
    return article_tag.get('article-type')
Esempio n. 47
0
def principal_award_recipient(soup):
    """Return the "principal-award-recipient" nodes extracted from ``soup``."""
    recipient_tags = extract_nodes(soup, "principal-award-recipient")
    return recipient_tags
Esempio n. 48
0
def pub_date(soup, date_type):
    """Return the first "pub-date" node with the given ``date-type`` value."""
    typed_dates = extract_nodes(
        soup, "pub-date", attr="date-type", value=date_type)
    return first(typed_dates)
Esempio n. 49
0
def journal_title(soup):
    """Return the first of the "journal-title" nodes extracted from ``soup``."""
    title_tags = extract_nodes(soup, "journal-title")
    return first(title_tags)
Esempio n. 50
0
def keyword_group(soup):
    """Return the "kwd-group" nodes extracted from ``soup``."""
    keyword_group_tags = extract_nodes(soup, "kwd-group")
    return keyword_group_tags
Esempio n. 51
0
def publisher(soup):
    """Return the first of the "publisher-name" nodes extracted from ``soup``."""
    name_tags = extract_nodes(soup, "publisher-name")
    return first(name_tags)
Esempio n. 52
0
def article_type(soup):
    """Return the ``article-type`` attribute of the first "article" node.

    The result is raw attribute data rather than a BeautifulSoup node.

    Returns:
        The attribute value as given by the node's ``get`` method, or
        ``None`` when no "article" node is available.
    """
    article_tag = first(extract_nodes(soup, "article"))
    if article_tag is None:
        # Guard against a missing article node (assumes first() gives
        # None in that case); the unguarded .get() raised AttributeError.
        return None
    return article_tag.get('article-type')
Esempio n. 53
0
def article_meta(soup):
    """Return the first of the "article-meta" nodes extracted from ``soup``."""
    meta_tags = extract_nodes(soup, "article-meta")
    return first(meta_tags)
Esempio n. 54
0
def publisher(soup):
    """Return the first "publisher-name" node found in ``soup``."""
    publisher_name_tags = extract_nodes(soup, "publisher-name")
    return first(publisher_name_tags)
Esempio n. 55
0
def pub_date_collection(soup, pub_type):
    """Return the first "pub-date" node with the given ``pub-type`` value."""
    typed_dates = extract_nodes(
        soup, "pub-date", attr="pub-type", value=pub_type)
    return first(typed_dates)
Esempio n. 56
0
def journal_issn(soup, pub_format):
    """Return the first "issn" node with the given ``publication-format`` value."""
    issn_tags = extract_nodes(
        soup, "issn", attr="publication-format", value=pub_format)
    return first(issn_tags)
Esempio n. 57
0
def day(soup):
    """Return the first of the "day" nodes extracted from ``soup``."""
    day_tags = extract_nodes(soup, "day")
    return first(day_tags)
Esempio n. 58
0
def journal_title(soup):
    """Return the first "journal-title" node found in ``soup``."""
    journal_title_tags = extract_nodes(soup, "journal-title")
    return first(journal_title_tags)
Esempio n. 59
0
def main():
    """Load a vbucket map and display histograms describing it.

    The map is loaded with ``load_vbmap`` from the first command-line
    argument.  Two pylab figures are built, each with two stacked
    subplots (vbuckets per node, replica vbuckets per node, vbuckets per
    tag, replica nodes per tag); ``simulate`` is then called with the
    map and finally all figures are shown with ``pylab.show()``.
    """
    vbmap = load_vbmap(sys.argv[1])

    masters = extract_masters(vbmap['map'])
    replicas = extract_replicas(vbmap['map'])

    nodes = extract_nodes(vbmap['map'])
    nodes_count = len(nodes)

    # Position of every node within ``nodes``.
    nodes_dict = dict((n, i) for i, n in enumerate(nodes))

    tags = extract_tags(vbmap, nodes)
    # Distinct tag values in sorted order.
    tags_list = sorted(set(tags.values()))
    tags_count = len(tags_list)

    # Figure 1, top: master and per-replica series, one bin per node.
    pylab.figure()
    pylab.subplot(211)
    # Tick labels sit at the middle of each unit-wide bin.
    pylab.xticks([i + 0.5 for i in xrange(nodes_count)], nodes)

    # presumably hist() maps entries through nodes_dict - TODO confirm
    plots = [hist(masters, nodes_dict)] + \
            [hist(r, nodes_dict) for r in replicas]
    labels = ['master'] + ['replica %d' % i for i in xrange(len(replicas))]

    pylab.hist(plots, bins=xrange(nodes_count + 1), label=labels)
    pylab.title("Number of vbuckets per node")
    pylab.xlabel("Nodes")
    pylab.ylabel("Number of vbuckets")
    pylab.legend()

    # Figure 1, bottom: every replica sequence chained into one series.
    pylab.subplot(212)
    pylab.xticks([i + 0.5 for i in xrange(nodes_count)], nodes)

    all_replicas = list(chain(*replicas))
    pylab.hist(hist(all_replicas, nodes_dict), bins=xrange(nodes_count + 1),
               label='all replicas', rwidth=0.5)
    pylab.title("Number of replica vbuckets per node")
    pylab.xlabel("Nodes")
    pylab.ylabel("Number of vbuckets")
    pylab.legend()

    # Figure 2, top: the same series, with each entry replaced by its tag.
    pylab.figure()
    pylab.subplot(211)

    plots = [[tags[n] for n in masters]] + \
            [[tags[n] for n in r] for r in replicas]

    pylab.hist(plots, bins=xrange(tags_count + 1), label=labels)

    pylab.xticks([i + 0.5 for i in xrange(tags_count)], tags_list)
    pylab.title("Number of vbuckets per tag")
    pylab.xlabel("Tags")
    pylab.ylabel("Number of vbuckets")
    pylab.legend()

    # Figure 2, bottom: one series per entry of tags_repcounts.
    pylab.subplot(212)
    pylab.xticks([i + 0.5 for i in xrange(nodes_count)], nodes)
    pylab.title("Number of nodes each node replicates to per tag")
    pylab.xlabel("Nodes")
    pylab.ylabel("Number of replica nodes")

    tags_repcounts = tag_replication_counts(vbmap['map'], nodes, tags_list, tags)

    plots = []
    for tag_counts in tags_repcounts:
        plot = []

        # Repeat each node index ``count`` times so the histogram bar for
        # that node has height ``count``.
        for node, count in enumerate(tag_counts):
            plot.extend([node] * count)
        plots.append(plot)

    # NOTE(review): ``tags`` is used like a mapping above (``tags.values()``,
    # ``tags[n]``), so map(str, tags) would label the series with its keys;
    # ``tags_list`` may have been intended here - confirm.
    pylab.hist(plots, bins=xrange(nodes_count + 1), label=map(str, tags))
    pylab.legend()

    simulate(vbmap)

    pylab.show()
Esempio n. 60
0
def day(soup):
    """Return the first "day" node found in ``soup``."""
    matching_days = extract_nodes(soup, "day")
    return first(matching_days)