Esempio n. 1
0
def create_ontology_map(golr_base_url):
    """Index the ontology documents returned by GOlr by annotation class.

    Args:
        golr_base_url: Base URL passed through to ``utils.golr_fetch``.

    Returns:
        A dict mapping each document's ``annotation_class`` value to the
        document itself. When several documents share the same class, the
        last one in the response wins.
    """
    fetched = utils.golr_fetch(golr_base_url, select_ontology)
    return {doc['annotation_class']: doc for doc in fetched['response']['docs']}
Esempio n. 2
0
def golr_fetch_bioentities_taxon(taxon):
    """Fetch bioentity facet queries for one taxon, overall and per aspect.

    Four queries are issued against ``golr_base_url``: one restricted only
    to the taxon, then one each additionally filtered on the
    ``isa_partof_closure`` of ``BP``, ``MF`` and ``CC``.

    Args:
        taxon: Taxon identifier inserted verbatim into the ``fq=taxon:``
            filter.

    Returns:
        A dict keyed by ``ALL``, ``BP``, ``MF`` and ``CC`` holding the
        response of the corresponding query.
    """
    def run_query(query):
        return utils.golr_fetch(golr_base_url, query)

    overall = run_query("select?fq=document_category:%22bioentity%22&q=*:*&wt=json&rows=0&facet=true&facet.field=type&facet.field=taxon&facet.limit=1000000&facet.mincount=1&fq=taxon:\"" + taxon + "\"")

    # multiple queries: a bit complicated but necessary because solr 3.6 cannot do composite faceting, and for speed
    # * the is_a closure filter restricts the stats to that specific aspect
    # * if evidence code was present, a similar strategy could be used
    aspect_query_head = "select?fq=document_category:%22bioentity%22&q=*:*&wt=json&facet=true&facet.field=type&facet.field=taxon&facet.limit=1000000&facet.mincount=1&rows=0&fq=taxon:\"" + taxon + "\"&fq=isa_partof_closure:\""

    responses = {ALL: overall}
    for aspect in (BP, MF, CC):
        responses[aspect] = run_query(aspect_query_head + aspect + "\"")
    return responses
Esempio n. 3
0
def golr_fetch_annotation_by_evidence_by_species(taxon, exclude_pb_only):
    """Fetch annotation ``evidence_type`` facet queries for one taxon.

    Four queries are issued against ``golr_base_url``: one restricted only
    to the taxon, then one each additionally filtered on the
    ``isa_partof_closure`` of ``BP``, ``MF`` and ``CC``.

    Args:
        taxon: Taxon identifier inserted verbatim into the ``fq=taxon:``
            filter.
        exclude_pb_only: When truthy, every query also carries a filter
            excluding ``annotation_class`` ``GO:0005515``.

    Returns:
        A dict keyed by ``ALL``, ``BP``, ``MF`` and ``CC`` holding the
        response of the corresponding query.
    """
    def run_query(query):
        return utils.golr_fetch(golr_base_url, query)

    # Extra filter appended to every query; empty when nothing is excluded.
    extra_filter = "&fq=!annotation_class:\"GO:0005515\"" if exclude_pb_only else ""

    overall = run_query('select?fq=document_category:%22annotation%22&q=*:*&wt=json&fq=taxon:%22' + taxon + '%22&facet=true&facet.field=evidence_type&facet.limit=10000&rows=0' + extra_filter)

    aspect_query_head = 'select?fq=document_category:%22annotation%22&q=*:*&wt=json&fq=taxon:%22' + taxon + '%22&facet=true&facet.field=evidence_type&facet.limit=10000&rows=0&fq=isa_partof_closure:\"'

    responses = {ALL: overall}
    for aspect in (BP, MF, CC):
        responses[aspect] = run_query(aspect_query_head + aspect + '\"' + extra_filter)
    return responses
Esempio n. 4
0
def get_references():
    """Fetch the ``reference`` facet from GOlr and convert it to a map.

    Returns:
        The result of ``utils.build_map`` applied to the ``reference``
        facet field of the ``golr_select_references`` query response.
    """
    response = utils.golr_fetch(golr_base_url, golr_select_references)
    reference_facet = response['facet_counts']['facet_fields']['reference']
    return utils.build_map(reference_facet)
Esempio n. 5
0
def _count_single_class_entities(docs):
    """Tally the documents that carry at most one annotation class.

    Args:
        docs: Iterable of bioentity documents, each providing the keys
            ``annotation_class_list``, ``type`` and ``taxon``.

    Returns:
        A ``(total, by_type, by_taxon)`` tuple: the number of documents
        counted, and that number broken down by ``type`` and by ``taxon``.
    """
    total = 0
    by_type = {}
    by_taxon = {}
    for doc in docs:
        # Entities with more than one annotation class are not tallied.
        if len(doc['annotation_class_list']) > 1:
            continue
        total += 1
        by_type[doc['type']] = by_type.get(doc['type'], 0) + 1
        # The taxon tally must look itself up in the taxon dict (the original
        # tested membership in the type dict, so counts never accumulated).
        by_taxon[doc['taxon']] = by_taxon.get(doc['taxon'], 0) + 1
    return total, by_type, by_taxon


def _subtract_from_facet(facet, removed):
    """Subtract per-name counts, in place, from a flat facet list.

    Args:
        facet: Flat list laid out as ``[name, count, name, count, ...]``.
        removed: Dict mapping a name to the amount to subtract; names that
            are absent subtract nothing.
    """
    for i in range(0, len(facet), 2):
        facet[i + 1] = facet[i + 1] - removed.get(facet[i], 0)


def compute_stats(golr_url, release_date, exclude_pb_only=False):
    """
    compute stats on GO annotations - can specify if we include or exclude annotations to protein binding only

    Args:
        golr_url: Base GOlr URL; stored in the module-level ``golr_base_url``
            as a side effect and used for every query made here.
        release_date: Passed through unchanged to ``create_stats``.
        exclude_pb_only: When truthy, the annotation query without protein
            binding is used, and the bioentity totals and ``type`` / ``taxon``
            facet counts are reduced by the entities tallied from the
            ``golr_select_bioentities_pb`` query.

    Returns:
        Whatever ``create_stats`` returns.
    """
    global golr_base_url
    golr_base_url = golr_url

    print("Will use golr url: ", golr_base_url)

    print("1 / 4 - Fetching GO terms...")
    all_terms = utils.golr_fetch(golr_base_url, golr_select_ontology)
    print("Done.")

    print("2 / 4 - Fetching GO annotations...")
    if exclude_pb_only:
        all_annotations = utils.golr_fetch(
            golr_base_url, golr_select_annotations_no_pbinding)
    else:
        all_annotations = utils.golr_fetch(golr_base_url,
                                           golr_select_annotations)
    print("Done.")

    print("3 / 4 - Fetching GO bioentities...")
    all_entities = utils.golr_fetch(golr_base_url, golr_select_bioentities)

    # we have to manually update the facets of the first query if we want to
    # remove the bioentities annotated only to protein binding
    if exclude_pb_only:
        # NOTE(review): presumably this query returns only bioentities that
        # are annotated to protein binding - confirm against the query string.
        pb_entities = utils.golr_fetch(golr_base_url,
                                       golr_select_bioentities_pb)
        removed_total, removed_by_type, removed_by_taxon = \
            _count_single_class_entities(pb_entities['response']['docs'])

        facet_fields = all_entities['facet_counts']['facet_fields']

        # update the type facet field (the list is modified in place)
        _subtract_from_facet(facet_fields['type'], removed_by_type)

        all_entities['response'][
            'numFound'] = all_entities['response']['numFound'] - removed_total

        # and update the taxon facet field
        _subtract_from_facet(facet_fields['taxon'], removed_by_taxon)

    print("Done.")

    qualifiers = utils.golr_fetch(golr_base_url, golr_select_qualifiers)
    qualifiers = utils.build_map(
        qualifiers['facet_counts']['facet_fields']['qualifier'])

    print("4 / 4 - Creating Stats...")
    prepare_globals(all_annotations)
    print("\t4a - globals prepared")
    stats = create_stats(all_terms, all_annotations, all_entities,
                         release_date, qualifiers, exclude_pb_only)
    print("Done.")

    return stats
Esempio n. 6
0
def golr_fetch_references_group(group):
    """Fetch the ``reference`` facet of annotations assigned by one group.

    Args:
        group: Group name inserted verbatim into the ``fq=assigned_by:``
            filter.

    Returns:
        The response of ``utils.golr_fetch`` for that query.
    """
    query = "select?fq=document_category:%22annotation%22&q=*:*&wt=json&rows=0&facet.limit=10000000&facet.mincount=1&facet=true&facet.field=reference&fq=assigned_by:\"" + group + "\""
    return utils.golr_fetch(golr_base_url, query)