Esempio n. 1
0
def get_data(wid):
    """Collect the preprocessed terms for every page of one wiki.

    For each doc id returned by ``WikiToPageHeadsService`` the terms are,
    in order: the entity titles, the redirect keys, the redirect values and
    the de-duplicated heads (set order, i.e. arbitrary), each passed through
    ``preprocess``; followed by whatever list is stored for that doc id in
    the ``feature-data/page-<wid>.json`` object read from ``bucket``.

    :param wid: wiki identifier; handed to both services and interpolated
        into the storage key name.
    :return: list of ``(doc_id, terms)`` pairs.
    """
    log(wid)
    use_caching(shouldnt_compute=True)
    # TODO: should be CombinedEntitiesService
    doc_ids_to_heads = WikiToPageHeadsService().get_value(wid, {})
    doc_ids_to_entities = WikiPageToEntitiesService().get_value(wid, {})
    if not doc_ids_to_heads:
        log(wid, "no heads")
    if not doc_ids_to_entities:
        log(wid, "no entities")

    # NOTE(review): assumes ``bucket`` is a boto bucket, whose get_key()
    # returns None for a missing key -- confirm. Without this guard a
    # missing object surfaces as an AttributeError on None.
    s3_key = bucket.get_key('feature-data/page-%s.json' % wid)
    if s3_key is None:
        log(wid, "no feature data")
        from_s3 = {}
    else:
        from_s3 = json.loads(s3_key.get_contents_as_string())

    doc_ids_combined = {}
    for doc_id, heads in doc_ids_to_heads.items():
        entity_response = doc_ids_to_entities.get(
            doc_id, {'titles': [], 'redirects': {}})
        redirects = entity_response['redirects']
        # list(...) around the dict views keeps the concatenation valid on
        # Python 3 and is a no-op copy on Python 2.
        raw_terms = (entity_response['titles'] +
                     list(redirects.keys()) +
                     list(redirects.values()) +
                     list(set(heads)))
        # A comprehension (not map) so the result is a real list that can
        # be concatenated with the stored features on either interpreter.
        doc_ids_combined[doc_id] = ([preprocess(term) for term in raw_terms] +
                                    from_s3.get(doc_id, []))
    return list(doc_ids_combined.items())
def get_data_wid(wid):
    """Collect the preprocessed terms for every page of one wiki.

    For each doc id returned by ``WikiToPageHeadsService`` the terms are,
    in order: the entity titles, the redirect keys, the redirect values and
    the de-duplicated heads (set order, i.e. arbitrary), each passed through
    ``preprocess``.

    :param wid: wiki identifier handed to both services.
    :return: list of ``(doc_id, terms)`` pairs.
    """
    # Single-argument print(...) calls emit the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(wid)
    use_caching(shouldnt_compute=True)
    # TODO: should be CombinedEntitiesService
    doc_ids_to_heads = WikiToPageHeadsService().get_value(wid, {})
    doc_ids_to_entities = WikiPageToEntitiesService().get_value(wid, {})
    if not doc_ids_to_heads:
        print("%s no heads" % (wid,))
    if not doc_ids_to_entities:
        print("%s no entities" % (wid,))

    doc_ids_combined = {}
    for doc_id, heads in doc_ids_to_heads.items():
        entity_response = doc_ids_to_entities.get(doc_id, {"titles": [], "redirects": {}})
        redirects = entity_response["redirects"]
        # list(...) around the dict views keeps the concatenation valid on
        # Python 3 and is a no-op copy on Python 2.
        raw_terms = (
            entity_response["titles"]
            + list(redirects.keys())
            + list(redirects.values())
            + list(set(heads))
        )
        # Comprehension instead of map so callers always receive a list.
        doc_ids_combined[doc_id] = [preprocess(term) for term in raw_terms]
    return list(doc_ids_combined.items())
def get_data_wid(wid):
    """Return ``(doc_id, terms)`` pairs for all pages of the wiki ``wid``.

    ``terms`` is the list obtained by applying ``preprocess`` to the entity
    titles, then the redirect keys, then the redirect values, then the
    de-duplicated heads of the page (the heads come out of a set, so their
    relative order is arbitrary). Doc ids are taken from the
    ``WikiToPageHeadsService`` response; pages without an entity entry
    contribute only their heads.

    :param wid: wiki identifier handed to both services.
    :return: list of ``(doc_id, terms)`` pairs.
    """
    # print(...) with one pre-formatted argument behaves the same on
    # Python 2 (statement) and Python 3 (function).
    print(wid)
    use_caching(shouldnt_compute=True)
    # TODO: should be CombinedEntitiesService
    doc_ids_to_heads = WikiToPageHeadsService().get_value(wid, {})
    doc_ids_to_entities = WikiPageToEntitiesService().get_value(wid, {})
    if not doc_ids_to_heads:
        print("%s no heads" % (wid,))
    if not doc_ids_to_entities:
        print("%s no entities" % (wid,))

    empty_entities = {'titles': [], 'redirects': {}}
    doc_ids_combined = {}
    for doc_id, heads in doc_ids_to_heads.items():
        entity_response = doc_ids_to_entities.get(doc_id, empty_entities)
        redirects = entity_response['redirects']
        # Dict views cannot be added to a list on Python 3, so materialise
        # them first; on Python 2 this is just a copy.
        raw_terms = entity_response['titles'] + list(redirects.keys())
        raw_terms = raw_terms + list(redirects.values()) + list(set(heads))
        # Build a concrete list; map() would be a lazy iterator on Python 3.
        doc_ids_combined[doc_id] = [preprocess(term) for term in raw_terms]
    return list(doc_ids_combined.items())
def get_data(wid):
    """Return ``(doc_id, terms)`` pairs for all pages of the wiki ``wid``.

    ``terms`` starts with the result of applying ``preprocess`` to the
    entity titles, redirect keys, redirect values and de-duplicated heads
    of the page (heads come out of a set, so their relative order is
    arbitrary), and ends with the list stored for that doc id in the
    ``feature-data/page-<wid>.json`` object read from ``bucket``.

    :param wid: wiki identifier; handed to both services and interpolated
        into the storage key name.
    :return: list of ``(doc_id, terms)`` pairs.
    """
    log(wid)
    use_caching(shouldnt_compute=True)
    # TODO: should be CombinedEntitiesService
    doc_ids_to_heads = WikiToPageHeadsService().get_value(wid, {})
    doc_ids_to_entities = WikiPageToEntitiesService().get_value(wid, {})
    if not doc_ids_to_heads:
        log(wid, "no heads")
    if not doc_ids_to_entities:
        log(wid, "no entities")

    from_s3 = {}
    # NOTE(review): assumes ``bucket`` is a boto bucket, whose get_key()
    # returns None for a missing key -- confirm. The guard avoids an
    # AttributeError on None when the feature file does not exist.
    s3_key = bucket.get_key('feature-data/page-%s.json' % wid)
    if s3_key is not None:
        from_s3 = json.loads(s3_key.get_contents_as_string())
    else:
        log(wid, "no feature data")

    empty_entities = {'titles': [], 'redirects': {}}
    doc_ids_combined = {}
    for doc_id, heads in doc_ids_to_heads.items():
        entity_response = doc_ids_to_entities.get(doc_id, empty_entities)
        redirects = entity_response['redirects']
        # Dict views cannot be added to a list on Python 3, so materialise
        # them first; on Python 2 this is just a copy.
        raw_terms = entity_response['titles'] + list(redirects.keys())
        raw_terms = raw_terms + list(redirects.values()) + list(set(heads))
        # Build a concrete list (map() is lazy on Python 3) so the stored
        # features can be appended with ``+``.
        processed = [preprocess(term) for term in raw_terms]
        doc_ids_combined[doc_id] = processed + from_s3.get(doc_id, [])
    return list(doc_ids_combined.items())
Esempio n. 5
0
def heads(wid):
    """Return what ``WikiToPageHeadsService().get_value`` yields for ``wid``.

    ``{}`` is passed as the second argument to ``get_value`` (presumably
    the fallback for a missing entry -- confirm against the service).
    """
    service = WikiToPageHeadsService()
    page_heads = service.get_value(wid, {})
    return page_heads