def get_data(wid):
    """Build the combined term list for every page of wiki ``wid``.

    For each doc id returned by ``WikiToPageHeadsService`` the result holds
    ``preprocess`` applied to the page's entity titles, its redirect keys and
    values, and its de-duplicated heads, followed by the list stored for that
    doc id in the S3 feature file (appended as-is, without preprocessing).

    :param wid: wiki id; passed to both services and interpolated into the
        S3 key ``feature-data/page-<wid>.json``.
    :return: list of ``(doc_id, terms)`` pairs, one per doc id that has heads.
    """
    log(wid)
    # NOTE(review): presumably restricts the services below to cached
    # values -- confirm against use_caching.
    use_caching(shouldnt_compute=True)

    # TODO: should be CombinedEntitiesService (original author's note).
    doc_ids_to_heads = WikiToPageHeadsService().get_value(wid, {})
    doc_ids_to_entities = WikiPageToEntitiesService().get_value(wid, {})

    if not doc_ids_to_heads:
        log(wid, "no heads")
    if not doc_ids_to_entities:
        log(wid, "no entities")

    doc_ids_combined = {}
    if not doc_ids_to_heads:
        # Only doc ids with heads are emitted, so there is nothing to
        # combine and no reason to pay for the S3 round-trip.
        return list(doc_ids_combined.items())

    # Assumes `bucket` is a boto Bucket, whose get_key() returns None for a
    # missing key; treat that as "no extra features" instead of crashing
    # with an AttributeError on None.
    s3_key = bucket.get_key('feature-data/page-%s.json' % wid)
    if s3_key is None:
        log(wid, "no feature data")
        from_s3 = {}
    else:
        from_s3 = json.loads(s3_key.get_contents_as_string())

    for doc_id in doc_ids_to_heads:
        entity_response = doc_ids_to_entities.get(
            doc_id, {'titles': [], 'redirects': {}})
        redirects = entity_response['redirects']
        # list(...) around keys()/values() keeps the concatenation valid on
        # Python 3, where they are views rather than lists.
        raw_terms = (list(entity_response['titles'])
                     + list(redirects.keys())
                     + list(redirects.values())
                     + list(set(doc_ids_to_heads.get(doc_id, []))))
        # A list comprehension (not map) so the result can be concatenated
        # with the S3 list on both Python 2 and Python 3.
        doc_ids_combined[doc_id] = (
            [preprocess(term) for term in raw_terms]
            + from_s3.get(doc_id, []))

    return list(doc_ids_combined.items())
def get_data_wid(wid):
    """Return ``(doc_id, terms)`` pairs for every page of wiki ``wid``.

    ``terms`` is ``preprocess`` applied to the page's entity titles, its
    redirect keys, its redirect values and its de-duplicated heads, in that
    order. Only doc ids present in the heads mapping are emitted.

    :param wid: wiki id handed to both lookup services.
    :return: the ``items()`` of the doc-id -> terms dict.
    """
    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    print(wid)
    use_caching(shouldnt_compute=True)

    # Original author's note: this should be CombinedEntitiesService.
    heads_by_doc = WikiToPageHeadsService().get_value(wid, {})
    entities_by_doc = WikiPageToEntitiesService().get_value(wid, {})

    if heads_by_doc == {}:
        print("%s %s" % (wid, "no heads"))
    if entities_by_doc == {}:
        print("%s %s" % (wid, "no entities"))

    combined = {}
    for doc_id in heads_by_doc:
        # Pages without entity data contribute only their heads.
        entity_response = entities_by_doc.get(
            doc_id, {"titles": [], "redirects": {}})
        titles = entity_response["titles"]
        redirects = entity_response["redirects"]
        unique_heads = list(set(heads_by_doc.get(doc_id, [])))
        raw_terms = (titles
                     + list(redirects.keys())
                     + list(redirects.values())
                     + unique_heads)
        combined[doc_id] = [preprocess(term) for term in raw_terms]

    return combined.items()
def get_data_wid(wid):
    """Combine entity and head terms for each page of the wiki ``wid``.

    Looks up the per-page heads and per-page entities for ``wid`` and, for
    every doc id that has heads, runs ``preprocess`` over titles + redirect
    keys + redirect values + unique heads.

    :param wid: wiki id passed to ``WikiToPageHeadsService`` and
        ``WikiPageToEntitiesService``.
    :return: ``items()`` of the resulting doc-id -> preprocessed-terms dict.
    """
    print(wid)
    use_caching(shouldnt_compute=True)

    # Original author's note: this should be CombinedEntitiesService.
    page_heads = WikiToPageHeadsService().get_value(wid, {})
    page_entities = WikiPageToEntitiesService().get_value(wid, {})

    # One formatted string prints the same "<wid> <message>" line the
    # two-argument print statement produced.
    if page_heads == {}:
        print("%s %s" % (wid, "no heads"))
    if page_entities == {}:
        print("%s %s" % (wid, "no entities"))

    def _terms_for(doc_id):
        # Gather the raw (un-preprocessed) strings for one page.
        response = page_entities.get(doc_id, {'titles': [], 'redirects': {}})
        terms = response['titles'] + list(response['redirects'].keys())
        terms = terms + list(response['redirects'].values())
        return terms + list(set(page_heads.get(doc_id, [])))

    result = {}
    for doc_id in page_heads:
        result[doc_id] = list(map(preprocess, _terms_for(doc_id)))
    return result.items()
def get_data(wid):
    """Return the combined terms for each page of wiki ``wid``.

    Each page's terms are ``preprocess`` applied to its entity titles,
    redirect keys, redirect values and de-duplicated heads, followed by the
    entries stored under that doc id in the S3 object
    ``feature-data/page-<wid>.json`` (those are appended unmodified).

    :param wid: wiki id used for both service lookups and the S3 key name.
    :return: list of ``(doc_id, terms)`` pairs; only doc ids that appear in
        the heads mapping are included.
    """
    log(wid)
    # NOTE(review): looks like this makes the services read cached values
    # only -- confirm against use_caching.
    use_caching(shouldnt_compute=True)

    # TODO: should be CombinedEntitiesService (original author's note).
    doc_ids_to_heads = WikiToPageHeadsService().get_value(wid, {})
    doc_ids_to_entities = WikiPageToEntitiesService().get_value(wid, {})

    if not doc_ids_to_heads:
        log(wid, "no heads")
    if not doc_ids_to_entities:
        log(wid, "no entities")

    doc_ids_combined = {}
    if not doc_ids_to_heads:
        # The loop below would emit nothing, so skip the S3 download.
        return list(doc_ids_combined.items())

    # Assumes a boto Bucket: get_key() yields None when the object does not
    # exist. Fall back to an empty mapping rather than failing on
    # None.get_contents_as_string().
    feature_key = bucket.get_key('feature-data/page-%s.json' % wid)
    if feature_key is None:
        log(wid, "no feature data")
        from_s3 = {}
    else:
        from_s3 = json.loads(feature_key.get_contents_as_string())

    empty_response = {'titles': [], 'redirects': {}}
    for doc_id in doc_ids_to_heads:
        entity_response = doc_ids_to_entities.get(doc_id, empty_response)
        redirects = entity_response['redirects']
        # Explicit list(...) conversions: dict views cannot be concatenated
        # with + on Python 3.
        raw_terms = (list(entity_response['titles'])
                     + list(redirects.keys())
                     + list(redirects.values())
                     + list(set(doc_ids_to_heads.get(doc_id, []))))
        preprocessed = [preprocess(term) for term in raw_terms]
        doc_ids_combined[doc_id] = preprocessed + from_s3.get(doc_id, [])

    return list(doc_ids_combined.items())