def get_labels(keys, name=None):
    """Return a mapping of entity ID to label, using a JSON file cache.

    The keys are sorted numerically by everything after their first
    character, then looked up in ``cache/{name}_labels.json``. The cached
    labels are reused only when the cache file holds a dict whose ``keys``
    entry equals the sorted key list. Otherwise the labels are fetched with
    ``mediawiki.get_entities`` in batches of 50 and the cache file is
    rewritten.

    Args:
        keys: Iterable of entity ID strings. ``int(key[1:])`` must succeed
            for every key, otherwise ``ValueError`` is raised while sorting.
        name: Stem of the cache file name. Defaults to the sorted keys
            joined with ``_``.

    Returns:
        Dict mapping ``entity['id']`` to
        ``wikibase.get_entity_label(entity)`` for every entity retrieved.
    """
    keys = sorted(keys, key=lambda i: int(i[1:]))
    if name is None:
        name = '_'.join(keys)
    filename = f'cache/{name}_labels.json'

    labels = []
    if os.path.exists(filename):
        # Context manager so the handle is closed as soon as parsing is done.
        with open(filename) as cache_file:
            from_cache = json.load(cache_file)
        # Only trust the cache when it was written for exactly these keys.
        if isinstance(from_cache, dict) and from_cache.get('keys') == keys:
            labels = from_cache['labels']

    if not labels:
        for cur in utils.chunk(keys, 50):
            labels += mediawiki.get_entities(cur, props='labels')

        # Closing the file explicitly guarantees the cache is flushed to disk
        # before the function returns.
        with open(filename, 'w') as cache_file:
            json.dump({'keys': keys, 'labels': labels}, cache_file, indent=2)

    return {entity['id']: wikibase.get_entity_label(entity)
            for entity in labels}
def get_labels(keys, name=None):
    """Return a mapping of entity ID to label, using a JSON file cache.

    The keys are sorted numerically by everything after their first
    character, then looked up in ``cache/{name}_labels.json``. The cached
    labels are reused only when the cache file holds a dict whose ``keys``
    entry equals the sorted key list. Otherwise the labels are fetched with
    ``mediawiki.get_entities`` in batches of 50, with progress printed to
    stdout, and the cache file is rewritten.

    Args:
        keys: Iterable of entity ID strings. ``int(key[1:])`` must succeed
            for every key, otherwise ``ValueError`` is raised while sorting.
        name: Stem of the cache file name. Defaults to the hex MD5 digest of
            the sorted keys joined with ``_``, which keeps the file name a
            fixed length however many keys are passed.

    Returns:
        Dict mapping ``entity['id']`` to
        ``wikibase.get_entity_label(entity)`` for every entity retrieved.
    """
    keys = sorted(keys, key=lambda i: int(i[1:]))
    if name is None:
        name = hashlib.md5('_'.join(keys).encode('utf-8')).hexdigest()
    filename = f'cache/{name}_labels.json'

    labels = []
    if os.path.exists(filename):
        # Context manager so the handle is closed as soon as parsing is done.
        with open(filename) as cache_file:
            from_cache = json.load(cache_file)
        # Only trust the cache when it was written for exactly these keys.
        if isinstance(from_cache, dict) and from_cache.get('keys') == keys:
            labels = from_cache['labels']

    if not labels:
        print(len(keys))
        for num, cur in enumerate(utils.chunk(keys, 50)):
            print(f'{num * 50} / {len(keys)}')
            labels += mediawiki.get_entities(cur, props='labels')

        # Closing the file explicitly guarantees the cache is flushed to disk
        # before the function returns.
        with open(filename, 'w') as cache_file:
            json.dump({'keys': keys, 'labels': labels}, cache_file, indent=2)

    return {entity['id']: wikibase.get_entity_label(entity)
            for entity in labels}
def get_labels_db(keys):
    """Return a dict of QID -> label, consulting the database first.

    Each distinct QID is looked up through ``Item.query.get`` using the QID
    with its first character removed. QIDs without a stored item are fetched
    with ``mediawiki.get_entities`` in batches of 50; every returned entity
    that has no ``redirects`` key is stored as a new ``Item`` and its label
    is added to the result. Progress is printed to stdout.

    NOTE(review): the session is committed once per batch here; the source
    this was reconstructed from had lost its indentation, so confirm the
    commit placement against the original file.

    Args:
        keys: Iterable of QID strings; duplicates are ignored.

    Returns:
        Dict mapping QID to ``item.label`` for stored and newly added items.
    """
    batch_size = 50
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    labels = {}
    missing = set()

    # First pass: answer whatever we can from the local database.
    for qid in set(keys):
        stored = Item.query.get(qid[1:])
        if not stored:
            missing.add(qid)
            continue
        labels[qid] = stored.label

    total_missing = len(missing)
    print(total_missing)

    # Second pass: fetch the rest from the API and persist them.
    for batch_num, batch in enumerate(utils.chunk(missing, batch_size)):
        print(f'{batch_num * batch_size} / {total_missing}')
        for entity in mediawiki.get_entities(batch):
            if 'redirects' not in entity:
                qid = entity['id']
                modified = datetime.strptime(entity['modified'],
                                             timestamp_format)
                # FIXME: check if the item is an artwork and set is_artwork correctly
                new_item = Item(item_id=qid[1:],
                                entity=entity,
                                lastrevid=entity['lastrevid'],
                                modified=modified,
                                is_artwork=False)
                database.session.add(new_item)
                labels[qid] = new_item.label
        database.session.commit()

    return labels
def get_labels_db(keys):
    """Return a dict of QID -> label, consulting the database first.

    Each distinct QID that matches ``re_qid`` is looked up through
    ``Item.query.get`` using ``int`` of the pattern's first group
    (presumably the numeric part of the QID; verify against ``re_qid``).
    QIDs that do not match, or have no stored item, are fetched with
    ``mediawiki.get_entities`` in batches of 50; every returned entity that
    has no ``redirects`` key is stored as a new ``Item`` and its label is
    added to the result.

    Fetching is best-effort: a ``requests.exceptions.ReadTimeout`` stops the
    fetch loop and the labels collected so far are returned. The timeout is
    logged as a warning rather than silently discarded.

    NOTE(review): the session is committed once per batch here; the source
    this was reconstructed from had lost its indentation, so confirm the
    commit placement against the original file.

    Args:
        keys: Iterable of QID strings; duplicates are ignored.

    Returns:
        Dict mapping QID to ``item.label`` for stored and newly added items.
        May be incomplete if the API request timed out.
    """
    # Function-scope import so this block does not depend on the module's
    # import list being changed.
    import logging

    keys = set(keys)
    labels = {}
    missing = set()
    for qid in keys:
        m = re_qid.match(qid)
        if m:
            item_id = int(m.group(1))
            item = Item.query.get(item_id)
            if item:
                labels[qid] = item.label
                continue
        # Either the QID did not match the pattern or it is not stored yet.
        missing.add(qid)

    page_size = 50
    try:
        for cur in utils.chunk(missing, page_size):
            for entity in mediawiki.get_entities(cur):
                if 'redirects' in entity:
                    continue
                qid = entity['id']

                modified = datetime.strptime(entity['modified'],
                                             "%Y-%m-%dT%H:%M:%SZ")
                # FIXME: check if the item is an artwork and set is_artwork correctly
                item = Item(item_id=qid[1:],
                            entity=entity,
                            lastrevid=entity['lastrevid'],
                            modified=modified,
                            is_artwork=False)
                database.session.add(item)
                labels[qid] = item.label
            database.session.commit()
    except requests.exceptions.ReadTimeout:
        # Keep the best-effort behaviour (return what we have) but leave a
        # trace so missing labels can be diagnosed.
        logging.getLogger(__name__).warning(
            'timed out fetching labels; returning %d labels for %d keys',
            len(labels), len(keys))

    return labels