def get_user_info(user_id, host):
    """Fetch the ``globaluserinfo`` record for a user ID.

    A new ``Session`` is opened against ``host`` and queried with
    ``action=query&meta=globaluserinfo&guiid=<user_id>``.

    Args:
        user_id: Value sent as the ``guiid`` request parameter.
        host: Host passed to the ``Session`` constructor.

    Returns:
        The ``query.globaluserinfo`` object of the response. An empty dict
        is returned when the response has a top-level ``error`` key or when
        either expected key is missing.
    """
    response = Session(host).get(
        action='query',
        meta='globaluserinfo',
        guiid=user_id,
    )
    # An error payload is reported to the caller as "no information".
    if 'error' in response:
        return {}
    query_section = response.get('query', {})
    return query_section.get('globaluserinfo', {})
def format_value(session: mwapi.Session, property_id: str, value: dict) -> flask.Markup:
    """Render a data value through the ``wbformatvalue`` API action.

    Args:
        session: API session used to send the request.
        property_id: Sent as the ``property`` request parameter.
        value: Data value; it is JSON-serialized into ``datavalue``.

    Returns:
        The ``result`` field of the response, escaped into ``flask.Markup``
        (the request asks for ``text/plain`` output).
    """
    serialized_value = json.dumps(value)
    response = session.get(
        action='wbformatvalue',
        datavalue=serialized_value,
        property=property_id,
        generate='text/plain',
    )
    plain_text = response['result']
    return flask.Markup.escape(plain_text)
def get_entities(session: mwapi.Session, entity_ids: Iterable[str]) -> dict:
    """Load entities through ``wbgetentities``, at most 50 IDs per request.

    Duplicate IDs are dropped before any request is made, so the order in
    which IDs are requested is not tied to the order of ``entity_ids``.

    Args:
        session: API session used to send the requests.
        entity_ids: IDs of the entities to load; may contain duplicates.

    Returns:
        A dict holding the merged contents of every response's ``entities``
        object. Empty when ``entity_ids`` is empty (no request is sent).
    """
    unique_ids = list(set(entity_ids))
    collected = {}
    # Walk the de-duplicated IDs in strides of 50, one request per stride.
    for start in range(0, len(unique_ids), 50):
        batch = unique_ids[start:start + 50]
        response = session.get(
            action='wbgetentities',
            ids=batch,
            props=['info', 'claims'],
            formatversion=2,
        )
        collected.update(response['entities'])
    return collected
def save_entity(entity_data: dict, summary: str, base_revision_id: Union[int, str], session: mwapi.Session) -> int:
    """Write an entity back through the ``wbeditentity`` API action.

    Args:
        entity_data: Entity to save; its ``id`` key selects the entity and
            the whole dict is JSON-serialized into the ``data`` parameter.
        summary: Edit summary sent with the request.
        base_revision_id: Sent as ``baserevid``.
        session: API session used for the token lookup and the POST.

    Returns:
        The ``entity.lastrevid`` value from the API response.
    """
    # Obtain the token before building the request, as a separate step.
    csrf = edit_token(session)
    request = {
        'action': 'wbeditentity',
        'id': entity_data['id'],
        'data': json.dumps(entity_data),
        'summary': summary,
        'baserevid': base_revision_id,
        'token': csrf,
    }
    api_response = session.post(**request)
    return api_response['entity']['lastrevid']
def edit_token(session: mwapi.Session) -> str:
    """Return an edit (CSRF) token for the MediaWiki API behind ``session``.

    Tokens are remembered in ``flask.g`` under ``edit_tokens``, keyed by
    ``session.host``, so the API is only asked when no token is stored for
    that host yet.

    Not to be confused with csrf_token, which gets a token for use within
    the tool.
    """
    tokens_by_host = flask.g.setdefault('edit_tokens', {})
    host = session.host
    try:
        return tokens_by_host[host]
    except KeyError:
        pass  # nothing stored for this host yet; request one below
    response = session.get(action='query', meta='tokens', type='csrf')
    fresh_token = response['query']['tokens']['csrftoken']
    tokens_by_host[host] = fresh_token
    return fresh_token
def _get_sitematrix(session: mwapi.Session) -> dict:
    """Index the ``sitematrix`` API result by database name and by URL.

    The ``count`` entry is not a site group: it is only inspected to warn
    when it exceeds 5000. The ``specials`` entry is itself the list of
    sites; every other entry keeps its sites under a ``site`` key.

    Returns:
        A dict with the keys ``by_dbname`` and ``by_url``, each mapping the
        corresponding site field to the full site object.
    """
    by_dbname = {}  # type: Dict[str, dict]
    by_url = {}  # type: Dict[str, dict]

    response = session.get(action='sitematrix', formatversion=2)
    for key, entry in response['sitematrix'].items():
        if key == 'count':
            if entry > 5000:
                warnings.warn('sitematrix reports more than 5000 sites (%d), '
                              'continuation might be necessary' % entry)
            continue
        group_sites = entry if key == 'specials' else entry['site']
        for site in group_sites:
            by_dbname[site['dbname']] = site
            by_url[site['url']] = site

    return {
        'by_dbname': by_dbname,
        'by_url': by_url,
    }
def prefetch_entities(session: mwapi.Session, entity_ids: AbstractSet[str]):
    """Fill ``format_entity_cache`` for every given ID not already in it.

    IDs whose cache key is already present are skipped. The remaining IDs
    are sent to the ``wbformatentities`` API action in batches of at most
    50, and each returned value is stored in the cache as ``flask.Markup``.
    ``format_entity_cache_lock`` is held for the whole operation, including
    the API requests.

    Args:
        session: API session; also passed to ``format_entity_key``.
        entity_ids: IDs of the entities to prefetch.
    """
    with format_entity_cache_lock:
        # Decide what is missing first, then slice that list into batches.
        uncached_ids = [
            candidate
            for candidate in entity_ids
            if format_entity_key(session, candidate) not in format_entity_cache
        ]
        for start in range(0, len(uncached_ids), 50):
            batch = uncached_ids[start:start + 50]
            api_result = session.get(
                action='wbformatentities',
                ids=batch,
                formatversion=2,
            )
            formatted = api_result['wbformatentities']
            for formatted_id in formatted:
                cache_key = format_entity_key(session, formatted_id)
                markup = flask.Markup(formatted[formatted_id])
                format_entity_cache[cache_key] = markup
import numpy as np
from maya.nltk import util
from pandarallel import pandarallel
from script.TrustScore import TrustScore
# NOTE(review): `sys` is used here but no `import sys` is visible in this
# fragment — confirm it is imported earlier in the file.
sys.path.append("..")
from mwapi import Session
from maya.extractors.api import Extractor
import pandas as pd
from matplotlib import pyplot
import os.path

# Module-level API session against English Wikipedia, shared by the extractor.
session = Session("https://en.wikipedia.org/w/api.php", user_agent="test")
api_extractor = Extractor(session)
# pandarallel.initialize(nb_workers=10)
pandarallel.initialize()


def getAllUsers():
    """
    Fetch the list of all users through ``api_extractor.get_all_user()``.

    Args:
        None.

    Result:
        NOTE(review): originally documented as a dataframe of the users and
        their edit counts, but the visible body only assigns the extractor
        result to ``uc`` and has no return statement — confirm.
    """
    uc = api_extractor.get_all_user()
def format_entity(session: mwapi.Session, entity_id: str) -> flask.Markup:
    """Format a single entity through the ``wbformatentities`` API action.

    Args:
        session: API session used to send the request.
        entity_id: ID of the entity to format.

    Returns:
        The response value stored under ``entity_id``, wrapped in
        ``flask.Markup`` without further escaping.
    """
    api_result = session.get(
        action='wbformatentities',
        ids=[entity_id],
        formatversion=2,
    )
    formatted = api_result['wbformatentities']
    return flask.Markup(formatted[entity_id])