Beispiel #1
0
def get_user_info(user_id, host):
    """Look up the global user info for ``user_id`` through the API at ``host``.

    A new ``Session`` is created for ``host`` and queried with
    ``action=query``, ``meta=globaluserinfo`` and ``guiid=user_id``.

    Returns the ``globaluserinfo`` part of the response.  An empty dict is
    returned when the response contains an ``error`` key or when the
    ``query`` / ``globaluserinfo`` parts are absent.
    """
    response = Session(host).get(
        action='query',
        meta='globaluserinfo',
        guiid=user_id,
    )
    if 'error' not in response:
        query_part = response.get('query', {})
        return query_part.get('globaluserinfo', {})
    return {}
Beispiel #2
0
def format_value(session: mwapi.Session, property_id: str,
                 value: dict) -> flask.Markup:
    """Format a data value as plain text using ``wbformatvalue``.

    ``value`` is serialised to JSON and sent as ``datavalue`` together with
    ``property_id`` as ``property``, requesting ``text/plain`` output.
    The ``result`` field of the response is escaped and returned as markup.
    """
    request = {
        'action': 'wbformatvalue',
        'datavalue': json.dumps(value),
        'property': property_id,
        'generate': 'text/plain',
    }
    plain_text = session.get(**request)['result']
    return flask.Markup.escape(plain_text)
Beispiel #3
0
def get_entities(session: mwapi.Session, entity_ids: Iterable[str]) -> dict:
    """Fetch entity data for ``entity_ids`` via ``wbgetentities``.

    Duplicate IDs are dropped, then the remaining IDs are requested in
    batches of at most 50 per API call, asking for the ``info`` and
    ``claims`` props with ``formatversion=2``.

    Returns a dict merging the ``entities`` mapping of every response.
    """
    unique_ids = list(set(entity_ids))
    collected = {}
    for start in range(0, len(unique_ids), 50):
        batch = unique_ids[start:start + 50]
        api_result = session.get(
            action='wbgetentities',
            ids=batch,
            props=['info', 'claims'],
            formatversion=2,
        )
        collected.update(api_result['entities'])
    return collected
Beispiel #4
0
def save_entity(entity_data: dict,
                summary: str,
                base_revision_id: Union[int, str],
                session: mwapi.Session) -> int:
    """Save ``entity_data`` with ``wbeditentity`` and return the new revision.

    The entity is identified by ``entity_data['id']`` and its full data is
    sent as JSON.  ``summary`` is the edit summary and ``base_revision_id``
    is passed as ``baserevid``.

    Returns the ``lastrevid`` reported for the entity in the API response.
    """
    # Obtain the token before building the request, as the original did.
    csrf = edit_token(session)
    request = {
        'action': 'wbeditentity',
        'id': entity_data['id'],
        'data': json.dumps(entity_data),
        'summary': summary,
        'baserevid': base_revision_id,
        'token': csrf,
    }
    return session.post(**request)['entity']['lastrevid']
Beispiel #5
0
def edit_token(session: mwapi.Session) -> str:
    """Return an edit (CSRF) token for the MediaWiki API of ``session``.

    This is the API token, not the tool's own ``csrf_token``.

    Tokens are kept in ``flask.g`` under ``edit_tokens``, keyed by
    ``session.host``; the API is only queried when no token is stored
    for that host yet.
    """
    cache = flask.g.setdefault('edit_tokens', {})
    host = session.host
    if host not in cache:
        response = session.get(action='query', meta='tokens', type='csrf')
        cache[host] = response['query']['tokens']['csrftoken']
    return cache[host]
Beispiel #6
0
def _get_sitematrix(session: mwapi.Session) -> dict:
    """Fetch the sitematrix and index its sites by database name and URL.

    Returns a dict with two keys, ``by_dbname`` and ``by_url``, each
    mapping to a dict of site entries keyed by the site's ``dbname`` or
    ``url`` respectively.

    A warning is issued when the reported ``count`` exceeds 5000, since
    the single request made here might then not cover every site.
    """
    by_dbname = {}  # type: Dict[str, dict]
    by_url = {}  # type: Dict[str, dict]
    response = session.get(action='sitematrix', formatversion=2)
    for key, entry in response['sitematrix'].items():
        if key == 'count':
            # 'count' is a number, not a group of sites.
            if entry > 5000:
                warnings.warn('sitematrix reports more than 5000 sites (%d), '
                              'continuation might be necessary' % entry)
            continue
        # 'specials' is itself the site list; other groups nest it
        # under 'site'.
        group = entry if key == 'specials' else entry['site']
        for site in group:
            by_dbname[site['dbname']] = site
            by_url[site['url']] = site
    return {
        'by_dbname': by_dbname,
        'by_url': by_url,
    }
Beispiel #7
0
def prefetch_entities(session: mwapi.Session, entity_ids: AbstractSet[str]):
    """Fill the format-entity cache for every ID in ``entity_ids``.

    IDs whose cache key is already present are skipped.  The remaining IDs
    are requested from ``wbformatentities`` in batches of at most 50, and
    each returned value is stored in the cache as markup.

    The whole operation, including the API requests, runs while holding
    ``format_entity_cache_lock``.
    """
    with format_entity_cache_lock:
        missing_ids = [
            entity_id
            for entity_id in entity_ids
            if format_entity_key(session, entity_id) not in format_entity_cache
        ]
        for offset in range(0, len(missing_ids), 50):
            batch = missing_ids[offset:offset + 50]
            formatted = session.get(action='wbformatentities',
                                    ids=batch,
                                    formatversion=2)['wbformatentities']
            for entity_id, html in formatted.items():
                cache_key = format_entity_key(session, entity_id)
                format_entity_cache[cache_key] = flask.Markup(html)
import numpy as np

from maya.nltk import util
from pandarallel import pandarallel

from script.TrustScore import TrustScore

sys.path.append("..")
from mwapi import Session
from maya.extractors.api import Extractor
import pandas as pd
from matplotlib import pyplot

import os.path

# Module-level API session and the extractor built on top of it; both are
# created when this module is imported.
# NOTE(review): mwapi documents `host` as scheme + hostname only and adds the
# API path itself -- confirm that including "/w/api.php" here is intended.
session = Session("https://en.wikipedia.org/w/api.php", user_agent="test")
api_extractor = Extractor(session)

# pandarallel.initialize(nb_workers=10)
# Initialise pandarallel with its default settings (also at import time).
pandarallel.initialize()


def getAllUsers():
    """
    Fetch the list of all Wikipedia users that have contributed.

    Args:
        None.

    Result:
        Per the original description, a dataframe of the users and their
        edit counts.
    """
    # NOTE(review): the visible body only stores the extractor result in
    # `uc` and never returns it, so as shown this function returns None.
    # The remainder of the body may be missing from this view -- confirm.
    uc = api_extractor.get_all_user()
Beispiel #9
0
def format_entity(session: mwapi.Session, entity_id: str) -> flask.Markup:
    """Return the ``wbformatentities`` output for one entity as markup.

    A single-ID request is made with ``formatversion=2``; the value stored
    under ``entity_id`` in the ``wbformatentities`` part of the response is
    wrapped in ``flask.Markup`` as-is.
    """
    formatted = session.get(
        action='wbformatentities',
        ids=[entity_id],
        formatversion=2,
    )['wbformatentities']
    return flask.Markup(formatted[entity_id])