Beispiel #1
0
def get_page_coordinates(file_from=None,
                         file_to=None,
                         language=LANG,
                         state_file=None):
    """Request the revision content of every page listed in a TSV file.

    The page ids are read from the ``pageid`` column of ``file_from`` and
    sent as the ``pageids`` parameter of a ``prop=revisions`` /
    ``rvprop=content`` query. The ``coordinates``, ``lat`` and ``long``
    response fields are produced by ``wikiAPI.get_coordinates_data``.

    Args:
        file_from: Path of the tab-separated input file. Falls back to
            ``FILE_FROM % language`` when None or empty.
        file_to: Path handed to ``wikiAPI.WikiResponse`` as ``file``. Falls
            back to ``FILE_TO % language`` when None or empty.
        language: Two-character language code (``en``, ``uk``, ...).
        state_file: Optional path forwarded to the request as ``state_file``.

    Raises:
        ValueError: If ``language`` is not a two-character string or one of
            the file arguments is neither None nor a string.
    """
    if not isinstance(language, str) or len(language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if file_from is not None and not isinstance(file_from, str):
        raise ValueError('File should be file path string.')
    if file_to is not None and not isinstance(file_to, str):
        raise ValueError('File should be file path string')
    if state_file is not None and not isinstance(state_file, str):
        raise ValueError('State File should be file path string.')

    # Empty strings fall back to the language-specific defaults as well.
    file_from = file_from or FILE_FROM % language
    file_to = file_to or FILE_TO % language

    # to_files() / to_functions() are defined elsewhere in this file;
    # presumably they switch some process-wide state (e.g. the working
    # directory) and switch it back -- TODO confirm. The try/finally makes
    # sure to_functions() also runs when reading the input or a request fails.
    to_files()
    try:
        page_table = pandas.read_csv(file_from, sep='\t')
        page_table = page_table.astype({'pageid': 'int64'})
        page_ids = page_table['pageid']

        params = {
            'prop': 'revisions',
            'rvprop': 'content',
            'pageids': page_ids
        }

        response_table = {
            'revisions': {
                'pageid': 'int64',
                'ns': 'int64',
                'title': str,
                'coordinates': str,
                'lat': 'float64',
                'long': 'float64'
            }
        }

        # The three coordinate fields are filled by one custom handler.
        custom = {
            'revisions': {
                ('coordinates', 'lat', 'long'): wikiAPI.get_coordinates_data
            }
        }

        response = wikiAPI.WikiResponse(response_table,
                                        custom=custom,
                                        file=file_to)
        print(response)

        request = wikiAPI.WikiSafeRequestMultiplePage(params,
                                                      language,
                                                      on_response=response,
                                                      state_file=state_file)
        # NOTE(review): language is already passed to the constructor above;
        # the explicit assignment is kept as in the original.
        request.language = language
        request.send_all()

        response.save()
        print(response.show())
    finally:
        to_functions()
def get_langlinks(link_language, file_from=None, file_to=None,
                  language=LANG, state_file=None):
    """Request the ``link_language`` language links of pages in a TSV file.

    The page ids are read from the ``pageid`` column of ``file_from`` and
    sent as the ``pageids`` parameter of a ``prop=langlinks`` query whose
    ``lllang`` parameter is ``link_language``.

    Args:
        link_language: Two-character code of the language to get links for.
        file_from: Path of the tab-separated input file. Falls back to
            ``FILE_FROM % language`` when None or empty.
        file_to: Path handed to ``wikiAPI.WikiResponse`` as ``file``. Falls
            back to ``FILE_TO % (language, link_language)`` when None or
            empty.
        language: Two-character language code passed to the request.
        state_file: Optional path forwarded to the request as ``state_file``.

    Raises:
        ValueError: If a language code is not a two-character string or one
            of the file arguments is neither None nor a string.
    """
    if not isinstance(link_language, str) or len(link_language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if not isinstance(language, str) or len(language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if file_from is not None and not isinstance(file_from, str):
        raise ValueError('File should be file path string.')
    if file_to is not None and not isinstance(file_to, str):
        raise ValueError('File should be file path string')
    if state_file is not None and not isinstance(state_file, str):
        raise ValueError('State File should be file path string.')

    # Empty strings fall back to the language-specific defaults as well.
    file_from = file_from or FILE_FROM % language
    file_to = file_to or FILE_TO % (language, link_language)

    # to_files() / to_functions() are defined elsewhere in this file;
    # presumably they switch some process-wide state (e.g. the working
    # directory) and switch it back -- TODO confirm. The try/finally makes
    # sure to_functions() also runs when reading the input or a request fails.
    to_files()
    try:
        page_table = pandas.read_csv(file_from, sep='\t')
        page_table = page_table.astype({'pageid': 'int64'})
        page_ids = page_table['pageid']

        params = {
            'prop': 'langlinks',
            'lllang': link_language,
            'pageids': page_ids
        }

        response_table = {
            'langlinks': {
                'pageid': 'int64',
                'ns': 'int64',
                'title': str,
                'lang': str,
                '*': str
            }
        }

        response = wikiAPI.WikiResponse(response_table, file=file_to)

        request = wikiAPI.WikiSafeRequestMultiplePage(
            params,
            language,
            on_response=response,
            state_file=state_file
        )
        # NOTE(review): language is already passed to the constructor above;
        # the explicit assignment is kept as in the original.
        request.language = language
        request.send_all()

        response.save()
    finally:
        to_functions()
def get_all_pages(file=None, language=LANG, state_file=None):
    """Send a ``list=allpages`` query and save the collected pages.

    The query uses ``aplimit=max``, ``apfilterredir=nonredirects`` and an
    empty ``apcontinue``; ``pageid``, ``ns`` and ``title`` are collected
    for each entry of ``allpages``.

    Args:
        file: Path handed to ``wikiAPI.WikiResponse`` as ``file``. Falls back
            to ``FILE % language`` when None or empty.
        language: Two-character language code passed to the request.
        state_file: Optional path forwarded to the request as ``state_file``.

    Raises:
        ValueError: If ``language`` is not a two-character string or one of
            the file arguments is neither None nor a string.
    """
    if not isinstance(language, str) or len(language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if file is not None and not isinstance(file, str):
        raise ValueError('File should be file path string.')
    if state_file is not None and not isinstance(state_file, str):
        raise ValueError('State File should be file path string.')

    # An empty string falls back to the language-specific default as well.
    file = file or FILE % language

    # to_files() / to_functions() are defined elsewhere in this file;
    # presumably they switch some process-wide state (e.g. the working
    # directory) and switch it back -- TODO confirm. The try/finally makes
    # sure to_functions() also runs when a request fails.
    to_files()
    try:
        params = {
            'list': 'allpages',
            'aplimit': 'max',
            'apfilterredir': 'nonredirects',
            'apcontinue': ''
        }

        response_table = {
            'allpages': {
                'pageid': 'int64',
                'ns': 'int64',
                'title': str
            }
        }

        response = wikiAPI.WikiResponse(response_table, file=file)

        request = wikiAPI.WikiSafeRequest(
            params,
            language,
            on_response=response,
            state_file=state_file
        )
        request.send_all()

        response.save()
        print(response.show())
    finally:
        to_functions()
Beispiel #4
0
import sys
sys.path.insert(0, '..\\..\\modules')

import wikiAPI
'''
In this example we will send a prop query for multiple pages.

For this you just use a list, tuple or pandas.Series instead of string in the
titles (like this example) or pageids parameter.
The wikiAPI will send the requests for these one by one.

Note: the request's max parameter specifies how many pages to fetch in a
single response (the maximum value is 50). Here it is set to 1 to demonstrate
how the multi-page request works.
Note: you cannot use both titles and pageids in the same query.
'''

# Query parameters; 'titles' holds a list, so several pages are requested.
params = dict(
    prop='revisions',
    rvprop='user|timestamp',
    titles=['Microsoft', 'Apple', 'Berlin'],
)

# Layout handed to WikiResponse: field name -> type, under 'revisions'.
response_table = {
    'revisions': {
        'user': str,
        'timestamp': str,
    },
}

response = wikiAPI.WikiResponse(response_table)
request = wikiAPI.WikiRequestMultiplePage(
    params,
    on_response=response,
    max=1,
)
request.send_all()

print(response.show())
is not deleted automatically by the program.
'''

# IMPORTANT! The same save_every value must be given to both the Response
# and the Request.
save_every = 1

params = dict(list='allcategories', aclimit='20')

# Layout handed to WikiResponse: field name -> type, under 'allcategories'.
response_table = {'allcategories': {'*': str}}

response = wikiAPI.WikiResponse(response_table,
                                file='all_categories.csv',
                                save_every=save_every)
request = wikiAPI.WikiSafeRequest(params,
                                  on_response=response,
                                  save_every=save_every)
request.send()

print(response.show())
def get_revisions(file_from=None,
                  file_to=None,
                  language=LANG,
                  state_file=None,
                  direction=None):
    """Request user and timestamp of revisions for pages in a TSV file.

    The page ids are read from the ``pageid`` column of ``file_from`` and
    sent as the ``pageids`` parameter of a ``prop=revisions`` /
    ``rvprop=user|timestamp`` query, with ``max=50`` passed to the request.
    The ``timestamp`` field is converted by ``wikiAPI.to_date``.

    Args:
        file_from: Path of the tab-separated input file. Falls back to
            ``FILE_FROM % language`` when None or empty.
        file_to: Path handed to ``wikiAPI.WikiResponse`` as ``file``. Falls
            back to ``FILE_TO % language`` when None or empty.
        language: Two-character language code passed to the request.
        state_file: Optional path forwarded to the request as ``state_file``.
        direction: ``'older'``, ``'newer'`` or None. It is validated and
            defaulted to ``DIRECTION`` but is not added to the request
            parameters.

    Raises:
        ValueError: If ``language`` is not a two-character string, a file
            argument is neither None nor a string, or ``direction`` is not
            one of the accepted values.
    """
    if not isinstance(language, str) or len(language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if file_from is not None and not isinstance(file_from, str):
        raise ValueError('File should be file path string.')
    if file_to is not None and not isinstance(file_to, str):
        raise ValueError('File should be file path string')
    if state_file is not None and not isinstance(state_file, str):
        raise ValueError('State File should be file path string.')
    if direction not in ('older', 'newer', None):
        raise ValueError('Wrong direction %s - can be older or newer' %
                         direction)

    # to_files() / to_functions() are defined elsewhere in this file;
    # presumably they switch some process-wide state (e.g. the working
    # directory) and switch it back -- TODO confirm. The try/finally makes
    # sure to_functions() also runs when reading the input or a request fails.
    to_files()
    try:
        # NOTE(review): direction is defaulted here but never placed into
        # params below, so it currently has no effect on the request.
        direction = direction or DIRECTION
        # Empty strings fall back to the language-specific defaults as well.
        file_from = file_from or FILE_FROM % language
        file_to = file_to or FILE_TO % language

        page_table = pandas.read_csv(file_from, sep='\t')
        page_table = page_table.astype({'pageid': 'int64'})
        page_ids = page_table['pageid']

        params = {
            'prop': 'revisions',
            'rvprop': 'user|timestamp',
            'pageids': page_ids
        }

        response_table = {
            'revisions': {
                'pageid': 'int64',
                'ns': 'int64',
                'title': str,
                'user': str,
                'userhidden': str,
                'anon': bool,
                'timestamp': object
            }
        }

        custom = {'revisions': {'timestamp': wikiAPI.to_date}}

        response = wikiAPI.WikiResponse(response_table,
                                        custom=custom,
                                        file=file_to)

        request = wikiAPI.WikiSafeRequestMultiplePage(params,
                                                      language,
                                                      on_response=response,
                                                      state_file=state_file,
                                                      max=50)
        request.send_all()

        response.save()
        print(response.show())
    finally:
        to_functions()
"""


def to_date(revision, page):
    """Parse the ``timestamp`` entry of *revision* into a ``datetime``.

    Args:
        revision: Mapping with a ``'timestamp'`` string formatted as
            ``YYYY-MM-DDTHH:MM:SSZ``.
        page: Not used by this function.

    Returns:
        A ``datetime.datetime`` without timezone information.

    Raises:
        KeyError: If *revision* has no ``'timestamp'`` entry.
        ValueError: If the timestamp does not match the expected format.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    raw_timestamp = revision['timestamp']
    return datetime.datetime.strptime(raw_timestamp, timestamp_format)


params = {
    'prop': 'revisions',
    'rvprop': 'user|timestamp',
    'rvlimit': 'max',
    'titles': 'Wardersee',
}

# Layout handed to WikiResponse: field name -> type, under 'revisions'.
response_table = {
    'revisions': {
        'pageid': 'int64',
        'user': str,
        'timestamp': object,
        'anon': 'bool',
    },
}

# The 'timestamp' field is produced by the to_date handler.
custom = {'revisions': {'timestamp': to_date}}

# Response handler: the object passed to the request as on_response.
response = wikiAPI.WikiResponse(response_table, custom=custom)

# Build the request from the parameters, send it and print the results.
request = wikiAPI.WikiRequest(params, on_response=response)
request.send_all()
print(response.show())