def get_page_coordinates(file_from=None, file_to=None, language=LANG, state_file=None):
    """Query revision content for the listed pages and store their coordinates.

    Page ids are read from the ``pageid`` column of a tab-separated file and
    sent as a ``prop=revisions`` / ``rvprop=content`` query. The
    ``coordinates``, ``lat`` and ``long`` columns of the response table are
    produced by ``wikiAPI.get_coordinates_data``.

    Args:
        file_from: Path of the tab-separated input file. When empty or None,
            ``FILE_FROM % language`` is used.
        file_to: Path handed to the response handler as ``file``. When empty
            or None, ``FILE_TO % language`` is used.
        language: Two-character language code (``en``, ``uk``, ...).
        state_file: Optional path forwarded to the request as ``state_file``.

    Raises:
        ValueError: If ``language`` is not a two-character string, or if any
            of the path arguments is neither None nor a string.
    """
    if not isinstance(language, str) or len(language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if file_from is not None and not isinstance(file_from, str):
        raise ValueError('File should be file path string.')
    if file_to is not None and not isinstance(file_to, str):
        raise ValueError('File should be file path string')
    if state_file is not None and not isinstance(state_file, str):
        raise ValueError('State File should be file path string.')

    source_path = file_from or FILE_FROM % language
    target_path = file_to or FILE_TO % language

    # NOTE(review): to_files() / to_functions() are defined elsewhere;
    # presumably they switch between the data and code locations - confirm.
    to_files()

    page_ids = (
        pandas.read_csv(source_path, sep='\t')
        .astype({'pageid': 'int64'})['pageid']
    )
    query = {'prop': 'revisions', 'rvprop': 'content', 'pageids': page_ids}

    # Column name -> dtype of the table built from the 'revisions' property.
    columns = {
        'revisions': {
            'pageid': 'int64',
            'ns': 'int64',
            'title': str,
            'coordinates': str,
            'lat': 'float64',
            'long': 'float64',
        },
    }
    # These three columns are filled together by one custom extractor.
    extractors = {
        'revisions': {
            ('coordinates', 'lat', 'long'): wikiAPI.get_coordinates_data,
        },
    }

    handler = wikiAPI.WikiResponse(columns, custom=extractors, file=target_path)
    print(handler)

    request = wikiAPI.WikiSafeRequestMultiplePage(
        query, language, on_response=handler, state_file=state_file
    )
    request.language = language
    request.send_all()

    handler.save()
    print(handler.show())
    to_functions()
def get_langlinks(link_language, file_from=None, file_to=None, language=LANG, state_file=None):
    """Query the language links of the listed pages for one target language.

    Page ids are read from the ``pageid`` column of a tab-separated file and
    sent as a ``prop=langlinks`` query with ``lllang`` set to
    ``link_language``. The response handler is saved once all requests have
    been sent.

    Args:
        link_language: Two-character code of the language whose links are
            requested.
        file_from: Path of the tab-separated input file. When empty or None,
            ``FILE_FROM % language`` is used.
        file_to: Path handed to the response handler as ``file``. When empty
            or None, ``FILE_TO % (language, link_language)`` is used.
        language: Two-character language code passed to the request.
        state_file: Optional path forwarded to the request as ``state_file``.

    Raises:
        ValueError: If either language code is not a two-character string, or
            if any of the path arguments is neither None nor a string.
    """
    if not isinstance(link_language, str) or len(link_language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if not isinstance(language, str) or len(language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if file_from is not None and not isinstance(file_from, str):
        raise ValueError('File should be file path string.')
    if file_to is not None and not isinstance(file_to, str):
        raise ValueError('File should be file path string')
    if state_file is not None and not isinstance(state_file, str):
        raise ValueError('State File should be file path string.')

    source_path = file_from or FILE_FROM % language
    target_path = file_to or FILE_TO % (language, link_language)

    # NOTE(review): to_files() / to_functions() are defined elsewhere;
    # presumably they switch between the data and code locations - confirm.
    to_files()

    page_ids = (
        pandas.read_csv(source_path, sep='\t')
        .astype({'pageid': 'int64'})['pageid']
    )
    query = {
        'prop': 'langlinks',
        'lllang': link_language,
        'pageids': page_ids,
    }
    # Column name -> dtype of the table built from the 'langlinks' property.
    columns = {
        'langlinks': {
            'pageid': 'int64',
            'ns': 'int64',
            'title': str,
            'lang': str,
            '*': str,
        },
    }

    handler = wikiAPI.WikiResponse(columns, file=target_path)
    request = wikiAPI.WikiSafeRequestMultiplePage(
        query, language, on_response=handler, state_file=state_file
    )
    request.language = language
    request.send_all()

    handler.save()
    to_functions()
def get_all_pages(file=None, language=LANG, state_file=None):
    """Run a ``list=allpages`` query (non-redirects only) and save the result.

    The query asks for ``aplimit=max`` and starts with an empty
    ``apcontinue``. The collected table (``pageid``, ``ns``, ``title``) is
    saved through the response handler and its ``show()`` output is printed.

    Args:
        file: Path handed to the response handler as ``file``. When empty or
            None, ``FILE % language`` is used.
        language: Two-character language code passed to the request.
        state_file: Optional path forwarded to the request as ``state_file``.

    Raises:
        ValueError: If ``language`` is not a two-character string, or if
            ``file`` / ``state_file`` is neither None nor a string.
    """
    if not isinstance(language, str) or len(language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if file is not None and not isinstance(file, str):
        raise ValueError('File should be file path string.')
    if state_file is not None and not isinstance(state_file, str):
        raise ValueError('State File should be file path string.')

    target_path = file or FILE % language

    # NOTE(review): to_files() / to_functions() are defined elsewhere;
    # presumably they switch between the data and code locations - confirm.
    to_files()

    query = {
        'list': 'allpages',
        'aplimit': 'max',
        'apfilterredir': 'nonredirects',
        'apcontinue': '',
    }
    # Column name -> dtype of the table built from the 'allpages' list.
    columns = {
        'allpages': {
            'pageid': 'int64',
            'ns': 'int64',
            'title': str,
        },
    }

    handler = wikiAPI.WikiResponse(columns, file=target_path)
    request = wikiAPI.WikiSafeRequest(
        query, language, on_response=handler, state_file=state_file
    )
    request.send_all()

    handler.save()
    print(handler.show())
    to_functions()
import sys

# Make the local wikiAPI module importable from the examples directory.
sys.path.insert(0, '..\\..\\modules')

import wikiAPI

'''
This example sends a prop query for multiple pages.

To do so, pass a list, tuple or pandas.Series instead of a single string as
the titles parameter (as done here) or as the pageids parameter. The wikiAPI
sends the requests for these pages one by one.

Note: the request's max parameter specifies how many pages we want to get in
a single response (the maximal value is 50). It is set to 1 here to
demonstrate how the function works.

Note: you cannot use both titles and pageids in the same query.
'''

params = {
    'prop': 'revisions',
    'rvprop': 'user|timestamp',
    'titles': ['Microsoft', 'Apple', 'Berlin'],
}

# Columns (and their types) collected from every revision.
response_table = {
    'revisions': {
        'user': str,
        'timestamp': str,
    },
}

response = wikiAPI.WikiResponse(response_table)
request = wikiAPI.WikiRequestMultiplePage(
    params,
    on_response=response,
    max=1,
)
request.send_all()

print(response.show())
is not deleted automatically by the program. ''' # IMPORTANT! THE save_every should be the same for Response and Request save_every = 1 params = { 'list': 'allcategories', 'aclimit': '20' } response_table = { 'allcategories': { '*': str } } response = wikiAPI.WikiResponse( response_table, file='all_categories.csv', save_every=save_every ) request = wikiAPI.WikiSafeRequest( params, on_response=response, save_every=save_every ) request.send() print(response.show())
def get_revisions(file_from=None, file_to=None, language=LANG, state_file=None, direction=None):
    """Query user and timestamp of revisions for the listed pages and save them.

    Page ids are read from the ``pageid`` column of a tab-separated file and
    sent as a ``prop=revisions`` / ``rvprop=user|timestamp`` query with
    ``max=50``. The ``timestamp`` column is converted by ``wikiAPI.to_date``.

    Args:
        file_from: Path of the tab-separated input file. When empty or None,
            ``FILE_FROM % language`` is used.
        file_to: Path handed to the response handler as ``file``. When empty
            or None, ``FILE_TO % language`` is used.
        language: Two-character language code passed to the request.
        state_file: Optional path forwarded to the request as ``state_file``.
        direction: ``'older'``, ``'newer'`` or None (then ``DIRECTION`` is
            used). NOTE(review): the value is validated and defaulted here
            but is not forwarded to the query in this function.

    Raises:
        ValueError: If ``language`` is not a two-character string, if a path
            argument is neither None nor a string, or if ``direction`` is not
            one of the accepted values.
    """
    if not isinstance(language, str) or len(language) != 2:
        raise ValueError('Language should be a 2 char code: en, uk, etc.')
    if file_from is not None and not isinstance(file_from, str):
        raise ValueError('File should be file path string.')
    if file_to is not None and not isinstance(file_to, str):
        raise ValueError('File should be file path string')
    if state_file is not None and not isinstance(state_file, str):
        raise ValueError('State File should be file path string.')
    if direction not in ('older', 'newer', None):
        raise ValueError('Wrong direction %s - can be older or newer' % direction)

    # NOTE(review): to_files() / to_functions() are defined elsewhere;
    # presumably they switch between the data and code locations - confirm.
    to_files()

    direction = direction or DIRECTION
    source_path = file_from or FILE_FROM % language
    target_path = file_to or FILE_TO % language

    page_ids = (
        pandas.read_csv(source_path, sep='\t')
        .astype({'pageid': 'int64'})['pageid']
    )
    query = {
        'prop': 'revisions',
        'rvprop': 'user|timestamp',
        'pageids': page_ids,
    }
    # Column name -> dtype of the table built from the 'revisions' property.
    columns = {
        'revisions': {
            'pageid': 'int64',
            'ns': 'int64',
            'title': str,
            'user': str,
            'userhidden': str,
            'anon': bool,
            'timestamp': object,
        },
    }
    converters = {'revisions': {'timestamp': wikiAPI.to_date}}

    handler = wikiAPI.WikiResponse(columns, custom=converters, file=target_path)
    request = wikiAPI.WikiSafeRequestMultiplePage(
        query, language, on_response=handler, state_file=state_file, max=50
    )
    request.send_all()

    handler.save()
    print(handler.show())
    to_functions()
""" def to_date(revision, page): return datetime.datetime.strptime(revision['timestamp'], "%Y-%m-%dT%H:%M:%SZ") params = { 'prop': 'revisions', 'rvprop': 'user|timestamp', 'rvlimit': 'max', 'titles': 'Wardersee' } response_table = { 'revisions': { 'pageid': 'int64', 'user': str, 'timestamp': object, 'anon': 'bool' } } custom = { 'revisions': { 'timestamp': to_date } } # create a response handler - Object with update function response = wikiAPI.WikiResponse(response_table, custom=custom) # create a request with all the parameters, send it and show the results request = wikiAPI.WikiRequest(params, on_response=response) request.send_all() print(response.show())