def _fetch_first_page(url):
    """Fetch a MediaWiki API query URL and return the first entry of query.pages.

    Raises KeyError if the decoded response has no 'query'/'pages' keys and
    IndexError if 'pages' is empty.
    """
    from contextlib import closing  # local import keeps this block self-contained

    # urllib2 responses are not context managers; closing() guarantees the
    # HTTP response is released even if JSON decoding fails.
    with closing(urllib2.urlopen(url)) as response:
        payload = json.load(response)
    pages = payload['query']['pages']
    # list(...) keeps the positional lookup valid whether values() returns a
    # list or a view object.
    return list(pages.values())[0]


def consumer(change_id, title):
    """Process one queued change: refresh the page's data, then dequeue it.

    Queries the English Wikipedia API for the page named ``title``. When the
    page entry contains revisions, the first listed revision is fetched in
    full, the combined page/revision dict is passed to ``update`` and the
    page's links are refreshed through ``updatePagelinks``. Pages without
    revisions are skipped. Afterwards the row identified by ``change_id`` is
    deleted from ``ChangeTable``.

    NOTE(review): the delete is assumed to run whether or not the page had
    revisions -- confirm against the intended control flow.

    Args:
        change_id: value of ``ChangeTable.change_id`` for the row to remove.
        title: page title to look up; it is URL-quoted before use.

    MySQL errors are printed rather than raised.
    """
    # make query to wikipedia by title
    page_url = ("http://en.wikipedia.org/w/api.php?action=query&format=json&titles="
                + urllib2.quote(title) + "&prop=redirects|revisions")
    d = {}
    d['page'] = _fetch_first_page(page_url)

    # revision of a certain page may be missing; such pages are ignored
    if 'revisions' in d['page']:
        revision_id = d['page']['revisions'][0]['revid']
        # reference for parameters: http://www.mediawiki.org/wiki/API:Revisions
        revision_url = ("http://en.wikipedia.org/w/api.php?action=query&format=json"
                        "&prop=revisions&rvprop=content|flags|ids|comment|timestamp|sha1&revids="
                        + str(revision_id))
        d['revision'] = _fetch_first_page(revision_url)['revisions'][0]

        # update DB
        update(d)
        page_id = d['page']['pageid']
        updatePagelinks(page_id, title)

    # delete row from waiting list
    cnx = None
    cursor = None
    try:
        cnx = connection.MySQLConnection(user='******', password='',
                                         host='localhost', database='wikidb')
        cursor = cnx.cursor()
        sqlDelete = "DELETE FROM ChangeTable WHERE change_id = %s"
        cursor.execute(sqlDelete, (str(change_id), ))
        cnx.commit()
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)
    finally:
        # Release the cursor and connection on every path; previously the
        # connection was closed only when the delete succeeded.
        if cursor is not None:
            cursor.close()
        if cnx is not None:
            cnx.close()
def consumer(page_id, page_title):
    """Refresh the links of one page and record its id as the last one handled.

    Calls ``updatePagelinks(page_id, page_title)`` and then overwrites
    ``lastPageID_allPagelinks.txt`` with ``str(page_id)``.

    Args:
        page_id: identifier of the page; written to the file as text.
        page_title: title of the page, forwarded to ``updatePagelinks``.
    """
    updatePagelinks(page_id, page_title)
    # The context manager flushes and closes the file immediately, so the
    # recorded id is on disk even if the process stops right afterwards.
    with open('lastPageID_allPagelinks.txt', 'w') as f:
        f.write(str(page_id))