Esempio n. 1
0
def _fetch_json(url):
    """Fetch ``url`` and return its body parsed as JSON.

    The HTTP response is always closed, even when JSON decoding fails.
    """
    response = urllib2.urlopen(url)
    try:
        return json.load(response)
    finally:
        response.close()


def consumer(change_id, title):
    """Refresh the stored data for one Wikipedia page, then dequeue its change.

    The English Wikipedia API is queried for ``title``.  If the returned page
    entry contains revisions, the first listed revision is fetched by id with
    its content and metadata, the combined record is passed to ``update`` and
    the page links are refreshed through ``updatePagelinks``.  Pages without
    revisions are skipped.  In both cases the ``ChangeTable`` row whose
    ``change_id`` matches is deleted afterwards.

    Args:
        change_id: Identifier of the row to delete from ``ChangeTable``.
        title: Title of the Wikipedia page to look up.

    MySQL errors are reported with ``print`` and are not re-raised.  Errors
    from the HTTP requests or from ``update`` / ``updatePagelinks`` propagate
    to the caller, in which case the row is not deleted.
    """
    # make query to wikipedia by title
    page_url = (
        "http://en.wikipedia.org/w/api.php?action=query&format=json&titles="
        + urllib2.quote(title)
        + "&prop=redirects|revisions"
    )
    pages = _fetch_json(page_url)['query']['pages']
    d = {}
    # 'pages' is keyed by page id; take its first entry.  list(...) keeps this
    # working whether values() returns a list or a view.
    d['page'] = list(pages.values())[0]

    # revision of a certain page may be missing; such pages are ignored
    if 'revisions' in d['page']:
        revision_id = d['page']['revisions'][0]['revid']
        # reference for parameters: http://www.mediawiki.org/wiki/API:Revisions
        revision_url = (
            "http://en.wikipedia.org/w/api.php?action=query&format=json"
            "&prop=revisions"
            "&rvprop=content|flags|ids|comment|timestamp|sha1&revids="
            + str(revision_id)
        )
        revision_pages = _fetch_json(revision_url)['query']['pages']
        d['revision'] = list(revision_pages.values())[0]['revisions'][0]
        # update DB
        update(d)
        page_id = d['page']['pageid']
        updatePagelinks(page_id, title)

    # delete row from waiting list
    cnx = None
    try:
        cnx = connection.MySQLConnection(user='******', password='', host='localhost', database='wikidb')
        cursor = cnx.cursor()
        sqlDelete = "DELETE FROM ChangeTable WHERE change_id = %s"
        cursor.execute(sqlDelete, (str(change_id), ))
        cnx.commit()
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)
    finally:
        # Close on every path: previously a failure after connecting left the
        # connection open because it was only closed on success.
        if cnx is not None:
            cnx.close()
def consumer(page_id, page_title):
    """Refresh the links of one page and record its id as a checkpoint.

    Calls ``updatePagelinks(page_id, page_title)`` and then overwrites
    ``lastPageID_allPagelinks.txt`` in the current working directory with
    ``str(page_id)``.

    Args:
        page_id: Identifier of the page; written to the checkpoint file.
        page_title: Title of the page, forwarded to ``updatePagelinks``.
    """
    updatePagelinks(page_id, page_title)
    # The context manager closes (and therefore flushes) the file, so the
    # checkpoint is on disk as soon as this function returns.
    with open('lastPageID_allPagelinks.txt', 'w') as f:
        f.write(str(page_id))