def findWikiprojectNumOfViews(WikiprojectEncodedTitle="WikiProject_Economics"):

    with open(WikiprojectEncodedTitle + '_Pages_Views.csv', 'w', newline='', encoding='utf-8') as fw:
        writer = csv.writer(fw)

        pubResultRow = [
            'WikiprojectTitle', 'Page Id', 'Page Title',
            '# views (last 90 days)'
        ]
        writer.writerow(pubResultRow)
        pagesReqParameteres = {'project': WikiprojectEncodedTitle}
        pagesReqURL = "https://alahele.ischool.uw.edu:8997/api/getProjectPages?" + \
            urllib.urlencode(pagesReqParameteres)

        print(pagesReqURL)
        responseNotRetrieved = True
        pagesResponse = {}
        pageDataList = {}

        TotalPagesViews = 0
        TotalPagesNum = 0
        errorNumber = 0
        while responseNotRetrieved and errorNumber <= 10:
            try:
                pagesResponseJSON = requests.get(pagesReqURL,
                                                 verify=False,
                                                 timeout=1000)
                pagesResponse = pagesResponseJSON.json()
                errorstatusAttribute = pagesResponse['errorstatus']
                if errorstatusAttribute == 'success':
                    responseNotRetrieved = False

            except Exception as e:
                errorNumber += 1
                print("An exception occurred: " + str(e))
                time.sleep(4)
        if errorNumber >= 10:
            print("I am not able to request this page from Alahele server.")
            input()
        if len(pagesResponse['result'].keys()) != 0:
            pageDataList = pagesResponse['result'][WikiprojectEncodedTitle]
            for pageData in pageDataList:
                pageEncodedTitle = pageData['tp_title']
                pageEncodedTitle = fixurl(pageEncodedTitle)

                if "WikiProject_" in pageEncodedTitle:
                    pageEncodedTitle = "Wikipedia:" + pageEncodedTitle
                print("Page Title:", pageEncodedTitle)
                pageID = pageData['pp_id']
                print("Page ID:", pageID)
                trafficStatisticsURL = "http://stats.grok.se/en/latest90/" + pageEncodedTitle
                trafficStatisticsSoup = scrapeWebsite(trafficStatisticsURL,
                                                      'p', '', '', "", "", "")

                if trafficStatisticsSoup != False:
                    trafficPTag = strip_tags(
                        trafficStatisticsSoup.find(
                            'p').renderContents().decode('utf-8'))

                    viewsOverPastNinetyDays = re.search(
                        r'.*has been viewed (\d*).*', trafficPTag).group(1)

                    print("# Views:", viewsOverPastNinetyDays)
                    writer.writerow([
                        WikiprojectEncodedTitle, pageID,
                        pageEncodedTitle, viewsOverPastNinetyDays
                    ])

                    TotalPagesViews += num(viewsOverPastNinetyDays)
                    TotalPagesNum += 1

        print("Average # Views over the past 90 days:",
              (TotalPagesViews / TotalPagesNum))
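
# The function above relies on module-level imports and two small helpers,
# num() and fixurl(), that are not included in this listing. A minimal sketch
# of what they might look like (hypothetical reconstructions, not the original
# implementations):
import csv
import re
import time
import urllib.parse

import requests


def num(s):
    # Hypothetical helper: parse a numeric string (possibly containing
    # thousands separators) into an int.
    return int(str(s).replace(',', ''))


def fixurl(title):
    # Hypothetical helper: percent-encode a page title so it is safe to embed
    # in a URL path, keeping characters such as ':' and '/' readable.
    return urllib.parse.quote(title.replace(' ', '_'), safe=':/()')
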
def findWikiprojectNumOfViews(WikiprojectEncodedTitle="WikiProject_Economics"):

    with open(WikiprojectEncodedTitle + '_Pages_Views.csv', 'wb') as fw:
        writer = csv.writer(fw)

        pubResultRow = [
            'WikiprojectTitle', 'Page Id', 'Page Title',
            '# views (last 90 days)'
        ]
        writer.writerow(pubResultRow)

        # Define the parameters to be retrieved from alahele.ischool.uw.edu:8997 for Wikiproject pages.
        pagesReqParameteres = {'project': WikiprojectEncodedTitle}

        # Define the URL to be retrieved from alahele.ischool.uw.edu:8997 for Wikiproject pages.
        pagesReqURL = "https://alahele.ischool.uw.edu:8997/api/getProjectPages?" + urllib.urlencode(
            pagesReqParameteres)

        print pagesReqURL

        # Define a flag to show if there is any problem with the API response.
        responseNotRetrieved = True

        # Define the response object.
        pagesResponse = {}

        # Define the list of pages in this WikiProject.
        pageDataList = {}

        TotalPagesViews = 0
        TotalPagesNum = 0

        errorNumber = 0

        while responseNotRetrieved and errorNumber <= 10:
            try:
                # Retrieve list of the Wikiproject pages through the API.
                pagesResponseJSON = requests.get(pagesReqURL,
                                                 verify=False,
                                                 timeout=1000)

                # Convert the response to JSON.
                pagesResponse = pagesResponseJSON.json()

                # Retrieve the errorstatus attribute of the response.
                errorstatusAttribute = pagesResponse['errorstatus']

                # If errorstatus shows that the data is retrieved properly:
                if errorstatusAttribute == 'success':
                    responseNotRetrieved = False

            except Exception, e:

                errorNumber += 1

                print "An exception occurred: " + str(e)

                # Sleep for 4 seconds before retrying the request.
                time.sleep(4)
        if errorNumber >= 10:
            print "I am not able to request this page from Alahele server."
            raw_input()

        # If there is any page under the scope of this project:
        if len(pagesResponse['result'].keys()) != 0:

            # Retrieve the result list of pages from the response.
            pageDataList = pagesResponse['result'][WikiprojectEncodedTitle]

            # For all the pages in the result list:
            for pageData in pageDataList:

                pageEncodedTitle = pageData['tp_title']

                # Find the page encoded URL.
                pageEncodedTitle = fixurl(pageEncodedTitle)

                if "WikiProject_" in pageEncodedTitle:
                    pageEncodedTitle = "Wikipedia:" + pageEncodedTitle

                print "Page Title:", pageEncodedTitle

                pageID = pageData['pp_id']

                print "Page ID:", pageID

                # Retrieve content of the traffic statistics page in BeautifulSoup structure.
                trafficStatisticsURL = "http://stats.grok.se/en/latest90/" + pageEncodedTitle

                # Retrieve the stats.grok.se page as a BeautifulSoup structure; scrapeWebsite() returns it only if a p tag is present, which indicates the page is in the expected format.
                trafficStatisticsSoup = scrapeWebsite(trafficStatisticsURL,
                                                      'p', '', '', "", "", "")

                if trafficStatisticsSoup != False:

                    # Retrieve the p tag including the number of views and the traffic ranking.
                    trafficPTag = strip_tags(
                        trafficStatisticsSoup.find(
                            'p').renderContents().decode('utf-8'))

                    # Extract the number of views in the last 90 days.
                    viewsOverPastNinetyDays = re.search(
                        r'.*has been viewed (\d*).*', trafficPTag).group(1)

                    print "# Views:", viewsOverPastNinetyDays

                    # Write the Wikiproject info in the corresponding CSV file as a row.
                    writer.writerow([
                        WikiprojectEncodedTitle.encode('utf-8'), pageID,
                        pageEncodedTitle.encode('utf-8'), viewsOverPastNinetyDays
                    ])

                    TotalPagesViews += num(viewsOverPastNinetyDays)
                    TotalPagesNum += 1

        print "Average # Views over the past 90 days:", (TotalPagesViews /
                                                         TotalPagesNum)
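
# Both versions above call scrapeWebsite() and strip_tags(), which are not
# defined in this listing. A minimal Python 3 sketch under the assumption that
# scrapeWebsite() fetches a URL, parses it with BeautifulSoup, and returns the
# soup only when the requested tag (and attribute, if given) is present,
# returning False otherwise; the trailing arguments are ignored here. These
# are hypothetical reconstructions, not the original implementations.
import re
import requests
from bs4 import BeautifulSoup


def scrapeWebsite(url, tagName, attrName, attrValue, *unused):
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        return False
    soup = BeautifulSoup(response.text, 'html.parser')
    if attrName:
        found = soup.find(tagName, {attrName: attrValue})
    else:
        found = soup.find(tagName)
    return soup if found is not None else False


def strip_tags(htmlFragment):
    # Crude tag stripper; the original may use an HTMLParser-based approach.
    return re.sub(r'<[^>]+>', '', htmlFragment)
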
def WikipediaPageStats(articleHyperlink, articleSoup, articleTitle,
                       desiredCategory):

    print("Wikipedia article title before correction:", articleTitle)

    appropriateArticleTitle = re.search("(.+) -", articleTitle)

    if appropriateArticleTitle != None:

        articleTitle = appropriateArticleTitle.group(1)

    print("Wikipedia article title after correction:", articleTitle)

    articleEncodedTitles = re.findall("(?:wiki/([^?]+))|(?:title=([^&]+))",
                                      articleHyperlink)

    print("Wikipedia articles' Encoded Titles:", articleEncodedTitles)

    if articleEncodedTitles[0][0] != '':

        articleEncodedTitle = articleEncodedTitles[0][0]

    elif articleEncodedTitles[0][1] != '':

        articleEncodedTitle = articleEncodedTitles[0][1]

    elif articleEncodedTitles[1][0] != '':

        articleEncodedTitle = articleEncodedTitles[1][0]

    elif articleEncodedTitles[1][1] != '':

        articleEncodedTitle = articleEncodedTitles[1][1]

    print("Encoded title of the article: " + articleEncodedTitle)

    # if '%E2%80%93' in articleEncodedTitle:
    #   strObjs = articleEncodedTitle.split('%E2%80%93')
    #   articleEncodedTitle = strObj[0]
    #   for index in range(len(strObjs)):
    #       articleEncodedTitle += u'–' + strObjs[index]
    # articleEncodedTitle = articleEncodedTitle.replace('%2F', '/')
    # articleEncodedTitle = articleEncodedTitle.replace('%3F', '?')
    # articleEncodedTitle = articleEncodedTitle.replace('%27', "&")
    # articleEncodedTitle = articleEncodedTitle.replace('%27', "'")
    # articleEncodedTitle = articleEncodedTitle.replace('%28', '(')
    # articleEncodedTitle = articleEncodedTitle.replace('%29', ')')
    articleEncodedTitle = articleEncodedTitle.replace('%E2%80%93', u'–')
    articleEncodedTitle = unquote(articleEncodedTitle)

    responseCounter = 0

    numberOfExternalLinks = 0

    # for result in GETRequestFromWikipedia( {'titles':articleTitle, 'prop':'info|contributors|revisions', 'inprop':'protection|watchers', 'pclimit':'max', 'rvprop':'timestamp', 'rvlimit':'max'} ):
    for result in GETRequestFromWikipedia({
            'titles': articleEncodedTitle,
            'prop': 'info|extlinks',
            'inprop': 'protection|watchers',
            'ellimit': 'max'
    }):

        # if result != [ ]:
        pageData = list(result['pages'].values())[0]

        qualityClass, importanceClass, desiredCategoryFound = classFinder(
            articleHyperlink, articleEncodedTitle, desiredCategory)

        if 'pageid' in pageData and pageData['ns'] == 0:
            # if 'pageid' in pageData and pageData['ns'] == 0 and desiredCategoryFound == True:

            if responseCounter == 0:

                # pageInfoData = result['pages'].values()[0]

                print("\n\nID: " + str(pageData['pageid']), '\n')

                statsContext = {}
                statsContext['pageid'] = pageData['pageid']
                statsContext['title'] = articleTitle

                editProtectionLevel = 'None'

                for protection in pageData['protection']:

                    if protection['type'] == 'edit':

                        editProtectionLevel = protection['level']
                        break

                statsContext['editProtectionLevel'] = editProtectionLevel

                if num(pageData['length']) < 1500:
                    print(
                        "The length of the article is less than 1500 characters, consider the page as a stub. Do not recommend it."
                    )
                    qualityClass = "Stub-Class"

                statsContext['qualityClass'] = qualityClass

                statsContext['importanceClass'] = importanceClass

                statsContext['length'] = pageData['length']

                watchersNumber = '0'
                if 'watchers' in pageData:
                    watchersNumber = str(pageData['watchers'])

                statsContext['watchersNumber'] = watchersNumber

                statsContext['touched'] = pageData['touched']

                infoURL = "http://en.wikipedia.org/w/index.php?title=" + \
                    articleEncodedTitle + "&action=info"

                infoSoup = scrapeWebsite(infoURL, 'tr', 'id',
                                         "mw-pageinfo-watchers", "", "", "")

                if infoSoup != False:

                    redirectsStatisticsTag = infoSoup.find(
                        'tr', id="mw-pageinfo-watchers").findNext('tr')
                    redirectsStatisticsNumber = '0'
                    if redirectsStatisticsTag:
                        redirectsStatisticsNumber = redirectsStatisticsTag.findAll(
                            'td')[1].renderContents()

                    statsContext['redirects'] = redirectsStatisticsNumber

                    firsttimeStatisticsTag = infoSoup.find(
                        'tr', id="mw-pageinfo-firsttime")
                    firsttimeStatisticsNumber = '0'
                    if firsttimeStatisticsTag:
                        firsttimeStatisticsNumber = firsttimeStatisticsTag.findAll(
                            'td')[1].find('a').renderContents()

                    statsContext['creationDate'] = firsttimeStatisticsNumber

                    editsStatisticsTag = infoSoup.find('tr',
                                                       id="mw-pageinfo-edits")
                    editsStatisticsNumber = '0'
                    if editsStatisticsTag:
                        editsStatisticsNumber = editsStatisticsTag.findAll(
                            'td')[1].renderContents()

                    statsContext['editsNum'] = editsStatisticsNumber

                    # authorsStatisticsTag = infoSoup.find('tr', id="mw-pageinfo-authors")
                    # authorsStatisticsNumber = '0'
                    # if authorsStatisticsTag:
                    #     authorsStatisticsNumber = authorsStatisticsTag.findAll('td')[1].renderContents()

                    # statsContext['distinctAuthors'] = authorsStatisticsNumber

                    recentEditsStatisticsTag = infoSoup.find(
                        'tr', id="mw-pageinfo-recent-edits")
                    recentEditsStatisticsNumber = '0'
                    if recentEditsStatisticsTag:
                        recentEditsStatisticsNumber = recentEditsStatisticsTag.findAll(
                            'td')[1].renderContents()

                    statsContext['recentEdits'] = recentEditsStatisticsNumber

                    recentAuthorsStatisticsTag = infoSoup.find(
                        'tr', id="mw-pageinfo-recent-authors")
                    recentAuthorsStatisticsNumber = '0'
                    if recentAuthorsStatisticsTag:
                        recentAuthorsStatisticsNumber = recentAuthorsStatisticsTag.findAll(
                            'td')[1].renderContents()

                    statsContext[
                        'recentDistinctAuthors'] = recentAuthorsStatisticsNumber

                else:
                    statsContext['redirects'] = ''
                    statsContext['creationDate'] = ''
                    statsContext['editsNum'] = ''
                    # statsContext['distinctAuthors'] = ''
                    statsContext['recentEdits'] = ''
                    statsContext['recentDistinctAuthors'] = ''

                # rhStatisticsURL = "http://tools.wmflabs.org/xtools/articleinfo/index.php?article=" + articleEncodedTitle + "&lang=en&wiki=wikipedia"
                # rhStatisticsSoup = scrapeWebsite(rhStatisticsURL, 'div', 'id', "generalstats", "p", "class", "alert alert-danger xt-alert")

                # if rhStatisticsSoup != False and rhStatisticsSoup != "No Search Result":
                #     generalstatsContainer = rhStatisticsSoup.find('div', id="generalstats").find('tr').findAll('tr')
                #     submitAndPrint (statsFile, "Number of minor edits: ", re.search('(\d+) ', generalstatsContainer[6].findAll('td')[1].renderContents()).group(1))
                #     submitAndPrint (statsFile, "Average time between edits: ", generalstatsContainer[10].findAll('td')[1].renderContents())
                #     submitAndPrint (statsFile, "Average number of edits per month: ", generalstatsContainer[12].findAll('td')[1].renderContents())
                #     submitAndPrint (statsFile, "Average number of edits per year: ", generalstatsContainer[13].findAll('td')[1].renderContents())
                #     submitAndPrint (statsFile, "Number of edits in the past 24 hours: ", generalstatsContainer[15].findAll('td')[1].renderContents())
                #     submitAndPrint (statsFile, "Number of edits in the past 7 days: ", generalstatsContainer[16].findAll('td')[1].renderContents())
                #     submitAndPrint (statsFile, "Number of edits in the past 30 days: ", generalstatsContainer[17].findAll('td')[1].renderContents())
                #     submitAndPrint (statsFile, "Number of edits in the past 365 days: ", generalstatsContainer[18].findAll('td')[1].renderContents())
                #     submitAndPrint (statsFile, "Number of links to this page: ", generalstatsContainer[27].findAll('td')[1].find('a').renderContents())
                # else:
                #     submitAndPrint (statsFile, "Number of minor edits: ", '')
                #     submitAndPrint (statsFile, "Average time between edits: ", '')
                #     submitAndPrint (statsFile, "Average number of edits per month: ", '')
                #     submitAndPrint (statsFile, "Average number of edits per year: ", '')
                #     submitAndPrint (statsFile, "Number of edits in the the past 24 hours: ", '')
                #     submitAndPrint (statsFile, "Number of edits in the past 7 days: ", '')
                #     submitAndPrint (statsFile, "Number of edits in the past 30 days: ", '')
                #     submitAndPrint (statsFile, "Number of edits in the past 365 days: ", '')
                #     submitAndPrint (statsFile, "Number of links to this page: ", '')

                todayDate = (datetime.date.today()).strftime('%Y%m%d')
                pastMonthDate = (
                    datetime.date.today() -
                    datetime.timedelta(days=30)).strftime('%Y%m%d')
                trafficStatisticsURL = (
                    "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/"
                    + "all-access/user/" +
                    articleEncodedTitle.replace("/", "-") + "/daily/" +
                    pastMonthDate + "/" + todayDate)
                viewsErrorsNum = 0
                while viewsErrorsNum < 10:
                    try:
                        print("Requesting " + trafficStatisticsURL)
                    except UnicodeEncodeError:
                        print(
                            "UnicodeEncodeError: 'ascii' codec can't encode character."
                        )
                    try:
                        numberofViewsResponse = requests.get(
                            trafficStatisticsURL)
                        numberofViewsResponse = numberofViewsResponse.json()
                        numberofViewsPerDays = numberofViewsResponse['items']
                        break
                    except:
                        print(
                            "\n\n\nI cannot retrieve the number of views of this article!"
                        )
                        time.sleep(1)
                        viewsErrorsNum += 1
                viewsNum = 0
                if viewsErrorsNum < 10:
                    for numberofViewsPerDay in numberofViewsPerDays:
                        viewsNum += numberofViewsPerDay['views']

                statsContext['viewsNum'] = viewsNum
                print("# of views in the last month: " + str(viewsNum))

                if articleSoup != False:
                    referenceList = articleSoup.findAll(
                        'li', id=re.compile('cite_note.*'))
                    statsContext['referencesNum'] = len(referenceList)
                    # if referenceList != None:
                    #     referenceList = referenceList.findAll('li')
                    # else:
                    #     referenceList = [ ]
                    afterYear = 2010
                    referencesNumber = 0
                    for reference in referenceList:
                        yearDigits = re.search('.*(\d\d\d\d).*',
                                               str(reference.renderContents()))
                        if yearDigits != None and int(
                                yearDigits.group(1)) >= afterYear:
                            referencesNumber += 1
                    statsContext['referencesNumAfter2010'] = referencesNumber
                else:
                    statsContext['referencesNum'] = '0'
                    statsContext['referencesNumAfter2010'] = '0'
                if 'extlinks' in pageData:
                    numberOfExternalLinks = len(pageData['extlinks'])
            else:
                if 'extlinks' in pageData:
                    numberOfExternalLinks += len(pageData['extlinks'])
            responseCounter = 1
            print("# External Hyperlinks: " + str(numberOfExternalLinks), '\n')

            statsContext['externalLinks'] = numberOfExternalLinks
            return statsContext
        else:
            return []
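
# For reference, a standalone sketch of the Wikimedia pageviews request that
# WikipediaPageStats() issues above. The article title and User-Agent string
# here are illustrative examples, not values taken from the original code.
import datetime

import requests

article = "Economics"
end = datetime.date.today().strftime('%Y%m%d')
start = (datetime.date.today() - datetime.timedelta(days=30)).strftime('%Y%m%d')
pageviewsURL = ("https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
                "en.wikipedia.org/all-access/user/" + article + "/daily/" +
                start + "/" + end)
response = requests.get(pageviewsURL, headers={'User-Agent': 'pageviews-sketch'})
items = response.json().get('items', [])
print("# of views in the last month:", sum(item['views'] for item in items))
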
def WikipediaPageStats(articleHyperlink, articleSoup, articleTitle,
                       desiredCategory):

    print "Wikipedia article title before correction:", convert_unicode(
        articleTitle)

    # Find the appropriate title of the Wikipedia page from articleTitle.
    appropriateArticleTitle = re.search("(.+) -", articleTitle)

    # If appropriateArticleTitle is found:
    if appropriateArticleTitle != None:

        # Assign the value of appropriateArticleTitle to articleTitle.
        articleTitle = appropriateArticleTitle.group(1)

    print "Wikipedia article title after correction:", convert_unicode(
        articleTitle)

    # Find the encoded title of the article.
    articleEncodedTitles = re.findall("(?:wiki/([^?]+))|(?:title=([^&]+))",
                                      articleHyperlink)

    print "Wikipedia articles' Encoded Titles:", articleEncodedTitles

    # Find the encoded title of the article from the list of findings.
    if articleEncodedTitles[0][0] != '':

        articleEncodedTitle = articleEncodedTitles[0][0]

    elif articleEncodedTitles[0][1] != '':

        articleEncodedTitle = articleEncodedTitles[0][1]

    elif articleEncodedTitles[1][0] != '':

        articleEncodedTitle = articleEncodedTitles[1][0]

    elif articleEncodedTitles[1][1] != '':

        articleEncodedTitle = articleEncodedTitles[1][1]

    print "Encoded title of the article: " + articleEncodedTitle

    # Decode percent-encoded characters in the title (older per-character replacements are kept below, commented out).
    # if '%E2%80%93' in articleEncodedTitle:
    #   strObjs = articleEncodedTitle.split('%E2%80%93')
    #   articleEncodedTitle = strObj[0]
    #   for index in range(len(strObjs)):
    #       articleEncodedTitle += u'–' + strObjs[index]
    # articleEncodedTitle = articleEncodedTitle.replace('%2F', '/')
    # articleEncodedTitle = articleEncodedTitle.replace('%3F', '?')
    # articleEncodedTitle = articleEncodedTitle.replace('%27', "&")
    # articleEncodedTitle = articleEncodedTitle.replace('%27', "'")
    # articleEncodedTitle = articleEncodedTitle.replace('%28', '(')
    # articleEncodedTitle = articleEncodedTitle.replace('%29', ')')
    articleEncodedTitle = articleEncodedTitle.replace('%E2%80%93', u'–')
    articleEncodedTitle = urllib.unquote(articleEncodedTitle)

    # Define a counter to count the number of continued parts of the response.
    responseCounter = 0

    # The number of external hyperlinks in this page.
    numberOfExternalLinks = 0

    # For each response to be continued:
    #for result in GETRequestFromWikipedia( {'titles':articleTitle, 'prop':'info|contributors|revisions', 'inprop':'protection|watchers', 'pclimit':'max', 'rvprop':'timestamp', 'rvlimit':'max'} ):
    for result in GETRequestFromWikipedia({
            'titles': articleEncodedTitle,
            'prop': 'info|extlinks',
            'inprop': 'protection|watchers',
            'ellimit': 'max'
    }):

        # If there is an appropriate result:
        # if result != [ ]:

        # Extract page data from the JSON response.
        pageData = result['pages'].values()[0]

        # Find the quality and importance classes of the Wikipedia page and if the page is included in the desired category.
        qualityClass, importanceClass, desiredCategoryFound = classFinder(
            articleHyperlink, articleEncodedTitle, desiredCategory)

        # If there is a pageid (meaning the page actually exists on Wikipedia) and the namespace is 0 (meaning it is a main-namespace article):
        if 'pageid' in pageData and pageData['ns'] == 0:
            # if 'pageid' in pageData and pageData['ns'] == 0 and desiredCategoryFound == True:

            # If this is the first part of the response:
            if responseCounter == 0:

                # Extract page info data from the JSON response.
                # pageInfoData = result['pages'].values()[0]

                # Print out the page id.
                print "\n\nID: " + str(pageData['pageid']), '\n'

                statsContext = {}
                statsContext['pageid'] = pageData['pageid']
                statsContext['title'] = articleTitle

                # Initialize editProtectionLevel.
                editProtectionLevel = 'None'

                # Iterate over all protection types:
                for protection in pageData['protection']:

                    # If the edit protection is found:
                    if protection['type'] == 'edit':

                        # Once the edit protection level is found, exit the loop.
                        editProtectionLevel = protection['level']
                        break

                # Record the edit protection level.
                statsContext['editProtectionLevel'] = editProtectionLevel

                # If the length of the article is less than 1500 characters, consider the page as a stub.
                if num(pageData['length']) < 1500:
                    print "The length of the article is less than 1500 characters, consider the page as a stub. Do not recommend it."
                    qualityClass = "Stub-Class"

                # Record the quality class of the article.
                statsContext['qualityClass'] = qualityClass

                # Record the importance class of the article.
                statsContext['importanceClass'] = importanceClass

                # Record the length of the article.
                statsContext['length'] = pageData['length']

                watchersNumber = '0'
                # If watchers exists in the pageData dictionary:
                if 'watchers' in pageData:
                    watchersNumber = str(pageData['watchers'])

                # Record the number of watchers.
                statsContext['watchersNumber'] = watchersNumber

                # Record the 'touched' timestamp, which is updated whenever the page changes in a way that requires it to be re-rendered, invalidating caches. Aside from editing, this includes permission changes, creation or deletion of linked pages, and alteration of contained templates.
                statsContext['touched'] = pageData['touched']

                # Retrieve content of the Information page in BeautifulSoup structure.
                infoURL = "http://en.wikipedia.org/w/index.php?title=" + articleEncodedTitle + "&action=info"

                # Retrieve the info page as a BeautifulSoup structure; scrapeWebsite() returns it only if the watchers row is present, which indicates the page is in the expected format.
                infoSoup = scrapeWebsite(infoURL, 'tr', 'id',
                                         "mw-pageinfo-watchers", "", "", "")

                if infoSoup != False:

                    # Record the number of redirects to this page.
                    redirectsStatisticsTag = infoSoup.find(
                        'tr', id="mw-pageinfo-watchers").findNext('tr')
                    redirectsStatisticsNumber = '0'
                    if redirectsStatisticsTag:
                        redirectsStatisticsNumber = redirectsStatisticsTag.findAll(
                            'td')[1].renderContents()

                    statsContext['redirects'] = redirectsStatisticsNumber

                    # Record the date of page creation.
                    firsttimeStatisticsTag = infoSoup.find(
                        'tr', id="mw-pageinfo-firsttime")
                    firsttimeStatisticsNumber = '0'
                    if firsttimeStatisticsTag:
                        firsttimeStatisticsNumber = firsttimeStatisticsTag.findAll(
                            'td')[1].find('a').renderContents()

                    statsContext['creationDate'] = firsttimeStatisticsNumber

                    # Record the total number of edits.
                    editsStatisticsTag = infoSoup.find('tr',
                                                       id="mw-pageinfo-edits")
                    editsStatisticsNumber = '0'
                    if editsStatisticsTag:
                        editsStatisticsNumber = editsStatisticsTag.findAll(
                            'td')[1].renderContents()

                    statsContext['editsNum'] = editsStatisticsNumber

                    # Print out the total number of distinct authors.
                    # authorsStatisticsTag = infoSoup.find('tr', id="mw-pageinfo-authors")
                    # authorsStatisticsNumber = '0'
                    # if authorsStatisticsTag:
                    #     authorsStatisticsNumber = authorsStatisticsTag.findAll('td')[1].renderContents()

                    # statsContext['distinctAuthors'] = authorsStatisticsNumber

                    # Record the number of recent edits (within the past 30 days).
                    recentEditsStatisticsTag = infoSoup.find(
                        'tr', id="mw-pageinfo-recent-edits")
                    recentEditsStatisticsNumber = '0'
                    if recentEditsStatisticsTag:
                        recentEditsStatisticsNumber = recentEditsStatisticsTag.findAll(
                            'td')[1].renderContents()

                    statsContext['recentEdits'] = recentEditsStatisticsNumber

                    # Record the number of recent distinct authors.
                    recentAuthorsStatisticsTag = infoSoup.find(
                        'tr', id="mw-pageinfo-recent-authors")
                    recentAuthorsStatisticsNumber = '0'
                    if recentAuthorsStatisticsTag:
                        recentAuthorsStatisticsNumber = recentAuthorsStatisticsTag.findAll(
                            'td')[1].renderContents()

                    statsContext[
                        'recentDistinctAuthors'] = recentAuthorsStatisticsNumber

                # Otherwise:
                else:

                    # The info page could not be retrieved; record empty values.
                    statsContext['redirects'] = ''
                    statsContext['creationDate'] = ''
                    statsContext['editsNum'] = ''
                    # statsContext['distinctAuthors'] = ''
                    statsContext['recentEdits'] = ''
                    statsContext['recentDistinctAuthors'] = ''

                # # Retrieve content of the Revision history statistics page in BeautifulSoup structure.
                # rhStatisticsURL = "http://tools.wmflabs.org/xtools/articleinfo/index.php?article=" + articleEncodedTitle + "&lang=en&wiki=wikipedia"

                # # Find the soup structure if BeautifulSoup structure of the tools.wmflabs.org page is available, and there is generalstats container available in the soup structure of the tools.wmflabs.org page, which means that the tools.wmflabs.org page is in an appropriate format.
                # rhStatisticsSoup = scrapeWebsite(rhStatisticsURL, 'div', 'id', "generalstats", "p", "class", "alert alert-danger xt-alert")

                # if rhStatisticsSoup != False and rhStatisticsSoup != "No Search Result":

                #     # Find the container.
                #     generalstatsContainer = rhStatisticsSoup.find('div', id="generalstats").find('tr').findAll('tr')

                #     # Print out the number of minor edits.
                #     submitAndPrint (statsFile, "Number of minor edits: ", re.search('(\d+) ', generalstatsContainer[6].findAll('td')[1].renderContents()).group(1))

                #     # Print out the average time between edits.
                #     submitAndPrint (statsFile, "Average time between edits: ", generalstatsContainer[10].findAll('td')[1].renderContents())

                #     # Print out the average number of edits per month.
                #     submitAndPrint (statsFile, "Average number of edits per month: ", generalstatsContainer[12].findAll('td')[1].renderContents())

                #     # Print out the average number of edits per year.
                #     submitAndPrint (statsFile, "Average number of edits per year: ", generalstatsContainer[13].findAll('td')[1].renderContents())

                #     # Print out the number of edits in the last day.
                #     submitAndPrint (statsFile, "Number of edits in the past 24 hours: ", generalstatsContainer[15].findAll('td')[1].renderContents())

                #     # Print out the number of edits in the last week.
                #     submitAndPrint (statsFile, "Number of edits in the past 7 days: ", generalstatsContainer[16].findAll('td')[1].renderContents())

                #     # Print out the number of edits in the last month.
                #     submitAndPrint (statsFile, "Number of edits in the past 30 days: ", generalstatsContainer[17].findAll('td')[1].renderContents())

                #     # Print out the number of edits in the last year.
                #     submitAndPrint (statsFile, "Number of edits in the past 365 days: ", generalstatsContainer[18].findAll('td')[1].renderContents())

                #     # Print out the number of links to this page.
                #     submitAndPrint (statsFile, "Number of links to this page: ", generalstatsContainer[27].findAll('td')[1].find('a').renderContents())

                # # Otherwise:
                # else:

                #     # Print out the number of minor edits.
                #     submitAndPrint (statsFile, "Number of minor edits: ", '')

                #     # Print out the average time between edits.
                #     submitAndPrint (statsFile, "Average time between edits: ", '')

                #     # Print out the average number of edits per month.
                #     submitAndPrint (statsFile, "Average number of edits per month: ", '')

                #     # Print out the average number of edits per year.
                #     submitAndPrint (statsFile, "Average number of edits per year: ", '')

                #     # Print out the number of edits in the last day.
                #     submitAndPrint (statsFile, "Number of edits in the the past 24 hours: ", '')

                #     # Print out the number of edits in the last week.
                #     submitAndPrint (statsFile, "Number of edits in the past 7 days: ", '')

                #     # Print out the number of edits in the last month.
                #     submitAndPrint (statsFile, "Number of edits in the past 30 days: ", '')

                #     # Print out the number of edits in the last year.
                #     submitAndPrint (statsFile, "Number of edits in the past 365 days: ", '')

                #     # Print out the number of links to this page.
                #     submitAndPrint (statsFile, "Number of links to this page: ", '')

                # Find current date.
                todayDate = (datetime.date.today()).strftime('%Y%m%d')

                # Find date of a month ago.
                pastMonthDate = (
                    datetime.date.today() -
                    datetime.timedelta(days=30)).strftime('%Y%m%d')

                # Build the Wikimedia pageviews REST API URL for the past month.
                trafficStatisticsURL = (
                    "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/"
                    + "all-access/user/" +
                    articleEncodedTitle.replace("/", "-") + "/daily/" +
                    pastMonthDate + "/" + todayDate)

                viewsErrorsNum = 0
                while viewsErrorsNum < 10:
                    try:
                        print("Requesting " + str(trafficStatisticsURL))
                    except UnicodeEncodeError:
                        print(
                            "UnicodeEncodeError: 'ascii' codec can't encode character."
                        )
                    try:
                        numberofViewsResponse = requests.get(
                            trafficStatisticsURL)
                        numberofViewsResponse = numberofViewsResponse.json()
                        numberofViewsPerDays = numberofViewsResponse['items']
                        break
                    except:
                        print(
                            "\n\n\nI cannot retrieve the number of views of this article!"
                        )
                        time.sleep(1)
                        viewsErrorsNum += 1

                # Print out the number of views in the last month.
                viewsNum = 0
                if viewsErrorsNum < 10:
                    for numberofViewsPerDay in numberofViewsPerDays:
                        viewsNum += numberofViewsPerDay['views']

                statsContext['viewsNum'] = viewsNum
                print("# of views in the last month: " + str(viewsNum))

                if articleSoup != False:

                    # Retrieve the fully formatted reference tags.
                    referenceList = articleSoup.findAll(
                        'li', id=re.compile('cite_note.*'))

                    # Record the total number of references.
                    statsContext['referencesNum'] = len(referenceList)

                    # If referenceList is None, return nothing.
                    # if referenceList != None:
                    #     referenceList = referenceList.findAll('li')
                    # else:
                    #     referenceList = [ ]

                    # Count the citations on the page published after afterYear.
                    afterYear = 2010

                    # Number of references which have been published after afterYear.
                    referencesNumber = 0

                    # Among all the references, find the ones published after afterYear.
                    for reference in referenceList:

                        # Find the year digits.
                        yearDigits = re.search('.*(\d\d\d\d).*',
                                               reference.renderContents())

                        # If year digits are found, convert yearDigits to an integer and check whether it is at least afterYear.
                        if yearDigits != None and int(
                                yearDigits.group(1)) >= afterYear:

                            # Increment the number of references which have been published after afterYear.
                            referencesNumber += 1

                    # Record the number of references published after afterYear.
                    statsContext['referencesNumAfter2010'] = referencesNumber

                # Otherwise:
                else:

                    statsContext['referencesNum'] = '0'

                    # Record zero references published after afterYear.
                    statsContext['referencesNumAfter2010'] = '0'

                # Calculate the number of external hyperlinks in this page in the first part of the response.
                if 'extlinks' in pageData:
                    numberOfExternalLinks = len(pageData['extlinks'])

            else:
                # Count the external hyperlinks reported in the continued parts of the response.
                if 'extlinks' in pageData:
                    numberOfExternalLinks += len(pageData['extlinks'])

            # Mark that the first part of the response has been processed.
            responseCounter = 1

            # Print out the number of external hyperlinks in this page.
            print "# External Hyperlinks: " + str(numberOfExternalLinks), '\n'

            statsContext['externalLinks'] = numberOfExternalLinks

            # Return the collected page statistics.
            return statsContext

        # Otherwise:
        else:

            # Return an empty list to indicate that no page data was collected.
            return []
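
# WikipediaPageStats() iterates over GETRequestFromWikipedia(), which is not
# defined in this listing. A minimal Python 3 sketch under the assumption that
# it is a generator yielding the 'query' portion of each continued MediaWiki
# action=query response (each yielded dict contains 'pages'); the original
# implementation may differ.
import requests

WIKIPEDIA_API_URL = "https://en.wikipedia.org/w/api.php"


def GETRequestFromWikipedia(params):
    request = dict(params)
    request['action'] = 'query'
    request['format'] = 'json'
    lastContinue = {}
    while True:
        # Clone the original request and add the continuation parameters.
        req = dict(request)
        req.update(lastContinue)
        response = requests.get(WIKIPEDIA_API_URL, params=req).json()
        if 'error' in response:
            raise RuntimeError(response['error'])
        if 'query' in response:
            yield response['query']
        if 'continue' not in response:
            break
        lastContinue = response['continue']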