def getGoogleLinks(searchKey, startPage, filters):
    print(' >> Starting Google Search for Links << \n')
    scrapeLinks = []  # accumulate across all pages; resetting this inside the loop would discard earlier results
    for loop in range(15):
        try:
            startPage += 1
            result = google.search(searchKey, startPage)

            for link in result:
                # skip results whose URL could not be resolved
                if str(link.link) == NOT_FOUND:
                    continue

                # drop any link whose URL contains a filter keyword
                flag = False
                for filterKey in filters:
                    if filterKey in link.link:
                        flag = True
                        break

                if not flag:
                    scrapeLinks.append(link.link)

        except Exception as e:
            error_logger.logError(format(e))

    return startPage, scrapeLinks
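# A minimal, self-contained sketch of the filtering rule used above, pulled
# out as a standalone helper. _passesFilters is hypothetical (not part of
# this repo) and exists only to illustrate the keep/drop decision:
def _passesFilters(url, filters):
    # keep a URL only if no filter keyword occurs anywhere in it
    return not any(filterKey in url for filterKey in filters)

print(_passesFilters('https://example.com/page', ['facebook.com']))       # True
print(_passesFilters('https://facebook.com/somebody', ['facebook.com']))  # False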
def listOptions():
    while True:
        try:
            print('** Google Scraper ** ')
            print('** ============== ** ')
            print('=> Options: ')
            print(' 1. Start Scraping of Existing Search Keyword')
            print(' 2. Enter new Keyword and Deprecate Old One')
            print(' 3. Enter new Filter Key Word')
            print(' 4. Show Details of Existing Keyword')
            print(' 5. Delete Filter Key Word')
            print(' 6. Clear Screen')
            print(' 7. Exit System')
            usrInput = int(input('\n\n  => Enter Option Number : '))
            print('\n\nxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n\n')
            if usrInput == 1:
                content_extractor.startScraping()
            elif usrInput == 2:
                dictionary.enterNewKeyword()
            elif usrInput == 3:
                dictionary.enterNewFilter()
            elif usrInput == 4:
                dictionary.displayDictionaryDetail()
            elif usrInput == 5:
                dictionary.deleteFilter()
            elif usrInput == 6:
                clearScreen()
            elif usrInput == 7:
                break
            else:
                print(' >> Wrong Input <<')

            print('\n\nxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n\n')
        except Exception as e:
            error_logger.logError(format(e))
def getHtml(url):
    try:
        driver.get(url)
        # the script's return value is unused; page_source below provides the HTML
        driver.execute_script('return document.documentElement.outerHTML')
        return BeautifulSoup(driver.page_source, 'lxml')

    except Exception as e:
        error_logger.logError('Error in Fetching HTML == ' + format(e), url)

    return False
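# getHtml relies on a module-level Selenium `driver`. The repo's actual
# configuration is not shown here; a plausible headless-Chrome setup would be:
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument('--headless')     # no visible browser window
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=options)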
def writeCSV(data, url=''):
    try:
        # append one row per call; newline='' prevents blank lines on Windows
        with open('Contact-Lists-' + datetime.now().strftime('%d-%b-%y') +
                  '.csv',
                  'a',
                  newline='',
                  encoding="utf-8") as fh:
            csvWriter = csv.writer(fh)
            csvWriter.writerow(data)
    except Exception as e:
        error_logger.logError(
            'Error in Writing Data into the file == ' + format(e), url)
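# Example call (assumes the module-level `import csv` and
# `from datetime import datetime` this repo relies on). Appends one row,
# pairing a site with an address found on it, to today's dated CSV:
writeCSV(['https://example.com', 'mailto:info@example.com'])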
def startScraping():
    try:
        dictionaryDetail = dictionary.readDict()
        searchKey = dictionaryDetail['search_key']
        filters = dictionaryDetail['filters']
        startPage = dictionaryDetail['next_page']
        timeNow = datetime.now()

        if searchKey == '':
            print(
                ' There is no search key to scrape. Please select option 2 and enter a search key.'
            )
            return

        if not startPage:
            startPage = 1

        # throttle: allow at most one run per 24 hours
        if dictionaryDetail['last_executed'] != '' and (
                timeNow -
                datetime.strptime(dictionaryDetail['last_executed'],
                                  '%Y-%m-%d %H:%M:%S.%f')) < timedelta(1):
            print(
                ' =>> 24 hours have not passed since the last run; the script cannot start yet. <<=\n\n'
            )
            driver.quit()
            exit()

        # crawl up to 50 result pages, visiting each page's links as we go
        for loop in range(50):
            print(' >> Starting Google Search for Links << \n')
            scrapeLinks = []
            result = google.search(searchKey, startPage)
            for link in result:
                flag = False
                for filterKey in filters:
                    if filterKey in link.link:
                        flag = True
                        break

                if flag:
                    continue

                scrapeLinks.append(link.link)
            startPage += 1
            visitWebsites(scrapeLinks)

        dictionaryDetail['next_page'] = startPage
        dictionaryDetail['last_executed'] = timeNow.strftime(
            '%Y-%m-%d %H:%M:%S.%f')
        dictionary.writeDict(dictionaryDetail)
        driver.quit()
    except Exception as e:
        error_logger.logError(format(e))
def checkResponsive(html):
    try:
        # heuristics for a responsive site: a viewport meta tag, an element
        # with a 'responsive' class, inline SVG, or the word 'responsive'
        # anywhere in the markup (NOT_FOUND matches str() of a missing tag)
        meta = html.find('meta', {'name': 'viewport'})
        responsive = html.find('div', {'class': 'responsive'})
        svg = html.find('svg')
        if str(meta) == NOT_FOUND and str(responsive) == NOT_FOUND and str(
                svg) == NOT_FOUND and ('responsive' not in str(html)):
            return False

        return True
    except Exception as e:
        error_logger.logError(format(e))
        return True
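# Quick illustration of the markers the heuristic looks for, against a
# hand-written page (made-up HTML, purely for demonstration):
from bs4 import BeautifulSoup

page = BeautifulSoup(
    '<html><head><meta name="viewport" content="width=device-width">'
    '</head><body><svg></svg></body></html>', 'lxml')
print(page.find('meta', {'name': 'viewport'}) is not None)  # True: viewport tag present
print(page.find('svg') is not None)                         # True: inline SVG present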
def extractEmails(html, baseUrl):
    try:
        mails = []
        hrefs = html.find_all('a')
        flag = False
        for href in hrefs:
            # collect mailto: links and any anchor text that looks like an address
            if 'mailto' in str(href.get('href')):
                mails.append(str(href.get('href')))
                flag = True

            if '@' in str(href.get_text()):
                mails.append(str(href.get_text()))
                flag = True

        if flag:
            mails.insert(0, baseUrl)
            writeFile(mails, baseUrl)
            print(' => Contact Extracted Successfully for => ' + baseUrl)
    except Exception as e:
        error_logger.logError(format(e))
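# The anchor-based scan above misses addresses that appear only in plain
# text. A regex sweep over the whole document is a common complement; this
# helper is hypothetical (not part of the repo) and shows one way to do it:
import re

EMAIL_RE = re.compile(r'[\w.+-]+@[\w-]+\.[\w.-]+')

def findEmailsInText(html):
    # deduplicate and sort anything shaped like an email address
    return sorted(set(EMAIL_RE.findall(str(html))))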
from urllib.parse import urljoin

def visitWebsites(links):
    for link in links:
        try:
            # reduce the link to its scheme + host, e.g. https://example.com
            baseUrl = link.split('://')
            baseUrl = baseUrl[0] + '://' + baseUrl[1].split('/')[0]
            html = getHtml(baseUrl)
            if checkResponsive(html):
                print(' => website is responsive => ' + baseUrl)
                continue
            print(' => website is not responsive => ' + baseUrl)

            extractEmails(html, baseUrl)
            # also follow typical imprint/contact pages for more addresses
            allLinks = html.find_all('a')
            for href in allLinks:
                if 'impressum' in str(href.get('href')) or 'kontakt' in str(
                        href.get('href')) or 'uber' in str(href.get('href')):
                    # urljoin resolves relative hrefs and keeps absolute ones intact
                    extractEmails(
                        getHtml(urljoin(baseUrl + '/', str(href.get('href')))),
                        baseUrl)
        except Exception as e:
            error_logger.logError(format(e), link)
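# urljoin matters when following contact links: a naive baseUrl + '/' + href
# mangles absolute hrefs, while urljoin handles both forms. For example:
from urllib.parse import urljoin

base = 'https://example.com'
print(urljoin(base + '/', 'impressum'))                    # https://example.com/impressum
print(urljoin(base + '/', 'https://example.com/kontakt'))  # absolute href kept intact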
def startScraping():
    try:
        dictionaryDetail = dictionary.readDict()
        searchKey = dictionaryDetail['search_key']
        filters = dictionaryDetail['filters']
        startPage = dictionaryDetail['next_page']
        timeNow = datetime.now()

        if searchKey == '':
            print(
                ' There is no search key to scrape. Please select option 2 and enter a search key.'
            )
            driver.quit()
            return

        if not startPage:
            startPage = 1

        # throttle: allow at most one run per 24 hours
        if dictionaryDetail['last_executed'] != '' and (
                timeNow -
                datetime.strptime(dictionaryDetail['last_executed'],
                                  '%Y-%m-%d %H:%M:%S.%f')) < timedelta(1):
            print(
                ' =>> 24 hours have not passed since the last run; the script cannot start yet. <<=\n\n'
            )
            driver.quit()
            exit()

        dictionaryDetail['last_executed'] = timeNow.strftime(
            '%Y-%m-%d %H:%M:%S.%f')
        finalPage, scrapedLinks = getGoogleLinks(searchKey, startPage, filters)
        dictionaryDetail['next_page'] = finalPage
        dictionary.writeDict(dictionaryDetail)

        visitWebsites(scrapedLinks)
    except Exception as e:
        error_logger.logError(format(e))

    driver.quit()
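# A plausible entry point tying these pieces together. The repo's actual
# main module is not shown here, so this wiring is an assumption:
if __name__ == '__main__':
    try:
        listOptions()    # interactive menu drives everything else
    finally:
        driver.quit()    # make sure the browser is released on exit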