def email_status(step):
    """Email a START or END status notice for the commodity data scrape.

    Settings are read from the module-level mapping ``g``.

    step -- 'START' mails a start notification. 'END' queries
            PY_COMMODITY_DATA for the five most recent ``msmt_dte_id``
            groups and mails them as an HTML table. Any other value is
            ignored.
    """
    if step == 'START':
        # SUBJECT = PACKAGE NAME + START TIMESTAMP
        subject = (g['PKG_NME_PRNT'] + ' - STARTED @ ' +
                   time.strftime("%Y-%m-%d %H:%M:%S"))
        notice = pyMail(subject, **g)
        notice.htmladd('End Of Message')
        notice.send(**g)
        return

    if step != 'END':
        return

    # =========================================================================
    # PULL THE SUMMARY ROWS FOR THE EMAIL
    # =========================================================================
    sql_template = (
        "SELECT msmt_dte_id, count(*) as row_count, "
        "round(sum(INDEX_VAL),4) as index_val, "
        "round(sum(HIGH_VAL),4) as high_val, "
        "round(sum(low_VAL),4) as low_val, "
        "round(sum(TTL_MRKT_VAL),4) as ttl_mrkt_val "
        "FROM {0} WHERE 1 = 1 "
        "GROUP BY msmt_dte_id "
        "ORDER BY msmt_dte_id DESC LIMIT 5"
    )
    database = pyDB(g['DB'])
    summary_rows = database.query(sql_template.format('PY_COMMODITY_DATA'))

    # =========================================================================
    # RENDER THE ROWS AS AN HTML TABLE (ONE <tr> PER RESULT ROW, 6 COLUMNS)
    # =========================================================================
    table_head = (
        '<table cellpadding="8" cellspacing="3" border="1"> <tr> '
        '<th>msmt_date_id</th> <th>row_count</th> <th>index_val</th> '
        '<th>high_val</th> <th>low_val</th> <th>ttl_mrkt_val</th> </tr>'
    )
    table_body = ''.join(
        '<tr><td>' +
        '</td><td>'.join(str(row[col]) for col in range(6)) +
        '</td></tr>'
        for row in summary_rows
    )
    summary_table = table_head + table_body + '</table>'

    # SUBJECT = PACKAGE NAME + END TIMESTAMP
    subject = (g['PKG_NME_PRNT'] + ' - ENDED @ ' +
               time.strftime("%Y-%m-%d %H:%M:%S"))
    report = pyMail(subject, **g)
    # OPENING LINE, THEN A CAPTION, THEN THE TABLE BUILT ABOVE
    report.htmladd('Scrape has completed for : ' + g['PKG_NME_PRNT'])
    report.htmladd('Summary of Scrape for ' + g['PKG_NME_PRNT'])
    report.htmladd(summary_table)
    report.send(**g)
def email_status(step):
    """Email a START or END status notice for the Twitter data scrape.

    Settings are read from the module-level mapping ``g``.

    step -- 'START' mails a start notification. 'END' mails a table of row
            counts per ``MSMT_DTE_ID`` / ``CNTRY_CDE`` from
            PY_EMP_TWITTER_DATA, limited by the query to the last six days.
            Any other value is ignored.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    if step == 'START':
        start_subject = (g['PKG_NME_PRNT'] + ' - STARTED @ ' +
                         time.strftime(timestamp_format))
        start_mail = pyMail(start_subject, **g)
        start_mail.htmladd('End Of Message')
        start_mail.send(**g)
    elif step == 'END':
        # =====================================================================
        # COLLECT PER-DAY / PER-COUNTRY ROW COUNTS
        # =====================================================================
        count_sql = (
            "SELECT MSMT_DTE_ID, CNTRY_CDE, count(*) as row_count "
            "FROM {0} WHERE 1 = 1 "
            "AND msmt_dte_id >= "
            "strftime('%Y%m%d', date('now','localtime','-6 day')) "
            "GROUP BY MSMT_DTE_ID, CNTRY_CDE "
            "ORDER BY CASE CNTRY_CDE WHEN 'AU' THEN 1 WHEN 'NZ' THEN 2 "
            "WHEN 'UK' THEN 3 WHEN 'CA' THEN 4 WHEN 'US' THEN 5 END , 1 DESC"
        ).format('PY_EMP_TWITTER_DATA')
        counts = pyDB(g['DB']).query(count_sql)

        # =====================================================================
        # BUILD THE HTML TABLE FRAGMENTS, THEN JOIN THEM ONCE
        # =====================================================================
        fragments = [
            '<table cellpadding="8" cellspacing="3" border="1"> <tr> '
            '<th>msmt_date_id</th> <th>cntry_cde</th> <th>row_count</th> </tr>'
        ]
        for record in counts:
            day_id, country, row_count = record[0], record[1], record[2]
            fragments.append('<tr><td>' + str(day_id) +
                             '</td><td>' + str(country) +
                             '</td><td>' + str(row_count) + '</td></tr>')
        fragments.append('</table>')
        counts_table = ''.join(fragments)

        end_subject = (g['PKG_NME_PRNT'] + ' - ENDED @ ' +
                       time.strftime(timestamp_format))
        end_mail = pyMail(end_subject, **g)
        # OPENING LINE, CAPTION, THEN THE TABLE
        end_mail.htmladd('Scrape has completed for : ' + g['PKG_NME_PRNT'])
        end_mail.htmladd('Summary of Scrape for ' + g['PKG_NME_PRNT'])
        end_mail.htmladd(counts_table)
        end_mail.send(**g)
def _html_table(headers, rows):
    """Render *rows* as a bordered HTML table with one column per header.

    Only the first ``len(headers)`` values of each row are rendered; every
    value is converted with ``str()``.
    """
    width = len(headers)
    parts = ['<table cellpadding="8" cellspacing="3" border="1"> <tr> ']
    parts.append(' '.join('<th>' + label + '</th>' for label in headers))
    parts.append(' </tr>')
    for row in rows:
        cells = '</td><td>'.join(str(row[col]) for col in range(width))
        parts.append('<tr><td>' + cells + '</td></tr>')
    parts.append('</table>')
    return ''.join(parts)


def email_status(step):
    """Email a START or END status notice for the NZ job-ads scrape.

    Settings are read from the module-level mapping ``g``.

    step -- 'START' mails a start notification. 'END' mails two tables built
            from WEBDATA_JOBADS for country code NZ: the five most recent
            ``msmt_dte_id`` groups, and a current-day versus previous-day
            comparison per site. Any other value is ignored.
    """
    # One source of truth for the country and table, so the subject, the body
    # text and both queries cannot drift apart. The END body previously said
    # "UK" while the subject and the SQL were for NZ.
    country = 'NZ'
    table = 'WEBDATA_JOBADS'
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    subject_prefix = g['PKG_NME_PRNT'] + ' : ' + country

    if step == 'START':
        # SUBJECT & RECIPIENTS
        mymail = pyMail(
            subject_prefix + ' - STARTED @ ' + time.strftime(timestamp_format),
            **g)
        mymail.htmladd('End Of Message')
        mymail.send(**g)

    elif step == 'END':
        # =====================================================================
        # SUMMARY 1: ROW / JOB COUNTS FOR THE FIVE MOST RECENT MEASUREMENT DAYS
        # =====================================================================
        dbmgr = pyDB(g['DB'])
        q = r"""SELECT msmt_dte_id, cntry_cde, count( * ) AS row_cnt,
                       sum(facet_cnt) as job_count
                FROM {0}
                WHERE cntry_cde = '{1}'
                GROUP BY msmt_dte_id, cntry_cde
                ORDER BY msmt_dte_id DESC
                LIMIT 5""".format(table, country)
        htmlRes = _html_table(
            ('msmt_date_id', 'cntry_cde', 'row_cnt', 'job_cnt'),
            dbmgr.query(q))

        # =====================================================================
        # SUMMARY 2: CURRENT DAY VS PREVIOUS DAY, PER SITE
        # =====================================================================
        # A fresh pyDB is created for the second query, as the original did.
        dbmgr = pyDB(g['DB'])
        q = r"""select max(MSMT_DTE_ID) as msmt_dte_id,
                       CNTRY_CDE,
                       SITE_CDE,
                       SUM( CURR_ROW_CNT ) AS CURR_ROW_CNT,
                       SUM( PREV_ROW_CNT ) AS PREV_ROW_CNT,
                       SUM( CURR_FACET_CNT ) AS CURR_FACET_CNT,
                       SUM( PREV_FACET_CNT ) AS PREV_FACET_CNT
                from (
                    select msmt_dte_id, cntry_cde, site_cde,
                           case when MSMT_DTE_ID = strftime( '%Y%m%d', date( 'now', 'localtime' ) )
                                then count(*) else 0 end as CURR_ROW_CNT,
                           case when MSMT_DTE_ID = strftime( '%Y%m%d', date( 'now', 'localtime', '-1 day' ) )
                                then count(*) else 0 end as PREV_ROW_CNT,
                           cast(case when MSMT_DTE_ID = strftime( '%Y%m%d', date( 'now', 'localtime' ) )
                                then sum( FACET_CNT ) else 0 end as INTEGER) as CURR_FACET_CNT,
                           cast(case when MSMT_DTE_ID = strftime( '%Y%m%d', date( 'now', 'localtime', '-1 day' ) )
                                then sum( FACET_CNT ) else 0 end as INTEGER) as PREV_FACET_CNT
                    from {0}
                    where 1 = 1
                      and cntry_cde = '{1}'
                      and MSMT_DTE_ID >= strftime( '%Y%m%d', date( 'now', 'localtime', '-1 day' ) )
                    group by msmt_dte_id, cntry_cde, site_cde
                )
                group BY CNTRY_CDE, SITE_CDE
                order by 1, 3""".format(table, country)
        htmlRes2 = _html_table(
            ('msmt_date_id', 'cntry_cde', 'site_cde', 'curr_row_cnt',
             'prev_row_cnt', 'curr_facet_cnt', 'prev_facet_cnt'),
            dbmgr.query(q))

        # SUBJECT & RECIPIENTS
        mymail = pyMail(
            subject_prefix + ' - ENDED @ ' + time.strftime(timestamp_format),
            **g)
        # OPENING LINE OF EMAIL
        mymail.htmladd('Scrape has completed for : ' + subject_prefix)
        # CAPTION + FIRST TABLE
        mymail.htmladd('Summary of Scrape for ' + subject_prefix)
        mymail.htmladd(htmlRes)
        # CAPTION + SECOND TABLE
        mymail.htmladd('CURR and PREV days comparison')
        mymail.htmladd(htmlRes2)
        # SEND
        mymail.send(**g)
def _ascii_sql_text(value):
    """Return *value* reduced to ASCII with '?' and single quotes removed.

    Non-ASCII characters are first replaced by '?' by the encoder and then
    dropped together with any literal '?'. ``None`` becomes an empty string
    so optional tweet fields do not raise.
    """
    if value is None:
        return ''
    text = str(value.encode('ascii', 'replace').decode("utf-8"))
    return text.replace('?', '').replace("'", '')


def _tweet_insert_sql(tweet, sentmnt_mtch, place_id, cc, g):
    """Build the INSERT statement that stores one tweet in g['TBL_NME']."""
    # NOTE(review): values are interpolated into the SQL text; single quotes
    # are stripped from the free-text fields instead of being bound as
    # parameters, because pyDB.query's parameter support is not visible here.
    # Prefer bound parameters if pyDB offers them.
    created_at = str(tweet.created_at)
    # tweet.place is None for tweets without place data; store an empty
    # place name instead of raising AttributeError.
    place_name = tweet.place.name if tweet.place is not None else None
    in_reply_to = tweet.in_reply_to_status_id_str
    return r"""INSERT INTO {0} (MSMT_DTE_ID, CREATED_AT, TWEET_ID, USER_ID, USER_NAME, USER_SCREEN_NAME, USER_LOCATION, CNTRY_ID, CNTRY_CDE, PLACE_NAME, SENTMT_MATCH, TWEET_TXT, IN_REPLY_TO, RE_TWEETED, PRCES_DTE_ID, STARTED_AT, FINISHED_AT) VALUES ({1}, '{2}', {3}, {4}, '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', '{12}', '{13}', '{14}', {15}, '{16}', '{17}')""".format(
        g['TBL_NME'],  # [0]
        created_at.split(' ')[0].replace('-', ''),  # [1] date part as YYYYMMDD
        created_at,  # [2]
        tweet.id,  # [3]
        tweet.user.id,  # [4]
        _ascii_sql_text(tweet.user.name).strip(),  # [5]
        _ascii_sql_text(tweet.user.screen_name).strip(),  # [6]
        _ascii_sql_text(tweet.user.location).strip().upper(),  # [7]
        str(place_id),  # [8]
        cc,  # [9]
        _ascii_sql_text(place_name).strip().upper(),  # [10]
        sentmnt_mtch,  # [11]
        _ascii_sql_text(tweet.text).replace('\n', '. ').replace(
            '. . ', '. '),  # [12]
        'NOVAL' if in_reply_to is None else str(in_reply_to).upper(),  # [13]
        str(tweet.retweeted).upper(),  # [14]
        g['MSMT_DTE_ID'],  # [15]
        g['STARTED_AT'],  # [16]
        ''  # [17] FINISHED_AT is left blank at insert time
    )


def tweepySearch(searchQuery, sinceId, sentmnt_mtch, place_id, cc, **g):
    """Page through Twitter search results and store each tweet in the DB.

    Searches for ``searchQuery`` in batches of g['TWIT_TWEETS_PER_QRY'],
    walking backwards through results via ``max_id``, until
    g['TWIT_MAX_TWEETS'] tweets have been fetched or a search returns
    nothing. Each tweet is written to g['TBL_NME'] through ``pyDB``.

    searchQuery  -- query string passed to the search API.
    sinceId      -- if truthy, only tweets newer than this id are requested.
    sentmnt_mtch -- stored as-is in SENTMT_MATCH.
    place_id     -- stored (as text) in CNTRY_ID.
    cc           -- stored as-is in CNTRY_CDE.
    **g          -- settings; keys read here are TWIT_CNSMR_KEY,
                    TWIT_CNSMR_SECRET, TWIT_MAX_TWEETS, TWIT_TWEETS_PER_QRY,
                    DB, TBL_NME, MSMT_DTE_ID, STARTED_AT and PKG_NME.

    On ``tweepy.TweepError`` the error is logged via ``pyDB.write_log``, an
    error email is sent and the download loop stops. Returns None.
    """
    # SETUP TWITTER AUTHORISATION
    auth = tweepy.AppAuthHandler(g['TWIT_CNSMR_KEY'], g['TWIT_CNSMR_SECRET'])
    api = tweepy.API(auth, wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
    if not api:
        print('UNABLE TO AUTHENTICATE')
        sys.exit(-1)

    # max_id <= 0 MEANS "NO UPPER LIMIT": START FROM THE MOST RECENT MATCH
    max_id = -1
    tweetCount = 0
    print("Downloading max {0} tweets".format(g['TWIT_MAX_TWEETS']))

    while tweetCount < int(g['TWIT_MAX_TWEETS']):
        try:
            # One call replaces the four near-identical branches: the optional
            # bounds are only passed when they apply.
            search_args = {
                'q': searchQuery,
                'count': int(g['TWIT_TWEETS_PER_QRY']),
            }
            if max_id > 0:
                search_args['max_id'] = str(max_id - 1)
            if sinceId:
                search_args['since_id'] = sinceId
            tweets = api.search(**search_args)

            if not tweets:
                print("No more tweets found")
                break

            # =================================================================
            # WRITE EACH TWEET OF THIS BATCH TO THE LOCAL DB
            # =================================================================
            for tweet in tweets:
                dbmgr = pyDB(g['DB'])
                dbmgr.query(
                    _tweet_insert_sql(tweet, sentmnt_mtch, place_id, cc, g))

            tweetCount += len(tweets)
            print("Downloaded {0} tweets".format(tweetCount))
            # Next page continues below the oldest tweet of this batch.
            max_id = tweets[-1].id

        except tweepy.TweepError:
            # capture a finish time to be entered into the db
            finished_at = time.strftime("%Y-%m-%d %H:%M:%S")
            err_info = sys.exc_info()
            print('ERROR ENCOUNTERED : ' + str(err_info))

            # =================================================================
            # WRITE RESULTS OF ERROR TO LOCAL DB
            # =================================================================
            dbmgr = pyDB(g['DB'])
            dbmgr.write_log(finished_at,
                            'TWITTER SEARCH ERROR : ' + str(err_info), **g)

            # =================================================================
            # EMAIL SUMMARY OF ERROR TO DISTRIBUTION LIST
            # =================================================================
            htmlRes = (
                '<table cellpadding="8" cellspacing="3" border="3"> <tr> '
                '<th>msmt_date_id</th> <th>pkg_nme</th> '
                '<th>start_datetime</th> <th>end_datetime</th> '
                '<th>status</th> </tr>'
                '<tr><td>' + str(g['MSMT_DTE_ID']) +
                '</td><td>' + str(g['PKG_NME']) +
                '</td><td>' + str(g['STARTED_AT']) +
                '</td><td>' + str(finished_at) +
                '</td><td>' + 'ERROR' + '</td></tr>'
                '</table>'
            )

            # SUBJECT & RECIPIENTS
            mymail = pyMail(
                str(g['PKG_NME']) + ' - ERROR ENCOUNTERED @ ' +
                time.strftime("%Y-%m-%d %H:%M:%S"), **g)
            mymail.htmladd(
                'A TWITTER SEARCH ERROR was encountered for package : ' +
                str(g['PKG_NME']))
            mymail.htmladd('Summary of ERROR')
            mymail.htmladd(htmlRes)
            mymail.htmladd('<b><u>ERROR DETAIL</u></b>')
            # Angle brackets are swapped so the error text is not parsed as
            # HTML markup.
            mymail.htmladd(str(err_info).replace('<', '(').replace('>', ')'))
            mymail.send()

            # STOP DOWNLOADING AFTER ANY SEARCH ERROR
            print("some error : " + str(err_info))
            break

    print("Downloaded {0} tweets".format(tweetCount))
def htmlPass(url, **g):
    """Fetch the HTML for *url*, either directly or through a Chrome driver.

    url -- address to fetch.
    **g -- settings. g['USES_WEB_DRVR'] selects the method:
           'N' fetches with urllib and returns the bytes from ``read()``;
           'Y' loads the page in Chrome (driver located from g['DRVR_PATH']
           and g['WEB_DRVR_NME']) and returns ``driver.page_source``.
           The error path also reads DB, MSMT_DTE_ID, PKG_NME and STARTED_AT.

    On any failure the error is logged via ``pyDB.write_log``, an error email
    is sent and the script exits (SystemExit), so the function only returns
    on success.
    """
    try:
        # ================================================================
        # EXTRACT HTML USING PARSER OR WEB DRIVER CONFIG
        # ================================================================
        mode = g['USES_WEB_DRVR']
        if mode == 'N':
            request = Request(url)
            # add_header is a method; assigning a list to it (as before)
            # replaced the method and never set the header.
            request.add_header('User-agent', 'Google Chrome')
            with urlopen(request) as response:
                return response.read()

        if mode == 'Y':
            driver = webdriver.Chrome(
                executable_path=str(g['DRVR_PATH'] + '\\' + g['WEB_DRVR_NME']))
            try:
                driver.get(url)
                # SEEK REDIRECTS THE PAGE; WAIT SO THE FINAL PAGE IS RENDERED
                # BEFORE ITS HTML IS CAPTURED
                if 'SEEK' in url.upper():
                    time.sleep(10)
                return driver.page_source
            finally:
                # Always release the browser, even when the page load fails.
                driver.close()
                driver.quit()
                driver.stop_client()

        # Previously an unexpected value only surfaced as an unbound-variable
        # error; report it explicitly through the same error path.
        raise ValueError(
            "USES_WEB_DRVR must be 'Y' or 'N', got " + repr(mode))

    except Exception:
        # capture a finish time to be entered into the db
        finished_at = time.strftime("%Y-%m-%d %H:%M:%S")
        err_info = sys.exc_info()
        print('ERROR ENCOUNTERED : ' + str(err_info))

        # =====================================================================
        # WRITE RESULTS OF ERROR TO LOCAL DB
        # =====================================================================
        dbmgr = pyDB(g['DB'])
        dbmgr.write_log(finished_at, 'HTML PASSING ERROR: ' + str(err_info),
                        **g)

        # =====================================================================
        # EMAIL SUMMARY OF ERROR TO DISTRIBUTION LIST
        # =====================================================================
        htmlRes = (
            '<table cellpadding="8" cellspacing="3" border="3"> <tr> '
            '<th>msmt_date_id</th> <th>pkg_nme</th> <th>start_datetime</th> '
            '<th>end_datetime</th> <th>status</th> </tr>'
            '<tr><td>' + str(g['MSMT_DTE_ID']) +
            '</td><td>' + str(g['PKG_NME']) +
            '</td><td>' + str(g['STARTED_AT']) +
            '</td><td>' + str(finished_at) +
            '</td><td>' + 'ERROR' + '</td></tr>'
            '</table>'
        )

        # SUBJECT & RECIPIENTS
        mymail = pyMail(
            str(g['PKG_NME']) + ' - ERROR ENCOUNTERED @ ' +
            time.strftime("%Y-%m-%d %H:%M:%S"), **g)
        mymail.htmladd(
            'A HTML PASSING ERROR was encountered for package : ' +
            str(g['PKG_NME']))
        mymail.htmladd('Summary of ERROR')
        mymail.htmladd(htmlRes)
        mymail.htmladd('<b><u>ERROR DETAIL</u></b>')
        # Angle brackets are swapped so the error text is not parsed as HTML.
        mymail.htmladd(str(err_info).replace('<', '(').replace('>', ')'))
        mymail.send()

        # QUIT EXECUTION OF PYTHON SCRIPT
        # sys.exit() instead of quit(): quit() is a helper installed by the
        # site module and is not guaranteed to exist.
        sys.exit()
def htmlDownloadLink(url, fileSearchStr, linkId, **g):
    """Open *url* in Chrome and optionally click a link to start a download.

    url           -- page to open.
    fileSearchStr -- currently unused; kept for interface compatibility (it
                     was only referenced by a disabled download-polling loop).
    linkId        -- element id to click; an empty string skips the click.
    **g           -- settings. Reads SLEEP_VAL (or LOOP_RNDM_SLEEP_LOW /
                     LOOP_RNDM_SLEEP_HIGH as a random fallback), DRVR_PATH,
                     WEB_DRVR_NME and, when present, CONFIG['DB_DIR']. The
                     error path also reads DB, MSMT_DTE_ID, PKG_NME and
                     STARTED_AT.

    Returns None. On any failure the error is logged via ``pyDB.write_log``,
    an error email is sent and the script exits (SystemExit).
    """
    # FIXED PAUSE IF CONFIGURED, ELSE A RANDOM ONE SO LOOPING CALLS TO A URL
    # APPEAR MORE "HUMAN"
    try:
        rndm_sleep = int(g['SLEEP_VAL'])
    except (KeyError, TypeError, ValueError):
        rndm_sleep = random.randint(int(g['LOOP_RNDM_SLEEP_LOW']),
                                    int(g['LOOP_RNDM_SLEEP_HIGH']))

    try:
        # ================================================================
        # DOWNLOAD FILE FROM PAGE LINK
        # ================================================================
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_argument("--start-maximized")
        prefs = {
            "profile.default_content_settings.popups": 0,
            "directory_upgrade": True,
        }
        # The download directory used to be the literal text
        # "g['CONFIG']['DB_DIR']__fx\" because the lookup was written inside
        # a string, so Chrome kept saving to its default directory.
        # NOTE(review): assumes g['CONFIG']['DB_DIR'] is a string path that
        # '__fx\\' can be appended to - confirm against the caller. When it
        # is unavailable the preference is omitted and Chrome's default
        # download directory is used, as before.
        try:
            # IMPORTANT - ENDING SLASH V IMPORTANT
            prefs["download.default_directory"] = (
                g['CONFIG']['DB_DIR'] + "__fx\\")
        except (KeyError, TypeError):
            pass
        chromeOptions.add_experimental_option("prefs", prefs)

        driver = webdriver.Chrome(
            executable_path=str(g['DRVR_PATH'] + '\\' + g['WEB_DRVR_NME']),
            chrome_options=chromeOptions)
        try:
            driver.get(url)
            if linkId != '':
                # click the desired page element
                driver.find_element_by_id(linkId).click()
            # NOTE(review): the pause is applied whether or not a link was
            # clicked; the collapsed original does not show which was
            # intended - confirm.
            time.sleep(int(rndm_sleep))
        finally:
            # Always release the browser, even when the page load or the
            # click fails.
            driver.close()
            driver.quit()
            driver.stop_client()

    except Exception:
        # capture a finish time to be entered into the db
        finished_at = time.strftime("%Y-%m-%d %H:%M:%S")
        err_info = sys.exc_info()
        print('ERROR ENCOUNTERED : ' + str(err_info))

        # =====================================================================
        # WRITE RESULTS OF ERROR TO LOCAL DB
        # =====================================================================
        dbmgr = pyDB(g['DB'])
        # Label matches the email below; it previously read
        # 'HTML PASSING ERROR', copied from htmlPass.
        dbmgr.write_log(finished_at, 'DOWNLOAD LINK ERROR: ' + str(err_info),
                        **g)

        # =====================================================================
        # EMAIL SUMMARY OF ERROR TO DISTRIBUTION LIST
        # =====================================================================
        htmlRes = (
            '<table cellpadding="8" cellspacing="3" border="3"> <tr> '
            '<th>msmt_date_id</th> <th>pkg_nme</th> <th>start_datetime</th> '
            '<th>end_datetime</th> <th>status</th> </tr>'
            '<tr><td>' + str(g['MSMT_DTE_ID']) +
            '</td><td>' + str(g['PKG_NME']) +
            '</td><td>' + str(g['STARTED_AT']) +
            '</td><td>' + str(finished_at) +
            '</td><td>' + 'ERROR' + '</td></tr>'
            '</table>'
        )

        # SUBJECT & RECIPIENTS
        mymail = pyMail(
            str(g['PKG_NME']) + ' - ERROR ENCOUNTERED @ ' +
            time.strftime("%Y-%m-%d %H:%M:%S"), **g)
        mymail.htmladd(
            'A DOWNLOAD LINK ERROR was encountered for package : ' +
            str(g['PKG_NME']))
        mymail.htmladd('Summary of ERROR')
        mymail.htmladd(htmlRes)
        mymail.htmladd('<b><u>ERROR DETAIL</u></b>')
        # Angle brackets are swapped so the error text is not parsed as HTML.
        mymail.htmladd(str(err_info).replace('<', '(').replace('>', ')'))
        mymail.send()

        # QUIT EXECUTION OF PYTHON SCRIPT
        # sys.exit() instead of quit(): quit() is a helper installed by the
        # site module and is not guaranteed to exist.
        sys.exit()