def readsavedpdf(path, link):
    """Extract the text of a saved PDF and write it to a sibling .txt file.

    path -- filesystem path of the PDF to read.
    link -- originating URL, written as the first line of the output file.

    Failures are logged via writelog() rather than raised.
    """
    try:
        with open(path, "rb") as pdf_file:
            # BUGFIX: the original used path.replace('.pdf', ''), which also
            # mangles a ".pdf" appearing anywhere else in the path; strip
            # only the trailing suffix.
            base = path[:-4] if path.endswith('.pdf') else path
            reader = PdfFileReader(pdf_file)
            pages = []
            for i in range(reader.getNumPages()):
                pages.append(reader.getPage(i).extractText())
            data = "\n".join(pages)
            # Drop the caron artifact PyPDF leaves behind and collapse all
            # runs of whitespace to single spaces.
            data = " ".join(data.replace(u"\u02c7", " ").strip().split())
            # Files are context-managed, so no manual close/finally needed.
            with open(base + ".txt", 'w') as out:
                out.write(link)
                out.write("\n")
                out.write(data)
    except Exception as e:
        writelog("Exception in reading: " + path + " " + str(e))
def readpdffromweb(url):
    """Download a PDF from *url* into collection/, extract its text via
    readsavedpdf(), then delete the downloaded copy.

    Failures are logged via writelog(); the temporary PDF is always removed
    when it was created, matching the original close-then-remove cleanup.
    """
    # NOTE: renamed from `tempfile` -- that name shadows the stdlib module.
    tempname = url.split('/')[-1]
    temppath = "collection/" + tempname
    try:
        response = urlopen(url)
        with open(temppath, 'wb') as tfile:
            tfile.write(response.read())
        readsavedpdf(temppath, url)
    except Exception as e:
        writelog("Exception in crawling: " + url + " " + str(e))
    finally:
        # Remove the downloaded copy whether or not extraction succeeded.
        if os.path.exists(temppath):
            os.remove(temppath)
def amigo_init(tick, category, freq):
    """(Re)create the MySQL table backing one amigobulls scrape.

    The table is named amigo_<tick>_<category>_<freq> (dashes in category
    become underscores); its columns come from the category's config file.
    Returns 0 on success; exits the process when the config is missing.
    """
    cname = getconf(category)
    if not cname:
        p = [tick, category, freq]
        writelog('[CRITICAL] No Configuration File Found', 'amigo_init', p)
        # sys.exit raises SystemExit, so the original `return 1` after it
        # was unreachable and has been removed.
        sys.exit('[CRITICAL] No Configuration File Found')
    tname = 'amigo_' + tick + '_' + category.replace('-', '_') + '_' + freq
    # Drop any stale copy, then recreate with an auto-increment id plus one
    # VARCHAR(10) column per configured name.
    # NOTE(review): table/column names are string-built from inputs and
    # config files -- safe only while those come from trusted local sources.
    dberase('DROP TABLE IF EXISTS ' + tname)
    columns = ', '.join(str(li) + ' VARCHAR(10)' for li in cname)
    dbquery('CREATE TABLE ' + tname +
            ' ( id INT(3) UNSIGNED AUTO_INCREMENT PRIMARY KEY, ' + columns + ')')
    return 0
def yql_real(tick, attempts):
    """Scrape realtime quote fields for *tick* from Yahoo Finance and
    replace its row in the yql_real table.

    attempts -- current retry count for IncompleteRead recovery (max 3).
    Returns 0 on success, 1 on failure.
    """
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/d/quotes.csv?s=' + tick + '&f=b2b3c6ej3m2r2j1',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            }
        )
        html = urlopen(req)
        data = html.read()
        # Parsing
        soup = BeautifulSoup(data, 'html.parser')
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered' + str(e), 'yql_real', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered ' + str(e), 'yql_real', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_real', p)
        if attempts < 3:
            # BUGFIX: the retry previously called yql_growth(), silently
            # scraping the wrong dataset; retry this function instead.
            r = yql_real(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_real', p)
            return 1
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_real', p)
        return 1
    # Remove subscripts
    for tag in soup.find_all('sup'):
        tag.replaceWith('')
    # The endpoint returns CSV; split the rendered text on commas.
    ts = str(soup).split(',')
    # Replace any previous row for this ticker.
    dbquery('DELETE FROM yql_real WHERE tick = \'' + tick + '\'')
    iquery = ('INSERT INTO yql_real (tick, ask, bid, rchange, es, marketcap, '
              'dayr, pe, smc) VALUES (\'' + tick + '\',')
    for ele in ts:
        iquery = iquery + '\'' + ele + '\', '
    iquery = iquery[:-2] + ')'
    dbquery(iquery)
    return 0
def dberase(query):
    """Run one destructive SQL statement (e.g. DROP TABLE) against the
    configured database and commit it.

    Exits the process when the 'db' configuration cannot be read.
    """
    settings = getconf('db')
    if not settings:
        writelog('Unable to Read Database Configuration File!', 'dbquery', 'query')
        sys.exit('Database Configuration Not Found --- Exiting...')
    # Config layout: host, port, user, password, database name.
    connection = connect(
        host=settings[0],
        port=int(settings[1]),
        user=settings[2],
        passwd=settings[3],
        db=settings[4])
    cur = connection.cursor()
    cur.execute(query)
    connection.commit()
    cur.close()
    connection.close()
def real_populate():
    """Fetch realtime quote data for every enabled ticker in `ticklist`,
    logging a per-ticker success or failure line."""
    params = []
    writelog('[INFO] Starting Realtime Population', 'real_populate', params)
    connection = pymysql.connect(host='localhost', port=3306, user='******',
                                 passwd='password', db='jsong')
    cursor = connection.cursor()
    cursor.execute("SELECT tick FROM ticklist WHERE enabled = 1")
    msg = '0'
    for count, row in enumerate(cursor.fetchall(), start=1):
        tick = row[0]
        print(str(count) + ": " + tick)
        writelog('[INFO] realtime ' + tick, 'real_populate', params)
        if yql_real(tick, 0) == 1:
            params = [tick]
            writelog('[CRITICAL] Error Occurred During YQL_REALTIME', 'real_populate', params)
        else:
            writelog('[SUCCESS] Successfully Retrieved Realtime data', 'real_populate', params)
    cursor.close()
    connection.close()
def dictreturn(query):
    """Run a SELECT and return its result as {column_name: [values]}.

    Cell values are converted to Decimal (context precision 6) where
    possible; anything non-numeric is kept as-is. Exits the process on
    MySQL errors; MySQL warnings are swallowed after being formatted.
    """
    getcontext().prec = 6
    # Renamed from `dict`, which shadowed the builtin.
    result = {}
    conn = None
    cursor = None
    try:
        cn = getconf('db')
        if not cn:
            writelog('[CRITICAL] Unable to Read Database Configuration File!', 'selectdb', 'query')
            sys.exit('Database Configuration Not Found --- Exiting...')
        conn = connect(host=cn[0], port=int(cn[1]), user=cn[2],
                       passwd=cn[3], db=cn[4])
        cursor = conn.cursor()
        cursor.execute(query)
        desc = cursor.description
        # One list per selected column, filled column-wise below.
        columns = [[] for _ in desc]
        for row in cursor.fetchall():
            for i in range(len(desc)):
                try:
                    val = Decimal(row[i])
                except Exception:
                    # Non-numeric cell: keep the raw value. (The original
                    # used a bare except:, which also swallowed
                    # KeyboardInterrupt/SystemExit.)
                    val = row[i]
                columns[i].append(val)
        for i, col in enumerate(desc):
            result[col[0]] = columns[i]
    except MySQLError as e:
        p = [query]
        print(p)
        sys.exit('MySQL Exception Found --- Exiting...')
    except Warning as e:
        p = [query]
        errmsg = '[CRITICAL] MYSQL Warning Detected' + str(e)
    finally:
        # BUGFIX: the original handlers closed cursor/conn unconditionally,
        # raising NameError when connect() itself failed; guard both here.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
    return result
def getconf(category):
    """Read config/<category>.cfg and return its lines as a list.

    Each line loses its trailing newline and all embedded spaces.
    Returns [] (after logging) when the file cannot be opened.
    """
    try:
        # BUGFIX: the original sliced var[:-1], which chops the last real
        # character whenever the final line has no trailing newline;
        # rstrip('\n') removes only newlines. The file handle is also now
        # closed via the context manager (the original leaked it).
        with open('config/' + category + '.cfg', 'r') as conf:
            return [line.rstrip('\n').replace(" ", "") for line in conf]
    except IOError as e:
        errmsg = '[CRITICAL] Error occured at config.py: %s' % e.strerror
        writelog(errmsg, "getconf", [category])
        return []
# NOTE(review): duplicate definition -- getconf is defined identically
# elsewhere in this file; within one module the later definition wins.
def getconf(category):
    """Read config/<category>.cfg and return its lines as a list of strings.

    Each line is taken as var[:-1] (drops the final character, assumed to
    be the newline) with all spaces removed. On IOError the failure is
    logged and an empty list is returned.
    """
    try:
        conf = open('config/' + category + '.cfg', 'r')  # NOTE(review): handle is never closed
        li = []
        for var in conf:
            var = var[:-1]  # chops the last char -- wrong if the final line lacks '\n'
            var = var.replace(" ","")
            li.append(var)
        return li
    except IOError as e:
        errmsg = '[CRITICAL] Error occured at config.py: %s' % e.strerror
        p = []
        p.append(category)
        writelog(errmsg, "getconf", p)
        g = []
        return g
def yql_dividends_init():
    """Drop and recreate the yql_dividends table.

    Columns: auto-increment id, tick, plus one VARCHAR(25) column per name
    in the 'dividends' config. Returns 0 on success; exits the process
    when the config is missing.
    """
    cn = getconf('dividends')
    if not cn:
        p = []
        writelog('[CRITICAL] No Configuration File Found', 'yql_dividends_init', p)
        # sys.exit raises, so the original trailing `return 1` was dead code.
        sys.exit('[CRITICAL] No Configuration File Found')
    dberase('DROP TABLE IF EXISTS yql_dividends')
    cols = ', '.join(ele + ' VARCHAR(25)' for ele in cn)
    dbquery('CREATE TABLE yql_dividends (id INT NOT NULL AUTO_INCREMENT, '
            'tick VARCHAR(10), ' + cols + ', PRIMARY KEY(id))')
    return 0
def yql_estimates_init(tick):
    """Drop and recreate <tick>_yql_estimates.

    Columns: auto-increment id plus one VARCHAR(15) column per name in the
    'estimates' config. Returns 0 on success; exits the process when the
    config is missing.
    """
    cn = getconf('estimates')
    if not cn:
        p = [tick]
        writelog('[CRITICAL] No Configuration File Found', 'yql_estimates_init', p)
        # sys.exit raises, so the original trailing `return 1` was dead code.
        sys.exit('[CRITICAL] No Configuration File Found')
    dberase('DROP TABLE IF EXISTS ' + tick + '_yql_estimates')
    cols = ', '.join(ele + ' VARCHAR(15)' for ele in cn)
    dbquery('CREATE TABLE ' + tick + '_yql_estimates (id INT NOT NULL '
            'AUTO_INCREMENT, ' + cols + ', PRIMARY KEY(id))')
    return 0
def yql_growth_init(tick):
    """Drop and recreate <tick>_yql_growth.

    Columns: auto-increment id plus one VARCHAR(15) column per name in the
    'growth' config. Returns 0 on success; exits the process when the
    config is missing.
    """
    cn = getconf('growth')
    if not cn:
        p = [tick]
        writelog('[CRITICAL] No Configuration File Found', 'yql_growth_init', p)
        # sys.exit raises, so the original trailing `return 1` was dead code.
        sys.exit('[CRITICAL] No Configuration File Found')
    dberase('DROP TABLE IF EXISTS ' + tick + '_yql_growth')
    cols = ', '.join(ele + ' VARCHAR(15)' for ele in cn)
    dbquery('CREATE TABLE ' + tick + '_yql_growth (id INT NOT NULL '
            'AUTO_INCREMENT, ' + cols + ', PRIMARY KEY(id))')
    return 0
def readContent(url):
    """Fetch *url*, strip its HTML to plain text, and save it under
    collection/ as <host+path><count>.txt with the URL as the first line.

    Uses the module-level `count` to make output filenames unique.
    Errors are logged; on failure any partially written file is removed.
    """
    global count
    file = None
    filename = ""
    try:
        writelog("Reading content of: " + url)
        response = urlopen(url)
        data = response.read().decode("utf-8",errors='ignore')
        # NOTE(review): no parser argument -- BeautifulSoup picks a default,
        # which can differ between environments; other calls in this file
        # pass 'html.parser' explicitly.
        soup = BeautifulSoup(data)
        text = soup.get_text()
        # Build a filesystem-safe name from the URL (slashes removed) plus
        # the running counter.
        name = url.split('//')[-1]
        n = name + str(count) + ".txt"
        filename = "collection/" + n.replace('/','')
        file = open(filename,'w')
        file.write(url)
        file.write('\n')
        file.write(text)
        file.flush()
        writelog("Successfully read: " + url)
        # Only count pages that were fully written.
        count += 1
    except Exception as e:
        writelog("Exception: readContent " + url + " " + str(e))
        # Close before removing so the partial output file can be deleted;
        # the second close in `finally` is a harmless no-op.
        if file != None:
            file.close()
            os.remove(filename)
        pass
    finally:
        if file != None:
            file.close()
def dbquery(query):
    """Execute one SQL statement against the configured database and commit.

    Exits the process on MySQL errors; MySQL warnings are logged and
    swallowed. The connection and cursor are always closed when created.
    """
    conn = None
    cursor = None
    try:
        cn = getconf('db')
        if not cn:
            writelog('[CRITICAL] Unable to Read Database Configuration File!', 'dbquery', 'query')
            sys.exit('Database Configuration Not Found --- Exiting...')
        # Config layout: host, port, user, password, database name.
        conn = connect(host=cn[0], port=int(cn[1]), user=cn[2],
                       passwd=cn[3], db=cn[4])
        cursor = conn.cursor()
        cursor.execute(query)
        conn.commit()
    except MySQLError as e:
        p = [query]
        print(p)
        errmsg = '[CRITICAL] MYSQL Error Detected' + str(e)
        writelog(errmsg, 'dbquery', p)
        sys.exit('MySQL Exception Found --- Exiting...')
    except Warning as e:
        p = [query]
        errmsg = '[CRITICAL] MYSQL Warning Detected' + str(e)
        writelog(errmsg, 'dbquery', p)
    finally:
        # BUGFIX: the original referenced cursor/conn unconditionally in the
        # handlers, raising NameError when connect() itself failed; close
        # them here only when they exist.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
def retrivePage(url): url = url.strip("/") #file_log.write("now crawling" + url) if url.endswith('.pdf'): writelog("Reading content of: " + url) readpdffromweb(url) writelog("Successfully read: " + url) return [] else: readContent(url) try: response = urlopen(url) writelog("retrievePage; Fetching urls from: " + url) data = response.read().decode("utf-8",errors="ignore") soup = BeautifulSoup(data) url_list = [] visited[url] = 1 soup.prettify() for anchor in soup.findAll('a', href=True): norm_url = normalizeurl(url,anchor['href']) if not isvalidurl(norm_url):#skip rest statements in the loop and continue to remaining iteration continue if norm_url not in url_list and norm_url not in visited: url_list.append(norm_url) writelog("retrievePage; Success fetching urls from: " + url) return url_list except Exception as e: writelog("Exception: retrievePage " + url + " " + str(e)) #file_log.write("failed: permission denied or link not working") #file_log.write("\n") return []
# NOTE(review): duplicate definition -- real_populate is defined identically
# elsewhere in this file; within one module the later definition wins.
def real_populate():
    """Fetch realtime quote data for every enabled ticker in `ticklist`,
    logging a per-ticker success or failure line."""
    p = []
    writelog('[INFO] Starting Realtime Population','real_populate',p)
    conn = pymysql.connect(host='localhost',port=3306, user='******', passwd='password', db='jsong')
    cur = conn.cursor()
    cur.execute("SELECT tick FROM ticklist WHERE enabled = 1")
    msg = '0'  # NOTE(review): unused
    count = 1
    for tick1 in cur.fetchall():
        tick = tick1[0]  # rows are 1-tuples of the ticker symbol
        print (str(count) + ": " + tick)
        count = count + 1
        writelog('[INFO] realtime ' + tick, 'real_populate', p)
        r = yql_real(tick, 0)
        if r == 1:
            p = []
            p.append(tick)
            writelog('[CRITICAL] Error Occurred During YQL_REALTIME', 'real_populate', p)
        else:
            writelog('[SUCCESS] Successfully Retrieved Realtime data', 'real_populate', p)
    cur.close()
    conn.close()
def yql_day(tick, attempts):
    """Scrape daily quote fields for *tick* from Yahoo Finance (CSV
    endpoint) plus beta from yql_beta(), and replace its row in yql_day.

    Returns 0 on success, 1 on failure; exits the process on URL errors
    or a missing 'day' config. `attempts` is accepted for interface
    symmetry with the other scrapers but is not used here.
    """
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/d/quotes.csv?s=' + tick + '&f=a2dghj4vxy',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            }
        )
        html = urlopen(req)
        data = html.read()
    except URLError as e:
        p = []
        p.append(tick)
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_day', p)
        writelog(e, 'yql_day', p)
        sys.exit('[CRITICAL] URL ERROR')
    except HTTPError as e:
        p = []
        p.append(tick)
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_day', p)
        writelog(e, 'yql_day', p)
        return 1
    # Parse: strip <sup> tags, then split the CSV payload on commas.
    soup = BeautifulSoup(data, 'html.parser')
    for tag in soup.find_all('sup'):
        tag.replaceWith('')
    soup = str(soup)
    ts = soup.split(',')
    # Replace any previous row for this ticker.
    dquery = 'DELETE FROM yql_day WHERE tick = ' + '\'' + tick + '\''
    dbquery(dquery)
    iquery = 'INSERT INTO yql_day (tick, '
    cn = getconf('day')
    if not cn:
        p = []
        writelog('[CRITICAL] No Configuration File Found', 'yql_day', p)
        sys.exit('[CRITICAL] No Configuration File Found')
        return 1  # unreachable -- sys.exit raises
    for ele in cn:
        iquery = iquery + ele + ', '
    iquery = iquery[:-2] + ') VALUES (\'' + tick + '\', '
    # The config lists one more column (beta) than the CSV provides; the
    # scraped fields must account for all but that last column.
    if (len(cn) == len(ts) + 1):
        for el in ts:
            el = el.replace("\n","")
            el = el.replace('\"','')
            el = el.replace("\\","")
            iquery = iquery + '\'' + el + '\', '
    else:
        return 1
    # Beta comes from a separate scrape; 0 is treated as "not available".
    beta = yql_beta(tick, 0)
    if beta == 0:
        beta = 'NA'
        p = []
        p.append(tick)
        writelog('Unable to collect beta', 'yql_day', p)
    iquery = iquery + '\'' + str(beta) + '\')'
    dbquery(iquery)
    return 0
def yql_analyst_trends(tick, attempts):
    """Scrape the analyst-recommendation-trend table for *tick* from Yahoo
    Finance and insert its rows into <tick>_yql_analyst_trends.

    attempts -- current retry count for IncompleteRead recovery (max 3).
    Returns 0 on success, 1 on failure.
    """
    p = []
    p.append(tick)
    p.append(attempts)
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ao?s=' + tick + '+Analyst+Opinion',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()
        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
        # Remove subscripts
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_analyst_trends', p)
        writelog(e, 'yql_analyst_summary', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_analyst_trends', p)
        writelog(e, 'yql_analyst_summary', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_analyst_trends', p)
        if (attempts < 3):
            r = yql_analyst_trends(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_analyst_trends', p)
            return 1
        if (r == 0):
            return 0
        else:
            writelog(
                '[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                'yql_analyst_trends', p)
            return 1
    # Walk to the 4th matched table, skip its header rows, and collect the
    # remaining cells column-wise into cn (one list per data column).
    # NOTE(review): `i` doubles as both a table counter and a row counter --
    # presumably tuned to the page layout at the time; verify against a
    # saved copy of the page before touching.
    i = 0
    cn = []
    count = True
    try:
        for ele in table:
            if (i == 3):
                for row in ele.findAll("tr"):
                    if (i > 4):
                        if count == True:
                            # First data row fixes the number of columns.
                            ls = len(row)
                            for t in range(0, ls - 1):
                                cl = []
                                cn.append(cl)
                            count = False
                        c = 0
                        for col in row.findAll("td"):
                            cs = col.get_text()
                            cn[c].append(cs)
                            c = c + 1
                    i = i + 1
            i = i + 1
    except IndexError as e:
        p = []
        p.append(tick)
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_analyst_trends', p)
        writelog(e, 'yql_analyst_trends', p)
        return 1
    # One INSERT per collected column of ratings counts.
    for l in cn:
        s = 'INSERT INTO ' + tick + '_yql_analyst_trends (Strong_Buy, Buy, Hold, Underperform, Sell) VALUES ('
        for x in l:
            s = s + '\'' + x + '\', '
        s = s[:-2] + ')'
        dbquery(s)
    return 0
def yql_highlight(tick, attempts):
    """Scrape the Key Statistics "highlight" tables for *tick* from Yahoo
    Finance and replace its row in yql_highlight.

    attempts -- current retry count for IncompleteRead recovery (max 3).
    Returns 0 on success, 1 on failure; exits the process when the
    'highlight' config is missing.
    """
    # Web scraping
    p = []
    p.append(tick)
    p.append(attempts)
    try:
        req = Request(
            'http://finance.yahoo.com/q/ks?s=' + tick + '+Key+Statistics',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()
        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_highlight', p)
        writelog(e, 'yql_highlight', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_highlight', p)
        writelog(e, 'yql_highlight', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_highlight', p)
        if (attempts < 3):
            r = yql_highlight(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_highlight', p)
            return 1
        if (r == 0):
            return 0
        else:
            writelog(
                '[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                'yql_highlight', p)
            return 1
    # Remove subscripts
    for tag in soup.find_all('sup'):
        tag.replaceWith('')
    table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})
    cn = getconf('highlight')
    if not cn:
        p = []
        writelog('[CRITICAL] No Configuration File Found', 'yql_highlight', p)
        sys.exit('[CRITICAL] No Configuration File Found')
        return 1  # unreachable -- sys.exit raises
    # Replace any previous row for this ticker.
    d = 'DELETE FROM yql_highlight WHERE tick = \'' + tick + '\''
    dbquery(d)
    s = 'INSERT INTO yql_highlight (tick, '
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES (\'' + tick + '\', '
    # Collect the value cells (skipping ":"-labelled cells) from the 2nd
    # through 5th matched tables. Spaces and '%' signs are stripped.
    i = 0
    ccl = []
    try:
        for ele in table:
            if (i >= 1) and (i <= 4):
                for row in ele.findAll("tr"):
                    if len(row) == 2:
                        for col in row.findAll("td"):
                            if (col.get_text().find(':') == -1):
                                g = col.get_text()
                                g = g.replace(' ', '')
                                g = g.replace('%', '')
                                ccl.append(g)
            i = i + 1
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered ' + str(e), 'yql_highlight', p)
        return 1
    # Only insert when the page yielded exactly one value per configured column.
    if (len(ccl) == len(cn)):
        for cc in ccl:
            s = s + '\"' + cc + '\"' + ', '
        s = s[:-2] + ')'
        dbquery(s)
        return 0
    else:
        writelog('[CRITICAL] No Data Retrieved', 'yql_highlight', p)
        return 1
def yql_dividends(tick, attempts):
    """Scrape dividend statistics for *tick* from Yahoo's Key Statistics
    page and replace its row in yql_dividends.

    attempts -- current retry count for IncompleteRead recovery (max 3).
    Returns 0 on success, 1 on failure; exits the process when the
    'dividends' config is missing.
    """
    p = []
    p.append(tick)
    p.append(attempts)
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ks?s=' + tick + '+Key+Statistics',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            }
        )
        html = urlopen(req)
        data = html.read()
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_dividends', p)
        writelog(e, 'yql_dividends', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_dividends', p)
        writelog(e, 'yql_dividends', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_dividends', p)
        if (attempts < 3):
            r =yql_dividends(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_dividends', p)
            return 1
        if (r == 0):
            return 0
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_dividends', p)
            return 1
    # Find table & parse: strip <sup> subscripts first.
    soup = BeautifulSoup(data, 'html.parser')
    for tag in soup.find_all('sup'):
        tag.replaceWith('')
    table = soup.find_all("table", { "class" : "yfnc_datamodoutline1" })
    cn = getconf('dividends')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_dividends', p)
        sys.exit('[CRITICAL] No Configuration File Found')
        return 1  # unreachable -- sys.exit raises
    # Replace any previous row for this ticker.
    d = 'DELETE FROM yql_dividends WHERE tick = \'' + tick + '\''
    dbquery(d)
    s = 'INSERT INTO yql_dividends (tick, '
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES (\'' + tick + '\', '
    # From the 10th matched table, take value cells (those not ending in
    # ':'), keeping only positions >= 4 and skipping position 7 -- offsets
    # presumably tuned to the page layout at the time.
    ccl = []
    i = 0
    try:
        for ele in table:
            if (i == 9):
                filterc = 0
                for row in ele.findAll("tr"):
                    if len(row) == 2:
                        for col in row.findAll("td"):
                            if (col.get_text().endswith(':') == False):
                                if (filterc >= 4) and (filterc != 7):
                                    g = col.get_text()
                                    g = g.replace("%", '')
                                    g = g.replace(',', '')
                                    ccl.append(g)
                                filterc = filterc + 1
            i = i + 1
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered ' + str(e), 'yql_dividends', p)
        return 1
    # Only insert when the page yielded exactly one value per configured column.
    if (len(ccl) == len(cn)):
        for cc in ccl:
            s = s + '\"' + cc + '\"' + ', '
        s = s[:-2] + ')'
        dbquery(s)
        return 0
    else:
        writelog('[CRITICAL] No Data Retrieved', 'yql_dividends', p)
        return 1
def yql_growth(tick, attempts):
    """Scrape the Analyst Estimates growth table for *tick* from Yahoo
    Finance and insert its rows into <tick>_yql_growth.

    attempts -- current retry count for IncompleteRead recovery (max 3).
    Returns 0 on success, 1 on any failure (including a partially
    mismatched table); exits the process when the 'growth' config is
    missing.
    """
    p = []
    p.append(tick)
    p.append(attempts)
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ae?s=' + tick + '+Analyst+Estimates',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()
        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_growth', p)
        writelog(e, 'yql_growth', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_growth', p)
        writelog(e, 'yql_growth', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_growth', p)
        if (attempts < 3):
            r = yql_growth(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_growth', p)
            return 1
        if (r == 0):
            return 0
        else:
            writelog(
                '[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                'yql_growth', p)
            return 1
    # Remove subscripts
    for tag in soup.find_all('sup'):
        tag.replaceWith('')
    table = soup.find_all("table", {"class": "yfnc_tableout1"})
    cn = getconf('growth')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_growth', p)
        sys.exit('[CRITICAL] No Configuration File Found')
        return 1  # unreachable -- sys.exit raises
    s = 'INSERT INTO ' + tick + '_yql_growth ('
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES ('
    # Collect the 6th matched table column-wise into ccn (first <td> of each
    # row is the label and is skipped; nested tables/styles are stripped).
    c = 0
    first = True
    ccn = [[], [], [], []]
    retval = 0
    try:
        for ele in table:
            if (c == 5):
                for row in ele.findAll("tr"):
                    for tag in row.find_all(['table', 'style']):
                        tag.replaceWith('')
                    i = 0
                    for col in row.findAll("td"):
                        if (i > 0):
                            ccn[i - 1].append(col.get_text())
                        i = i + 1
            c = c + 1
        # One INSERT per column whose length matches the configured schema;
        # a mismatch flags failure but does not stop the other columns.
        for cr in ccn:
            ss = s
            if (len(cr) == len(cn)):
                for cc in cr:
                    ss = ss + '\'' + cc + '\', '
                ss = ss[:-2] + ')'
                dbquery(ss)
            else:
                retval = 1
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_growth', p)
        writelog(e, 'yql_growth', p)
        return 1
    return retval
logger.writelog(str(ex_type.__name__), "Exception Type") logger.writelog(str(ex_value), "Exception Message") logger.writelog(str(trace_back), "Traceback") finally: logger.result_close() rows_toload = 20000 gamma_start = 1.0e-5 gamma_end = 10.0 params = 10000 clusters = 50 logger.log_open() stepsize = round(((gamma_end - gamma_start) / params), 5) logger.writelog(gamma_start, "Gamma_start") logger.writelog(gamma_end, "Gamma_end") logger.writelog(params, "Parameters") logger.writelog(stepsize, "Step_size") logger.writelog(clusters, "clusters") ''' ------Below commented code is for loading letters----------- data,label = load_letters(datafiles_names[0],"letters.csv",rows_toload) logger.writelog(str(data.shape),"Dataset_dimension") logger.writelog(str(label.shape),"Groundtruth_dimension") df = pd.DataFrame(label) df.to_csv(datafiles_names[0]+"label.csv",index=False,header=None) del label gc.collect()
def yql_hist(tick, years, attempts):
    """Scrape ~10 years of weekly historical volume/adjusted-close data for
    *tick* from Yahoo Finance into <tick>_hist, then page back further via
    yql_hist_rep().

    years    -- number of additional result pages to request.
    attempts -- current retry count for IncompleteRead recovery (max 3).
    Returns 0 on success, 1 on failure.
    """
    p = [tick, years, attempts]
    # Build the date window: today back to ~10 years ago, clamped so the
    # month/day arithmetic never goes to 0 or negative.
    now = datetime.datetime.now()
    yeara = now.year
    montha = now.month
    daya = now.day
    yearb = now.year - 10
    monthb = now.month - 1
    dayb = now.day - 2
    if monthb == 0:
        monthb = 1
    if dayb <= 0:
        dayb = 1
    # Web scraping
    try:
        req = Request(
            'https://ca.finance.yahoo.com/q/hp?s=' + tick + '&a=' + str(monthb) +
            '&b=' + str(dayb) + '&c=' + str(yearb) + '&d=' + str(montha) +
            '&e=' + str(daya) + '&f=' + str(yeara) + '&g=w',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            }
        )
        html = urlopen(req)
        data = html.read()
        soup = BeautifulSoup(data, 'html.parser')
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_hist', p)
        writelog(e, 'yql_hist', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_hist', p)
        writelog(e, 'yql_hist', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_hist', p)
        if attempts < 3:
            # BUGFIX: the retry previously called yql_growth(), scraping the
            # wrong dataset entirely; retry this function instead.
            r = yql_hist(tick, years, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_hist', p)
            return 1
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_hist', p)
        return 1
    # Remove subscripts
    for tag in soup.find_all('sup'):
        tag.replaceWith('')
    table = soup.find("table", {"class": "yfnc_datamodoutline1"})
    first = True
    last = None  # ROBUSTNESS: defined even when the page has no data rows
    try:
        ttable = table.findAll("table")
        for t in ttable:
            for row in t.findAll("tr"):
                iquery = 'INSERT INTO ' + tick + '_hist (volume, adjclose) VALUES ('
                # Data rows have 7 cells; the very first matching row is the
                # header and is skipped via `first`.
                if len(row) == 7 and first == False:
                    s = 0
                    for col in row.findAll("td"):
                        if s == 1:
                            # Remember the date cell -- it seeds the
                            # follow-up pagination below.
                            last = col.get_text()
                        if (s == 5) | (s == 6):
                            tv = col.get_text()
                            tv = tv.replace(',', '')
                            iquery = iquery + tv + ', '
                        s = s + 1
                    iquery = iquery[:-2] + ')'
                    dbquery(iquery)
                else:
                    first = False
        # Page back through older history, stopping on the first failure.
        if last is not None:
            for i in range(1, years):
                g = yql_hist_rep(tick, i, last, 0)
                if g == 1:
                    break
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_hist', p)
        writelog(e, 'yql_hist', p)
        return 1
    return 0
def yql_competitor(tick, attempts):
    """Scrape the Competitors comparison table for *tick* from Yahoo
    Finance and insert one row per competitor into <tick>_yql_competitor.

    attempts -- current retry count for IncompleteRead recovery (max 3).
    Returns 0 on success, 1 on failure; exits the process when the
    'competitor' config is missing.
    """
    p = []
    p.append(tick)
    p.append(attempts)
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/co?s=' + tick + '+Competitors',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()
        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
        # Remove subscripts
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_competitor', p)
        writelog(e, 'yql_competitor', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_competitor', p)
        writelog(e, 'yql_competitor', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_competitor', p)
        if (attempts < 3):
            r = yql_competitor(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_competitor', p)
            return 1
        if (r == 0):
            return 0
        else:
            writelog(
                '[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                'yql_competitor', p)
            return 1
    # Header row (<th>) seeds one list per competitor in cl; subsequent
    # <td> rows append their cells to those lists position by position.
    cl = []
    first = True
    try:
        for ele in table:
            for row in ele.findAll("tr"):
                if (first == True):
                    for col in row.findAll("th"):
                        cn = []
                        if (first == True):
                            # The very first header cell is the row label.
                            first = False
                        elif (col.get_text().find(':') > -1):
                            break
                        else:
                            cn.append(col.get_text())
                        cl.append(cn)
                else:
                    i = 0
                    for col in row.findAll("td"):
                        cl[i].append(col.get_text())
                        i = i + 1
                    # NOTE(review): this break ends the row loop after the
                    # first data row -- presumably intentional for this page
                    # layout; verify against a saved copy before changing.
                    break
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_competitor', p)
        writelog(e, 'yql_competitor', p)
        return 1
    s = 'INSERT INTO ' + tick + '_yql_competitor (tick, '
    cn = getconf('competitor')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_competitors', p)
        sys.exit('[CRITICAL] No Configuration File Found')
        return 1  # unreachable -- sys.exit raises
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES ('
    # One INSERT per competitor column whose cell count matches the schema
    # (+1 for the leading ticker cell); mismatches flag failure but do not
    # stop the remaining inserts.
    retval = 0
    for col in cl:
        gs = s
        if (len(cn) == len(col) - 1):
            for row in col:
                gs = gs + '\'' + str(row) + '\', '
            gs = gs[:-2] + ')'
            dbquery(gs)
        else:
            retval = 1
    return retval
def dr_cluster(data, method, gamma, params, clusters, stepsize, rows_toload, dropped_class_numbers):
    """Run a KernelPCA gamma sweep followed by clustering, logging per-step
    and best-overall results.

    For each of `params` steps, project `data` with an RBF KernelPCA at the
    current gamma, write the projection to KPCA_output_path, cluster it
    (external C thresholding binary when method == "Thresholding",
    otherwise the project's kmeans module), score with silhouette and
    Hungarian-matching purity, and advance gamma by `stepsize`. A final
    extra iteration (i == params) re-runs the best gamma and writes the
    summary rows.

    method -- "Kmeans2D" (2 components), "Kmeans1D" or "Thresholding"
              (1 component). NOTE(review): any other value leaves
              `components` unbound -- presumably callers only pass these
              three; confirm.
    Exceptions are caught, logged via `logger`, and swallowed; the result
    file is closed in all cases.
    """
    if (method == "Kmeans2D"):
        components = 2
    if (method == "Kmeans1D" or method == "Thresholding"):
        components = 1
    flag = 0  # set once the thresholding C program has been compiled
    resetflag = 0  # NOTE(review): unused
    logger.writelog(components, "Components")
    logger.result_open(method)
    print(method)
    # Running best-so-far across the sweep.
    max_sc = -100.0
    best_purity = 0.0
    best_gamma = 0.0
    serial_num = 0
    try:
        # params sweep steps plus one extra pass (i == params) at best_gamma.
        for i in range(0, params + 1):
            transformer = KernelPCA(n_components=components, kernel='rbf', gamma=gamma)
            data_transformed = transformer.fit_transform(data)
            df = pd.DataFrame(data_transformed)
            df.to_csv(KPCA_output_path, index=False, header=None)
            del df  # free the projection frame before the next big allocation
            gc.collect()
            if (method == "Thresholding"):
                # Compile the external thresholding program once, then run it
                # on the projected data and time the run.
                if (flag == 0):
                    os.system("cc c_thresholding_new.c")
                    flag = 1
                start = timeit.default_timer()
                os.system("./a.out " + str(clusters) + " " + str(rows_toload))
                end = timeit.default_timer()
                thresholding_time = (end - start)
                sc = silhouette.silhouette(KPCA_output_path, Thresholding_paths[1])
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    't', Thresholding_paths[0], clusters, rows_toload, dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, thresholding_time, gamma, sc, purity)
                if (i < params):
                    # Track the best silhouette score seen during the sweep.
                    if (sc > max_sc):
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                    # Last sweep step: rewind to the best gamma so the extra
                    # final iteration re-runs it.
                    if (i == (params - 1)):
                        gamma = best_gamma
                        sc = max_sc
                        purity = best_purity
                if (i == params):
                    # Final pass: emit the summary and Hungarian breakdown.
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method, thresholding_time, best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method, thresholding_time, best_gamma, max_sc, best_purity)
                    write_hungarian_result(best_gamma, clusters, groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, best_purity,
                                           method, params, stepsize, dropped_class_numbers)
            else:
                # K-means branch (Kmeans1D / Kmeans2D): same bookkeeping with
                # the project's kmeans module instead of the C binary.
                kmeans_time = kmeans.kmeans(KPCA_output_path, KMeans_paths[1], clusters)
                kmeans.groundtruth_distribution(KMeans_paths[1], KMeans_paths[0], datafiles_names[0], datafiles_names[2], clusters)
                sc = silhouette.silhouette(KPCA_output_path, KMeans_paths[1])
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    'k', KMeans_paths[0], clusters, rows_toload, dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, kmeans_time, gamma, sc, purity)
                if (i < params):
                    if (sc > max_sc):
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                    if (i == (params - 1)):
                        gamma = best_gamma
                        sc = max_sc
                        purity = best_purity
                if (i == params):
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method, kmeans_time, best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method, kmeans_time, best_gamma, max_sc, best_purity)
                    write_hungarian_result(best_gamma, clusters, groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, best_purity,
                                           method, params, stepsize, dropped_class_numbers)
            # Advance gamma for every step except the rewound final ones.
            if (i < (params - 1)):
                gamma = gamma + stepsize
    except (KeyboardInterrupt, SystemExit, Exception) as ex:
        # Log the failure with full traceback details, then fall through so
        # the result file is still closed.
        ex_type, ex_value, ex_traceback = sys.exc_info()
        trace_back = traceback.extract_tb(ex_traceback)
        logger.writelog(str(ex_type.__name__), "Exception Type")
        logger.writelog(str(ex_value), "Exception Message")
        logger.writelog(str(trace_back), "Traceback")
    finally:
        logger.result_close()
import time
from logger import writelog

# Demo: log a start entry, simulate 5 s of work, log a finish entry, and
# report any logging failure to stdout.
try:
    writelog("logfile1.log", "Program started.", "O")
    time.sleep(5)
    writelog("logfile1.log", "Program finished.")
except Exception as error:
    # BUGFIX: corrected the user-facing message, previously
    # "Oh no! And error has occured".
    print("Oh no! An error has occurred")
    print(error)
# Demo script: write start/finish entries to logfile1.log around a 10-second
# simulated workload, using the project's logger module.
import time
from logger import writelog

writelog("logfile1.log", "Program Started")
time.sleep(10)  # simulated work
writelog("logfile1.log", "Program Finished")
def yql_analyst_summary(tick,attempts):
    """Scrape the analyst-opinion summary table for *tick* from Yahoo
    Finance and replace its row in yql_analyst_summary.

    attempts -- current retry count for IncompleteRead recovery (max 3).
    Returns 0 on success, 1 on failure; exits the process when the
    'analyst_summary' config is missing.
    """
    p = []
    p.append(tick)
    p.append(attempts)
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ao?s=' + tick + '+Analyst+Opinion',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            }
        )
        html = urlopen(req)
        data = html.read()
        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
        # Remove subscripts
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find_all("table", { "class" : "yfnc_datamodoutline1 equaltable" })
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered' + str(e), 'yql_analyst_summary', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered ' + str(e), 'yql_analyst_summary', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_analyst_summary', p)
        if (attempts < 3):
            r = yql_analyst_summary(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_analyst_summary', p)
            return 1
        if (r == 0):
            return 0
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_analyst_summary', p)
            return 1
    # Replace any previous row for this ticker.
    d = 'DELETE FROM yql_analyst_summary WHERE tick = ' + '\'' + tick + '\''
    dbquery(d)
    cn = getconf('analyst_summary')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_analyst_summary', p)
        sys.exit('[CRITICAL] No Configuration File Found')
        return 1  # unreachable -- sys.exit raises
    s = 'INSERT INTO yql_analyst_summary (tick, '
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES (\'' + tick + '\', '
    # Collect value cells (those without ':' labels), stripping '%' signs.
    ccl = []
    for ele in table:
        for row in ele.findAll("tr"):
            for col in row.findAll("td"):
                if (col.get_text().find(':') == -1):
                    ts = col.get_text()
                    ts = ts.replace("%","")
                    ccl.append(ts)
    # Only insert when the page yielded exactly one value per configured column.
    if (len(ccl) == len(cn)):
        for cc in ccl:
            s = s + '\'' + cc + '\', '
        s = s[:-2] + ')'
        dbquery(s)
        return 0
    else:
        return 1
def amigo(tick, category, freq, attempts):
    """Scrape a financial statement for *tick* from amigobulls.com and
    insert its columns into the ``amigo_<tick>_<category>_<freq>`` table.

    category is e.g. 'balance-sheet'; freq e.g. 'quarterly'.  Retries up
    to 3 times on URL errors and incomplete HTTP reads.

    Returns 0 on success, 1 on any failure.  Aborts via sys.exit when
    the column configuration for *category* is missing.
    """
    p = [tick, category, freq, attempts]
    try:
        # Web scraping
        req = Request(
            'http://amigobulls.com/stocks/' + tick + '/' + category + '/' + freq,
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            })
        html = urlopen(req)
        soup = BeautifulSoup(html, 'html.parser')
        # Remove subscripts so they do not pollute the cell text
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find("table", {"id": "stackinfo"})
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'amigo', p)
        if attempts < 3:
            r = amigo(tick, category, freq, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'amigo', p)
            return 1
        print("Attempt Number: " + str(attempts) + " Result: " + str(r))
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'amigo', p)
        writelog(e, 'amigo', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'amigo', p)
        writelog(e, 'amigo', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'amigo', p)
        if attempts < 3:
            r = amigo(tick, category, freq, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'amigo', p)
            return 1
        print("Attempt Number: " + str(attempts) + " Result: " + str(r))
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'amigo', p)
        return 1
    # One value bucket per table row that has at least one <td>.
    # NOTE(review): buckets are later indexed by COLUMN position, so this
    # presumably relies on the table being square — confirm.
    init = False
    cd = []
    for r in table.findAll("tr"):
        for e in r.findAll("td"):
            cd.append([])
            break
    for row in table.findAll("tr"):
        if init:
            s = True
            c = 0
            for col in row.findAll("td"):
                ltd = col.get_text()
                ltd = ltd.replace("-", "")
                ltd = ltd.replace(" ", "")
                if s:
                    # First cell is the row label; skip it.  (Fix: the
                    # original also collected labels into an unused list.)
                    s = False
                else:
                    cd[c].append(ltd)
                    c = c + 1
        else:
            init = True  # skip the header row
    cname = getconf(category)
    if not cname:
        p = [tick, category, freq]
        writelog('[CRITICAL] No Configuration File Found', 'amigo', p)
        sys.exit('[CRITICAL] No Configuration File Found')
    base = 'INSERT INTO ' + 'amigo_' + tick + '_' + category.replace(
        '-', '_') + '_' + freq + ' ('
    for li in cname:
        base = base + li + ', '
    base = base[:-2] + ') VALUES ('
    # Drop the trailing bucket — NOTE(review): presumably always empty or
    # a totals column; confirm against the site's table layout.
    cd.pop(len(cd) - 1)
    retval = 0
    for z in cd:
        tinsert = base
        if z:
            if len(cname) == len(z):
                for cz in z:
                    tinsert = tinsert + '\'' + cz + '\'' + ', '
                tinsert = tinsert[:-2] + ');'
                dbquery(tinsert)
            else:
                retval = 1
    return retval
def _run_init(initcall, ok_name, fail_name, p):
    """Run one table-initializer callable.

    Logs success; on failure logs and aborts the whole program via
    sys.exit (matching the original inline behavior).
    """
    if initcall() == 0:
        writelog('[SUCCESS] Initialized ' + ok_name, 'populate', p)
    else:
        msg = '[CRITICAL] Unable to create database ' + fail_name
        writelog(msg, 'populate', p)
        sys.exit(msg)


def populate(list, deletet, start):
    """(Re)build and fill every database table for each ticker in
    configuration list ``ticklist_p<list>``.

    deletet == 1 drops and recreates the master ``ticklist`` table
    first; *start* skips tickers until that 1-based position (resume
    support).  Per-ticker results are recorded in ``ticklist`` with any
    failed step names in the ``errorfnc`` column.
    """
    p = [list, deletet]
    writelog('[INFO] Populating Ticklist Number :', 'populate', p)
    cname = getconf("ticklist_p" + str(list))
    if not cname:
        writelog('[CRITICAL] No Configuration File Found', 'populate', p)
        sys.exit('[CRITICAL] No Configuration File Found')
    term = "quarterly"
    total = len(cname)
    counter = 0
    if deletet == 1:
        writelog('[INFO] Dropping and Creating New Table', 'populate', p)
        dberase("DROP TABLE IF EXISTS ticklist")
        dbquery("CREATE TABLE ticklist (id INT NOT NULL AUTO_INCREMENT, tick VARCHAR(10) NOT NULL, enabled VARCHAR(5) NOT NULL, status VARCHAR(5) NOT NULL, manual VARCHAR(5) NOT NULL, marketcap VARCHAR(20), sector VARCHAR(100), industry VARCHAR(100), errorfnc VARCHAR(200), enabledp VARCHAR(100), PRIMARY KEY(id))")
    for tick in cname:
        counter = counter + 1
        if counter < start:
            continue  # resume: skip tickers before *start*
        print(str(counter) + ":" + tick)
        writelog('[INFO] Progress: ' + str(counter) + ' / ' + str(total),
                 'populate', p)
        success = 0
        failure = 0
        failed_functions = []
        writelog('[INFO] Re-Initializing Tables Now......(' + tick + ')',
                 'populate', p)
        # Per-ticker amigo tables.  Fix: tname is now computed BEFORE the
        # init call — previously it was only assigned on success, so the
        # failure branch raised NameError instead of the intended message.
        categories = ("balance-sheet", "cash-flow", "income-statement")
        for idx, category in enumerate(categories):
            tname = 'amigo_' + tick + '_' + category.replace('-', '_') + '_' + term
            _run_init(lambda c=category: amigo_init(tick, c, term),
                      tname + ' table', tname + ' table', p)
            if idx < 2:
                time.sleep(1)  # brief pause between hits on the same site
        # Per-ticker yql tables.
        for initfn, suffix in (
                (yql_analyst_trends_init, '_yql_analyst_trends'),
                (yql_competitor_init, '_yql_competitor'),
                (yql_estimates_init, '_yql_estimates'),
                (yql_growth_init, '_yql_growth'),
                (yql_hist_init, '_yql_hist')):
            name = tick + suffix
            _run_init(lambda f=initfn: f(tick), name, name + ' table', p)
        writelog('[INFO] Populating Tables Now......(' + tick + ')',
                 'populate', p)
        # Each step returns 0 on success.  Fix: failure is now incremented
        # for EVERY failing step — previously the last five only recorded
        # the name, so `failure` stayed 0 and the error path never ran.
        steps = (
            (lambda: amigo(tick, "balance-sheet", term, 0), "amigo-balance-sheet"),
            (lambda: amigo(tick, "cash-flow", term, 0), "amigo-cash-flow"),
            (lambda: amigo(tick, "income-statement", term, 0), "amigo-income-statement"),
            (lambda: yql_analyst_summary(tick, 0), "yql_analyst_summary"),
            (lambda: yql_analyst_trends(tick, 0), "yql_analyst_trends"),
            (lambda: yql_competitor(tick, 0), "yql_competitor"),
            (lambda: yql_day(tick, 0), "yql_day"),
            (lambda: yql_dividends(tick, 0), "yql_dividends"),
            (lambda: yql_estimates(tick, 0), "yql_estimates"),
            (lambda: yql_growth(tick, 0), "yql_growth"),
            (lambda: yql_highlight(tick, 0), "yql_highlight"),
        )
        for call, fname in steps:
            if call() == 0:
                success = success + 1
            else:
                failure = failure + 1
                failed_functions.append(fname)
        writelog('[INFO] Populating Database Tables Complete! ' + '(' + tick + ')',
                 'populate', p)
        # Replace the ticker's row in ticklist.
        dbquery('DELETE FROM ticklist WHERE tick = \'' + tick + '\'')
        if failure > 0:
            flist = functostr(failed_functions)
            wmsg = ('[WARNING] Encountered Some Failures While Populating Database for '
                    + tick + '. \nSuccess: ' + str(success)
                    + ' Failure: ' + str(failure)
                    + '\nList of Failed Functions: ' + flist)
            writelog(wmsg, 'populate', p)
            s = "INSERT INTO ticklist (tick, enabled, status, manual, errorfnc) VALUES (\'" + tick + "\', \'1\', \'1\', \'1\', \'" + flist + "\')"
        else:
            writelog('[SUCCESS] Populated Database Tables For ' + tick + ' Without Error',
                     'populate', p)
            s = "INSERT INTO ticklist (tick, enabled, status, manual) VALUES (\'" + tick + "\', \'1\', \'0\', \'1\')"
        dbquery(s)
        time.sleep(7)  # throttle between tickers
    writelog('[INFO] Finished Populating Database', 'populate', p)
    writelog('[INFO] Finished Populating Ticklist Number: ' + str(list),
             'populate', p)
def yql_competitor(tick, attempts):
    """Scrape Yahoo Finance's Competitors page for *tick* and insert the
    comparison columns into the ``<tick>_yql_competitor`` table.

    Retries up to 3 times on an incomplete HTTP read.

    Returns 0 on success, 1 on any failure.  Aborts via sys.exit when
    the 'competitor' configuration is missing.
    """
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/co?s=' + tick + '+Competitors',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            }
        )
        html = urlopen(req)
        data = html.read()
        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
        # Remove subscripts so they do not pollute the cell text
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_competitor', p)
        writelog(e, 'yql_competitor', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_competitor', p)
        writelog(e, 'yql_competitor', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_competitor', p)
        if attempts < 3:
            # Retry the whole scrape recursively.
            if yql_competitor(tick, attempts + 1) == 0:
                return 0
            writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                     'yql_competitor', p)
            return 1
        writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_competitor', p)
        return 1
    # Build one list per competitor: the header row's <th> cells seed each
    # list with a ticker name, then the first data row's <td> cells are
    # appended positionally.  Only the first data row per table is read.
    cl = []
    first = True
    try:
        for ele in table:
            for row in ele.findAll("tr"):
                if first:
                    for col in row.findAll("th"):
                        cn = []
                        if first:
                            first = False  # skip the leading corner cell
                        elif col.get_text().find(':') > -1:
                            break  # a ':' label cell ends the header
                        else:
                            cn.append(col.get_text())
                            cl.append(cn)
                else:
                    i = 0
                    for col in row.findAll("td"):
                        cl[i].append(col.get_text())
                        i = i + 1
                    break
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_competitor', p)
        writelog(e, 'yql_competitor', p)
        return 1
    s = 'INSERT INTO ' + tick + '_yql_competitor (tick, '
    cn = getconf('competitor')
    if not cn:
        # Fix: the log previously carried the wrong tag 'yql_competitors',
        # and an unreachable `return 1` followed sys.exit.
        writelog('[CRITICAL] No Configuration File Found', 'yql_competitor', p)
        sys.exit('[CRITICAL] No Configuration File Found')
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES ('
    retval = 0
    for col in cl:
        gs = s
        # Each list is [competitor_tick, value, value, ...]; only insert
        # when the value count matches the configured columns.
        if len(cn) == len(col) - 1:
            for row in col:
                gs = gs + '\'' + str(row) + '\', '
            gs = gs[:-2] + ')'
            dbquery(gs)
        else:
            retval = 1
    return retval
def yql_estimates(tick, attempts):
    """Scrape Yahoo Finance's Analyst Estimates page for *tick* and
    insert up to four estimate rows into ``<tick>_yql_estimates``.

    Retries up to 3 times on an incomplete HTTP read.

    Returns 0 on success, 1 on any failure.  Aborts via sys.exit when
    the 'estimates' configuration is missing.
    """
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ae?s=' + tick + '+Analyst+Estimates',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            }
        )
        html = urlopen(req)
        data = html.read()
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered ' + str(e), 'yql_estimates', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered ' + str(e), 'yql_estimates', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_estimates', p)
        if attempts < 3:
            # BUG FIX: the retry previously called yql_competitor(), so an
            # incomplete read re-scraped the wrong page entirely.
            if yql_estimates(tick, attempts + 1) == 0:
                return 0
            writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                     'yql_estimates', p)
            return 1
        writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_estimates', p)
        return 1
    # Find table & parse
    soup = BeautifulSoup(data, 'html.parser')
    # Remove subscripts so they do not pollute the cell text
    for tag in soup.find_all('sup'):
        tag.replaceWith('')
    table = soup.find_all("table", {"class": "yfnc_tableout1"})
    cn = getconf('estimates')
    # Robustness fix: sibling functions abort here; this one previously
    # crashed with a TypeError when the configuration was missing.
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_estimates', p)
        sys.exit('[CRITICAL] No Configuration File Found')
    s = 'INSERT INTO ' + tick + '_yql_estimates ('
    try:
        for ele in cn:
            s = s + ele + ', '
        s = s[:-2] + ') VALUES ('
        c = 0
        ccn = [[], [], [], []]  # one bucket per data column (four periods)
        for ele in table:
            # Only the first three matching tables hold estimate data.
            if (c >= 0) and (c <= 2):
                for row in ele.findAll("tr"):
                    # Strip nested tables / style blocks inside cells.
                    for tag in row.find_all(['table', 'style']):
                        tag.replaceWith('')
                    i = 0
                    for col in row.findAll("td"):
                        if i > 0:  # column 0 is the row label
                            ccn[i - 1].append(col.get_text())
                        i = i + 1
            c = c + 1
    except IndexError as e:
        p = [tick]
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_estimates', p)
        writelog(e, 'yql_estimates', p)
        return 1
    retval = 0
    for cr in ccn:
        ss = s
        # Only insert rows whose value count matches the configured columns.
        if len(cr) == len(cn):
            for cc in cr:
                ss = ss + '\'' + cc + '\', '
            ss = ss[:-2] + ')'
            dbquery(ss)
        else:
            retval = 1
    return retval
def yql_updatetick(tick, attempts):
    """Scrape the sector and industry for *tick* from Yahoo's Industry
    page and store them on the ticker's ``ticklist`` row.

    Retries up to 3 times on an incomplete HTTP read.

    Returns 0 on success (including when no update was applied),
    1 on failure.
    """
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/in?s=' + tick,
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            }
        )
        html = urlopen(req)
        data = html.read()
        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
        # Remove subscripts so they do not pollute the cell text
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'updatetick', p)
        writelog(e, 'updatetick', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'updatetick', p)
        writelog(e, 'updatetick', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'updatetick', p)
        if attempts < 3:
            # Retry the whole scrape recursively.
            if yql_updatetick(tick, attempts + 1) == 0:
                return 0
            writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                     'updatetick', p)
            return 1
        writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'updatetick', p)
        return 1
    # Collect cell text from every row after the very first (header).
    # (Fix: removed the unused local `count` from the original.)
    i = 0
    cn = []
    try:
        for ele in table:
            for tr in ele.findAll("tr"):
                if i >= 1:
                    for td in tr.findAll("td"):
                        cn.append(td.get_text())
                i = i + 1
    except IndexError as e:
        p = [tick]
        writelog('[WARNING] INDEX ERROR Encountered', 'updatetick', p)
        writelog(e, 'updatetick', p)
        return 1
    # Exactly two cells means [sector, industry]; anything else is skipped.
    if len(cn) == 2:
        s = ("UPDATE ticklist SET sector = \'" + cn[0]
             + "\', industry = \'" + cn[1]
             + "\' WHERE tick = \'" + tick + "\'")
        dbquery(s)
    return 0
def init_db():
    """Create (or recreate) every database table: the shared yql tables
    plus one amigo table per statement and one yql table per scraper for
    each ticker in the 'ticklist' configuration.

    Aborts the whole program via sys.exit on any initialization failure.
    """
    p = []

    def _init(initcall, name):
        # Run one initializer: log success, or log the failure and abort
        # (matching the original inline behavior).
        if initcall() == 0:
            writelog('[SUCCESS] Initialized ' + name + ' table',
                     'initializer', p)
        else:
            msg = '[CRITICAL] Unable to create database ' + name + ' table'
            writelog(msg, 'initializer', p)
            sys.exit(msg)

    writelog('[INFO] Starting Database Initializer', 'initializer', p)
    cname = getconf("ticklist")
    if not cname:
        writelog('[CRITICAL] No Configuration File Found', 'initializer', p)
        sys.exit('[CRITICAL] No Configuration File Found')
    # Shared (single) tables.  Fix: the dividends branch previously
    # logged and aborted with "yql_day" messages (copy-paste error).
    _init(yql_analyst_summary_init, 'yql_analyst_summary')
    _init(yql_day_init, 'yql_day')
    _init(yql_dividends_init, 'yql_dividends')
    _init(yql_highlight_init, 'yql_highlight')
    _init(yql_real_init, 'yql_real')
    term = "quarterly"
    for tick in cname:
        tick = tick.replace(" ", "")
        # Fix: tname is computed before the call — previously it was only
        # set on success, so the first failure raised NameError instead
        # of the intended message.
        for category in ("balance-sheet", "cash-flow", "income-statement"):
            tname = 'amigo_' + tick + '_' + category.replace('-', '_') + '_' + term
            _init(lambda c=category: amigo_init(tick, c, term), tname)
        for initfn, suffix in (
                (yql_analyst_trends_init, '_yql_analyst_trends'),
                (yql_competitor_init, '_yql_competitor'),
                (yql_estimates_init, '_yql_estimates'),
                (yql_growth_init, '_yql_growth'),
                (yql_hist_init, '_yql_hist')):
            _init(lambda f=initfn: f(tick), tick + suffix)
    writelog('[SUCCESS] Initializing Database Tables Complete!',
             'initializer', p)
def yql_beta(tick, attempts):
    """Scrape the Beta value for *tick* from Yahoo's Key Statistics page.

    Retries up to 3 times on an incomplete HTTP read.

    Returns the beta as a string on success, or the int 1 (or, via the
    retry path, int 0) on failure — callers must distinguish by type.
    This mixed contract is pre-existing and preserved.
    """
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ks?s=' + tick + '+Key+Statistics',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            }
        )
        html = urlopen(req)
        data = html.read()
        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
        # Remove subscripts so they do not pollute the cell text
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.findAll("table", {"class": "yfnc_datamodoutline1"})
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_beta', p)
        writelog(e, 'yql_beta', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_beta', p)
        writelog(e, 'yql_beta', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_beta', p)
        if attempts < 3:
            r = yql_beta(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_beta', p)
            return 1
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                 'yql_beta', p)
        return 1
    # Scan the cells: the cell immediately after the "Beta:" label holds
    # the value.  (Fix: removed the unused `beta` flag and renamed the
    # builtin-shadowing local `next`.)
    take_next = False
    try:
        for ele in table:
            for row in ele.findAll("tr"):
                for col in row.findAll("td"):
                    if take_next:
                        return str(col.get_text())
                    if re.match("^Beta:", col.get_text()):
                        take_next = True
    except IndexError as e:
        p = [tick]
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_beta', p)
        writelog(e, 'yql_beta', p)
        return 1
    return 1
# NOTE(review): duplicate definition — an identical yql_analyst_summary
# appears earlier in this file; at import time this later copy silently
# replaces it.  Consider deleting one of the two.
def yql_analyst_summary(tick, attempts):
    """Scrape Yahoo Finance's Analyst Opinion page for *tick* and insert
    the summary figures into the yql_analyst_summary table.

    Retries up to 3 times on an incomplete HTTP read.  Returns 0 on
    success, 1 on failure; exits the program if configuration is missing.
    """
    p = []
    p.append(tick)
    p.append(attempts)
    # Web Scrapping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ao?s=' + tick + '+Analyst+Opinion',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()
        # Find table & Parse
        soup = BeautifulSoup(data, 'html.parser')
        # Remove subscripts
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find_all("table",
                              {"class": "yfnc_datamodoutline1 equaltable"})
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered' + str(e),
                 'yql_analyst_summary', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered ' + str(e),
                 'yql_analyst_summary', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_analyst_summary', p)
        # Retry the whole scrape up to three times before giving up.
        if (attempts < 3):
            r = yql_analyst_summary(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES',
                     'yql_analyst_summary', p)
            return 1
        if (r == 0):
            return 0
        else:
            writelog(
                '[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                'yql_analyst_summary', p)
            return 1
    # Remove any previously stored row for this ticker.
    d = 'DELETE FROM yql_analyst_summary WHERE tick = ' + '\'' + tick + '\''
    dbquery(d)
    cn = getconf('analyst_summary')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found',
                 'yql_analyst_summary', p)
        sys.exit('[CRITICAL] No Configuration File Found')
        # NOTE(review): unreachable — sys.exit raises before this line.
        return 1
    # Build the INSERT column list from the configured column names.
    s = 'INSERT INTO yql_analyst_summary (tick, '
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES (\'' + tick + '\', '
    ccl = []
    for ele in table:
        for row in ele.findAll("tr"):
            for col in row.findAll("td"):
                # Cells containing ':' are row labels, not data values.
                if (col.get_text().find(':') == -1):
                    ts = col.get_text()
                    ts = ts.replace("%", "")
                    ccl.append(ts)
    # Only insert when the scraped cell count matches the configured columns.
    if (len(ccl) == len(cn)):
        for cc in ccl:
            s = s + '\'' + cc + '\', '
        s = s[:-2] + ')'
        dbquery(s)
        return 0
    else:
        return 1