#### Main News Program ######
import feedparser

from util import loglib


def save_NEWS(filepath):
    logger = loglib.getlogger('news_util')
    # RSS feeds to pull, keyed by source name.
    url_dict = {
        "Market Realist": ['http://marketrealist.com/feed/'],
        "ValueWalk": ['http://feeds.feedburner.com/VWTwitterFeed'],
        "ECONOMIST": ['http://www.economist.com/sections/business-finance/rss.xml'],
        "YahooFinance": ['http://finance.yahoo.com/news/rss'],
        "CNBC": ["http://www.cnbc.com/id/15839135/device/rss/rss.html"],
        "BusinessInsider": ["http://feeds.feedburner.com/businessinsider"],
        "MarketWatch": ["http://feeds.marketwatch.com/marketwatch/realtimeheadlines"],
        "Reuters": ["http://feeds.reuters.com/news/usmarkets"],
        "Bloomberg View": ["http://www.bloombergview.com/rss"],
        "ZeroHedge": ["http://feeds.feedburner.com/zerohedge/feed"],
        "BIDNESS ETC": ["http://www.bidnessetc.com/businessnewsfeed/"],
        "Benzinga": ["http://feeds.benzinga.com/benzinga/analyst-ratings/price-target",
                     "http://feeds.benzinga.com/benzinga/analyst-ratings/downgrades",
                     "http://feeds.benzinga.com/benzinga/analyst-ratings/upgrades"]}
    # Field-delimiter token; the literal is kept as-is because downstream
    # readers split on it.
    separator = "LINE_SEPERATOR"
    f1 = open(filepath, 'w')
    header = separator.join(["Source", "Title", "Description", "Link", "pubDate"])
    f1.write(header)
    f1.write('\n')
    for url_name, list_url in url_dict.iteritems():
        for url in list_url:
            feed = feedparser.parse(url)
            try:
                logger.info("Getting news for " + url_name)
                for entry in feed["entries"]:
                    list_entries = [url_name, entry.title, entry.summary,
                                    entry.link, entry.published]
                    content = separator.join(list_entries)
                    # Flatten each entry to a single ASCII line.
                    f1.write(content.encode('ascii', 'ignore').strip().replace("\n", ""))
                    f1.write("\n")
            except Exception, ex:
                logger.error(ex)
    f1.close()
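# --- Usage sketch (not part of the original module; assumes pandas is
# available): reading the saved news file back. "LINE_SEPERATOR" is a
# multi-character delimiter, so pandas needs its pure-python parser engine.
import pandas as pd

df_news = pd.read_csv('data/news/ALL_NEWS.csv', sep='LINE_SEPERATOR', engine='python')
print df_news.head()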
import datetime
import os
import traceback

import MySQLdb
from pandas.io.data import DataReader

from MyConfig import MyConfig as cfg
from util import loglib
from dao import dbdao, mongodao

logger = loglib.getlogger('dataimport')

BS_START_DATE = datetime.datetime(1993, 1, 1)
NONBS_START_DATE = datetime.datetime(2010, 1, 1)
# path = 'historical_data/'


def import_data_yahoo_to_files(list_symbols, path, startdate):
    list_error = []
    logger.info("importing from " + str(startdate))
    for symbol in list_symbols:
        try:
            prices_df = DataReader(symbol, "yahoo", startdate)
            # The original file is truncated here; the lines below are a
            # minimal completion (assumed) that persists the frame and records
            # failures, matching the error-handling pattern used elsewhere.
            prices_df.to_csv(os.path.join(path, symbol + '.csv'))
        except Exception, ex:
            logger.error(traceback.format_exc())
            list_error.append(symbol)
    return list_error
from dao import dbdao
from util import loglib

logger = loglib.getlogger('notable_moves')

f = open('queries/notable_moves.sql')
queries = f.read().strip().split(';')
f.close()
print len(queries)
# The trailing element after the final ';' is empty, so drop it.
queries = queries[:-1]
dbdao.execute_query(queries)
import re

import urllib2
import pandas as pd
from BeautifulSoup import BeautifulSoup

from dao2 import dbdao
from util import loglib

logger = loglib.getlogger('sector_industry')


def get_csv(url):
    try:
        # url = url.replace('html', 'csv')
        base_url = "http://biz.yahoo.com/p/"
        url = base_url + url
        print url
        page = urllib2.urlopen(url, timeout=10)
        html = page.read()
        soup = BeautifulSoup(html)
        sector = ""
        industry = ""
        list_symbol = []
        for y in soup.findAll('table', attrs={"bgcolor": "dcdcdc"}):
            for x in y.findAll('td', attrs={"bgcolor": "ffffee"}):
                if "Sector:" in x.text:
                    sector = x.text.replace("Sector:", "")
                elif "Industry:" in x.text:
                    industry = x.text.replace("Industry:", "").replace("(More Info)", "")
                else:
                    # The original file is truncated at this branch; collecting
                    # the remaining cells is an assumed minimal completion.
                    list_symbol.append(x.text)
    except Exception, ex:
        logger.error(ex)
import urllib2
import pandas as pd
from BeautifulSoup import BeautifulSoup
import finsymbols

from dao2 import dbdao
from util import loglib

logger = loglib.getlogger('symbols')


def get_all_symbols():
    # Pull the constituent lists exchange by exchange and tag each frame.
    sp500 = finsymbols.get_sp500_symbols()
    df_sp500 = pd.DataFrame(sp500)
    df_sp500['exchange'] = 'SPY500'
    amex_symbols = finsymbols.get_amex_symbols()
    df_amex = pd.DataFrame(amex_symbols)
    df_amex['exchange'] = 'AMEX'
    nyse_symbols = finsymbols.get_nyse_symbols()
    df_nyse = pd.DataFrame(nyse_symbols)
    df_nyse['exchange'] = 'NYSE'
    nasdaq_symbols = finsymbols.get_nasdaq_symbols()
    df_nasdaq = pd.DataFrame(nasdaq_symbols)
    # The original file is truncated here; the two lines below complete the
    # pattern above (assumed).
    df_nasdaq['exchange'] = 'NASDAQ'
    return pd.concat([df_sp500, df_amex, df_nyse, df_nasdaq])
import sys

import pandas as pd

from dao import dbdao
from util import loglib

logger = loglib.getlogger('stats')


def correct_name(x):
    # Normalize a Yahoo key-statistics label into a bare column name,
    # or return 'drop' for rows we do not keep.
    if 'Forward Annual Dividend Yield' in x:
        return 'DividendYield'
    list_exclusion = ['Moving Average', '52', 'Annual Dividend', '% Held by',
                      'Avg Vol', 'Shares Short (prior', 'Short %',
                      'Ex-Dividend Date']
    for name in list_exclusion:
        if name in x:
            return 'drop'
    x = x.strip().replace(' ', '')
    x = x.strip().replace('/', '')
    return x.split('(')[0].split(':')[0].strip()


print sys.argv
start, end = sys.argv[1], sys.argv[2]


def getstats():
    df_all = pd.DataFrame()
    # list_symbol = dbdao.get_symbols_list_limit(start, end)
    list_symbol = dbdao.get_missing_stats_symbol(start, end)
    if start == '0':
        dbdao.execute_query(["delete from df_stats"])
    print list_symbol
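# --- Usage sketch (assumption, not in the original repo): how correct_name
# normalizes raw Yahoo key-statistics labels.
print correct_name('Forward Annual Dividend Yield (indicated)')  # -> DividendYield
print correct_name('Price/Book (mrq):')                          # -> PriceBook
print correct_name('50-Day Moving Average')                      # -> drop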
import sys
import datetime
from datetime import timedelta
import logging.config
import traceback

from pandas.io.data import DataReader

from MyConfig import MyConfig as cfg
from util import loglib
from bl.import_data import dataimport
from dao import dbdao

logger = loglib.getlogger('historicaldataimport')

# Import full price history for every known symbol.
dataimport.importdata(dbdao.get_symbols_list())
import os

import urllib2
import pandas as pd
from BeautifulSoup import BeautifulSoup

from dao import dbdao
from util import df_util, loglib

logger = loglib.getlogger("symbol_holding")


def get_holdings(symbol):
    url = "http://finance.yahoo.com/q/hl?s=" + symbol + "+Holdings"
    print url
    page = urllib2.urlopen(url, timeout=10)
    soup = BeautifulSoup(page)
    filepath = "data/holdings/" + symbol + ".csv"
    columns = "Symbol^holding_company^holding_symbol^holding_pct"
    # The original opened the file twice; once is enough.
    f1 = open(filepath, "w")
    f1.write(columns)
    f1.write("\n")
    all_tables = soup.findAll("table", attrs={"class": "yfnc_tableout1"})
    if len(all_tables) > 1:
        holdings = all_tables[0].findAll("td", attrs={"class": "yfnc_tabledata1"})
        count = 0
        list_holding = []
        for holding in holdings:
            list_holding.append(holding.text)
import csv
import datetime
import os

import MySQLdb
import MySQLdb.cursors
import pandas as pd
import pandas.io.sql as psql
from dateutil.relativedelta import relativedelta

from MyConfig import MyConfig as cfg
from util import loglib

logger = loglib.getlogger('dbutil_new')


def savealerts(df):
    if 'newvalue' not in df:
        df['newvalue'] = ''
    df = df[['symbol', 'sign', 'typeid', 'newvalue']]
    print df
    save_dataframe(df, "df_alerts")


def remove_symbol(symbol):
    dbcon = MySQLdb.connect(
        host=cfg.mysqldb_host, user=cfg.mysqldb_user,
        passwd=cfg.mysqldb_passwd, db=cfg.mysqldb_db)
    # Parameterized to avoid quoting/injection issues with raw symbols.
    sql = "delete from history_symbol WHERE symbol=%s"
    # The original file is truncated here; executing the delete and cleaning
    # up the connection is a minimal completion (assumed).
    cur = dbcon.cursor()
    cur.execute(sql, (symbol,))
    dbcon.commit()
    dbcon.close()
import math

import numpy as np
import pandas as pd
import talib
from talib import MA_Type, abstract

from dao import dbdao
from util import constants, loglib, alert_constants
from bl import price_manager, rating_manager, rsi_manager, crossover_manager, \
    alert_manager
from bl import trend_manager

logger = loglib.getlogger('technicals_manager')


def calculate_stdabove(latest_row):
    # How many volatility units the close sits above its 50-day SMA.
    price = latest_row['close']
    sma50 = latest_row['sma50']
    volatility = latest_row['volatility']
    std_above = 0
    if volatility != 0:
        std_above = (price - sma50) / volatility
    return std_above


def calculate_beta(df, df_mkt, symbol):
    # The market benchmark has beta 1 by definition.
    if symbol == constants.MKT_SYMBOL:
        return {"beta": 1}
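# --- Sketch (assumption): the body of calculate_beta is truncated above; for
# non-benchmark symbols beta is conventionally cov(symbol returns, market
# returns) / var(market returns). This helper only illustrates the standard
# formula, not the repo's exact implementation.
def beta_from_returns(returns, mkt_returns):
    m = np.cov(returns, mkt_returns)  # 2x2 covariance matrix with consistent ddof
    return m[0, 1] / m[1, 1]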
import csv
import datetime
import os
import traceback

import pandas as pd
from dateutil.relativedelta import relativedelta

from dao import dbdao, mongodao
from util import constants, loglib
from bl import technical_manager

logger = loglib.getlogger('technicals')


def calculate_technicals(start, end):
    end_date_time = datetime.datetime.now()  # mysqldb.get_maxdate()[0]
    start_date_time = end_date_time - relativedelta(days=constants.DAYS_FOR_TECHNICALS)
    list_symbol = dbdao.get_symbols_list_limit(start, end)
    # Debug override left in the original; remove to process all symbols.
    list_symbol = ['MSFT']
    hist_dates = dbdao.get_historical_dates()
import urllib2
from BeautifulSoup import BeautifulSoup

from dao import dbdao
from util import loglib

logger = loglib.getlogger("stats_manager")


def create_files(filepath, symbol, url):
    try:
        page = urllib2.urlopen(url, timeout=10)
        soup = BeautifulSoup(page)
        columns = "Symbol^Item^q1^q2^q3^q4"
        f1 = open(filepath, "w")
        f1.write(columns)
        f1.write("\n")
        all_tables = soup.findAll("table", attrs={"class": "yfnc_tabledata1"})
        for data in all_tables:
            all_data = data.findAll("tr")
            # The original file is truncated inside this loop.
    except Exception, ex:
        logger.error(ex)
import json
import re

import urllib2
import pandas as pd
from BeautifulSoup import BeautifulSoup

from dao import dbdao
from util import loglib

logger = loglib.getlogger('google_live')


def getgoogledata(list_symbol):
    symbols = ','.join(list_symbol)
    print symbols
    url = 'http://finance.google.com/finance/info?q=%s' % symbols
    print url
    page = urllib2.urlopen(url, timeout=10)
    # The response is JSON prefixed with "//", which json.loads cannot parse.
    html = page.read().replace("//", "").strip()
    data = json.loads(html)
    df = pd.DataFrame(data)
    df.set_index('t', inplace=True)  # 't' holds the ticker symbol
    return df


dbdao.execute_query(['delete from google_live_indices_symbol',
                     'delete from google_live_symbol'])
list_symbol = dbdao.get_indices_symbols_list()
print list_symbol
df = getgoogledata(list_symbol)
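# --- Sketch (assumption, not in the original repo): the legacy Google Finance
# endpoint returned JSON prefixed with "//", roughly shaped like the sample
# below, where 't' is the ticker and 'l' the last price; this is why the
# function strips the prefix before json.loads.
sample = '// [{"t":"MSFT","e":"NASDAQ","l":"46.45"},{"t":"AAPL","e":"NASDAQ","l":"112.01"}]'
print pd.DataFrame(json.loads(sample.replace("//", "").strip())).set_index('t')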
import csv

from dao2 import dbdao
from util import loglib, datetimeutil, constants
from bl import historical_dates_manager

logger = loglib.getlogger('historical_dates_manager')


def calculate_history_dates():
    # mysqldao = dbutil.mysqldb()
    # print technicals.gethistory_dates()
    header = "DateType,Date"
    hist_dates = historical_dates_manager.gethistory_dates()
    filename = 'data/historical_date/hist_dates'
    f1 = open(filename, 'wb')
    f1.write(header)
    f1.write('\n')
    writer = csv.writer(f1)
    for key, value in hist_dates.items():
        writer.writerow([key, value])
    f1.close()
    delete_sql = "delete from historicaldates"
    insert_sql = """ LOAD DATA LOCAL INFILE '%s' INTO TABLE historicaldates
        FIELDS TERMINATED BY ',' LINES TERMINATED BY "\n" IGNORE 1 LINES
        (@DateType,@Date)
        SET DateType=@DateType, Date=@Date""" % filename
    # The original file is truncated mid-statement; the SET clause and the
    # execution below are a minimal completion (assumed), matching the
    # delete-then-load pattern used elsewhere in this repo.
    dbdao.execute_query([delete_sql, insert_sql])
import datetime as dt
from datetime import timedelta

from dateutil.relativedelta import relativedelta

# from utils import fin
from util import loglib, datetimeutil, constants
from dao import mongodao

logger = loglib.getlogger('historical_dates_manager')


def gethistory_dates():
    end_date = dt.datetime.now()
    start_date = end_date + relativedelta(years=-2)
    # df1 = mongodao.get_symbollist_data(list_symbols, start_date, end_date)
    curdate = dt.datetime.now().date()
    month = curdate.month
    monthstartdate = dt.date(curdate.year, curdate.month, 1)
    yearstartdate = dt.date(curdate.year, 1, 1)
    # Quarters start in months 1, 4, 7 and 10.
    if month > 3:
        if month > 3 and month <= 6:
            quartermonth = 4
        elif month > 6 and month <= 9:
            quartermonth = 7
        else:
            quartermonth = 10
    else:
        quartermonth = 1
    # The original file is truncated mid-branch; the remaining branches and
    # the quarter-start date below are a minimal completion (assumed).
    quarterstartdate = dt.date(curdate.year, quartermonth, 1)
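# --- Sketch (assumption, not in the original repo): the branching above maps
# a month to the first month of its quarter, e.g. August -> July 1. A closed
# form gives the same result:
sample = dt.date(2015, 8, 14)
q_month = ((sample.month - 1) // 3) * 3 + 1
print dt.date(sample.year, q_month, 1)  # 2015-07-01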
import csv
import datetime
import os

import numpy as np
import pandas as pd
import talib
from talib import MA_Type, abstract
from dateutil.relativedelta import relativedelta

from dao import dbdao, mongodao
from util import constants, loglib
from bl import rating_manager

logger = loglib.getlogger('rating')


def symbol_rating_calc(days_behind):
    end_date_time = datetime.datetime.now()  # mysqldb.get_maxdate()[0]
    start_date_time = end_date_time - relativedelta(days=700)
    list_symbol = dbdao.get_symbols_list()
    df_mkt = mongodao.getsymbol_data_temp(constants.MKT_SYMBOL, start_date_time, end_date_time)
    for symbol in list_symbol:
        try:
            df_symbol = mongodao.getsymbol_data_temp(symbol, start_date_time, end_date_time)
            if df_symbol.empty:
                # The original file is truncated here; skipping symbols with
                # no history (assumed) keeps the snippet syntactically valid.
                continue
        except Exception, ex:
            logger.error(ex)
from bl import news_manager
from util import loglib

logger = loglib.getlogger('news')

logger.info("Retrieving news from all sources")
filepath = 'data/news/ALL_NEWS.csv'
news_manager.save_NEWS(filepath)
logger.info("saved all news")