def remove_invalid_sym():
    # Collect companies whose symbols are no longer listed on NYSE/NASDAQ.
    valid = savant.scraper.get_symbols("NYSE")
    valid += savant.scraper.get_symbols("NASDAQ")
    comps = Company.query.filter(~Company.symbol.in_(valid)).all()
    invalid_id = [x.id for x in comps]
    invalid_symbol = [x.symbol for x in comps]
    #print len(invalid_symbol)
    #iius = IPOInfoUrl.query.filter(IPOInfoUrl.symbol.in_(invalid_symbol)).all()
    if len(invalid_symbol) == 0:
        return

    # Remove the tick data files belonging to the invalid symbols.
    his = HistoricalIPO.query.filter(
        HistoricalIPO.company_id.in_(invalid_id)).all()
    ticker = TickDataProcessor()
    for hi in his:
        datestr = hi.ipo_date.strftime('%Y%m%d')
        try:
            paths = ticker.get_ticks_paths_by_date(hi.company.symbol, datestr)
            for path in paths:
                if path != "":
                    os.remove(path)
        except OSError:
            print "cannot find the file", path

    # Delete the database rows, children first, so no orphans are left.
    # synchronize_session='fetch' makes SQLAlchemy fetch the matched rows so
    # the in-memory session stays consistent after each bulk delete.
    HistoricalIPO.query.filter(
        HistoricalIPO.company_id.in_(invalid_id)).delete(
            synchronize_session='fetch')
    session.commit()
    IPOInfoUrl.query.filter(IPOInfoUrl.symbol.in_(invalid_symbol)).delete(
        synchronize_session='fetch')
    session.commit()
    CompanyUnderwriterAssociation.query.filter(
        CompanyUnderwriterAssociation.company_id.in_(invalid_id)).delete(
            synchronize_session='fetch')
    session.commit()
    PostIPOPrice.query.filter(PostIPOPrice.company_id.in_(invalid_id)).delete(
        synchronize_session='fetch')
    session.commit()
    Company.query.filter(
        Company.id.in_(invalid_id)).delete(synchronize_session='fetch')
    session.commit()
def populate_ipo_finance():
    ipo_urls = IPOInfoUrl.query.all()
    for url in ipo_urls:
        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            continue
        hi = HistoricalIPO.query.filter_by(company_id=comp.id).first()
        if not hi:
            continue
        if hi.revenue is not None:
            print "Finance Data existed", url.symbol
            continue
        fin_url = url.url + '?tab=financials'
        fin_data = scrape_ipo_finance(fin_url)
        #print fin_data
        # All five financial fields must be present before updating the row.
        if len(fin_data.keys()) < 5:
            print comp.symbol, 'is not updated due to missing financial data', len(fin_data.keys())
            continue
        hi.revenue = fin_data['revenue']
        hi.net_income = fin_data['net_income']
        hi.total_assets = fin_data['total_assets']
        hi.total_liability = fin_data['total_liabilities']
        hi.stakeholder_equity = fin_data['stockholders_equity']
        session.commit()
def manual_add_ipo_url():
    # One-off patches for IPO pages the scraper missed.
    ipo_url = IPOInfoUrl(
        'VOXELJET AG', 'VJET',
        'http://www.nasdaq.com/markets/ipos/company/voxeljet-ag-915787-73505')
    session.add(ipo_url)
    session.commit()
    ipo_url = IPOInfoUrl(
        'AERIE PHARMACEUTICALS INC', 'AERI',
        'http://www.nasdaq.com/markets/ipos/company/aerie-pharmaceuticals-inc-684178-73508')
    session.add(ipo_url)
    session.commit()
    ipo_url = IPOInfoUrl(
        'ENDOCHOICE HOLDINGS, INC.', 'GI',
        'http://www.nasdaq.com/markets/ipos/company/endochoice-holdings-inc-948923-78332')
    session.add(ipo_url)
    session.commit()
    ipo_url = IPOInfoUrl(
        'STG GROUP, INC.', 'GDEF',
        'http://www.nasdaq.com/markets/ipos/company/global-defense-national-security-systems-inc-915709-73501')
    session.add(ipo_url)
    session.commit()
    session.close()
def check_ipo_data_validity():
    # Validity codes written to HistoricalIPO.validity:
    #   1 = tick data file missing
    #   2 = incomplete tick data (AT volume well below Yahoo volume)
    #   3 = suspicious open volume
    #   4 = open price mismatch between AT and Yahoo
    ipos = session.query(
        Company,
        HistoricalIPO).filter(Company.id == HistoricalIPO.company_id).all()
    for ipo in ipos:
        symbol = ipo.Company.symbol
        datestr = str(ipo.HistoricalIPO.ipo_date).replace('-', '')
        tick_gz_path = settings.DATA_HOME + '/data/' + datestr + '/' + symbol + '_markethours.csv.gz'
        if not os.path.exists(tick_gz_path):
            hi = HistoricalIPO.query.filter(
                HistoricalIPO.company_id == ipo.Company.id).first()
            if hi is not None:
                hi.validity = 1
                session.commit()
        else:
            o_at = o_yh = v_at = v_yh = 0
            pips_at = PostIPOPriceAT.query.filter(
                PostIPOPriceAT.company_id == ipo.HistoricalIPO.company_id
            ).filter(PostIPOPriceAT.date == ipo.HistoricalIPO.ipo_date).all()
            pips_yh = PostIPOPriceYahoo.query.filter(
                PostIPOPriceYahoo.company_id == ipo.HistoricalIPO.company_id
            ).filter(
                PostIPOPriceYahoo.date == ipo.HistoricalIPO.ipo_date).all()
            if len(pips_at) > 0:
                o_at = pips_at[0].open
                v_at = pips_at[0].volume
            if len(pips_yh) > 0:
                o_yh = pips_yh[0].open
                v_yh = pips_yh[0].volume
            open_vol = ipo.HistoricalIPO.open_vol
            if v_at < v_yh / 1.2:
                print 'incomplete tick--', symbol, 'at:', o_at, v_at, 'yh:', o_yh, v_yh, 'open_vol:', open_vol
                hi = HistoricalIPO.query.filter(
                    HistoricalIPO.company_id == ipo.Company.id).first()
                if hi is not None:
                    hi.validity = 2
                    session.commit()
                continue
            if ipo.HistoricalIPO.open_vol < 5000:
                # Flag only when one of the AT/Yahoo rows is missing, their
                # volumes disagree by more than 20%, or the opens agree too
                # closely. Note: float() must wrap the numerator, not the
                # quotient, or Python 2 integer division truncates the ratio
                # before it is compared.
                if min(v_at, v_yh) == 0 or float(
                        max(v_at, v_yh)) / min(v_at, v_yh) > 1.2 or abs(
                            o_at - o_yh) < 0.02:
                    print 'suspicious volume--', symbol, 'at:', o_at, v_at, 'yh:', o_yh, v_yh, 'open_vol:', open_vol
                    hi = HistoricalIPO.query.filter(
                        HistoricalIPO.company_id == ipo.Company.id).first()
                    if hi is not None:
                        hi.validity = 3
                        session.commit()
                    continue
            #if float(max(v_at, v_yh))/min(v_at, v_yh) > 1.5 and float(max(v_at, v_yh))/min(v_at, v_yh) < 2.0:
            #if float(max(v_at, v_yh))/min(v_at, v_yh) < 1.2:
            # Volumes match; now compare the opening prices.
            if abs(o_at - o_yh) > 0.02:
                hi = HistoricalIPO.query.filter(
                    HistoricalIPO.company_id == ipo.Company.id).first()
                if hi is not None:
                    hi.validity = 4
                    session.commit()
                print 'mismatch open--', symbol, 'at:', o_at, v_at, 'yh:', o_yh, v_yh, 'open_vol:', open_vol
                continue
            # Open prices match: leave validity at its default.
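# A minimal standalone sketch of the volume-agreement test used above, kept
# here for clarity; the helper name volumes_agree is hypothetical, not part
# of this module. It coerces the numerator to float before dividing so
# Python 2 integer division cannot truncate the ratio.
def volumes_agree(v1, v2, tol=1.2):
    """Return True when two volume figures are within a factor of `tol`."""
    if min(v1, v2) == 0:
        return False
    return float(max(v1, v2)) / min(v1, v2) <= tol
# e.g. volumes_agree(120000, 101000) -> True; volumes_agree(120000, 50000) -> False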
def update_scoop_rate():
    ipos = session.query(
        Company,
        HistoricalIPO).filter(Company.id == HistoricalIPO.company_id).all()
    for ipo in ipos:
        if ipo.HistoricalIPO.scoop_rating != 0:
            continue
        sym = ipo.Company.symbol
        rate = rate_finder(sym)
        if rate is None:
            continue
        # Non-numeric ratings map to a zero rating.
        if rate == 'N/A' or rate == 'N/C':
            rate = 0
        hi = HistoricalIPO.query.filter(
            HistoricalIPO.company_id == ipo.Company.id).first()
        if hi is None:  # should not happen
            continue
        hi.scoop_rating = rate
        session.commit()
def get_ipo_url():
    logging.basicConfig()
    log = logging.getLogger("savant")
    base_url = "http://www.nasdaq.com/markets/ipos/activity.aspx?tab=pricings&month="
    # Normalize to the first of the month so .replace(month=...) cannot
    # produce an invalid date when today falls on day 29-31.
    cur_date = date.today().replace(day=1)
    oldest_date = date(2010, 1, 1)
    # Walk backwards one month at a time until January 2010.
    while cur_date >= oldest_date:
        log.info("Getting IPO urls for %s", cur_date.strftime("%Y-%m"))
        print "Getting IPO urls for", cur_date.strftime("%Y-%m")
        url = base_url + cur_date.strftime("%Y-%m")
        if cur_date.month != 1:
            cur_date = cur_date.replace(month=cur_date.month - 1)
        else:
            cur_date = cur_date.replace(year=cur_date.year - 1, month=12)
        try:
            soup = scraper.get_soup(url)
        except:
            log.info("Could not reach %s" % url)
            continue
        table = soup.find("div", {"class": "genTable"})
        if table is None or "no data" in table.text:
            log.info("No data for %s" % cur_date.strftime("%Y-%m"))
            continue
        rows = table.tbody.find_all("tr")
        for row in rows:
            tds = row.find_all("td")
            name = tds[0].text
            url = tds[0].a["href"]
            symbol = tds[1].text
            ipo_url = IPOInfoUrl(name, symbol, url)
            # Skip entries already recorded under the same name or symbol.
            if IPOInfoUrl.query.filter_by(name=name).first() is not None:
                continue
            if IPOInfoUrl.query.filter_by(symbol=symbol).first() is not None:
                continue
            session.add(ipo_url)
            session.commit()
    session.close()
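# A minimal sketch of the month-walking logic in get_ipo_url, isolated so the
# date arithmetic can be tested without scraping; iter_months is a
# hypothetical helper, assuming only the standard-library date imported above.
def iter_months(newest, oldest):
    """Yield the first of each month from `newest` back to `oldest`."""
    cur = newest.replace(day=1)
    while cur >= oldest:
        yield cur
        if cur.month != 1:
            cur = cur.replace(month=cur.month - 1)
        else:
            cur = cur.replace(year=cur.year - 1, month=12)
# e.g. [d.strftime("%Y-%m") for d in iter_months(date.today(), date(2010, 1, 1))]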
def populate_ipo_underwriter():
    # uw_data from scrape_ipo_underwriter is a pair of lists: index 0 holds
    # the lead underwriters, index 1 the rest.
    ipo_urls = IPOInfoUrl.query.all()
    for url in ipo_urls:
        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            continue
        hi = HistoricalIPO.query.filter_by(company_id=comp.id).first()
        if not hi:
            continue
        assoc = CompanyUnderwriterAssociation.query.filter_by(
            company_id=comp.id).first()
        if assoc:
            print "underwriter for", url.symbol, "already in database"
            continue
        exp_url = url.url + '?tab=experts'
        uw_data = scrape_ipo_underwriter(exp_url)
        if uw_data is None:
            continue
        print url.symbol, uw_data
        for i in [0, 1]:
            for uw_item in uw_data[i]:
                if not uw_item:
                    continue
                uw = Underwriter.query.filter_by(name=uw_item).first()
                if not uw:
                    uw = Underwriter(name=uw_item)
                    session.add(uw)
                    session.commit()
                assoc = CompanyUnderwriterAssociation.query.filter_by(
                    company_id=comp.id).filter_by(
                        underwriter_id=uw.id).first()
                if assoc:
                    continue
                lead = (i == 0)
                assoc = CompanyUnderwriterAssociation(comp.id, uw.id, lead)
                session.add(assoc)
                session.commit()
def update_ipo_tick_info():
    # Fill in first-day trading statistics for IPOs that have tick data on
    # disk but no open_vol recorded yet.
    ipos = session.query(
        Company,
        HistoricalIPO).filter(Company.id == HistoricalIPO.company_id).filter(
            HistoricalIPO.open_vol == None).all()
    for ipo in ipos:
        sym = ipo.Company.symbol
        ipo_date = ipo.HistoricalIPO.ipo_date.strftime('%Y%m%d')
        ipo_data_dir = os.path.join(tickdata_dir, ipo_date)
        ipo_data_path = os.path.join(ipo_data_dir,
                                     "%s_markethours.csv.gz" % sym)
        if os.path.exists(ipo_data_dir) and os.path.exists(ipo_data_path):
            # Handle exceptions. WLH has trades prior to its IPO and has no
            # market-open signal.
            # if sym == 'WLH':
            #     open_vol = 1188834
            # elif sym == 'FCAU':
            #     open_vol = 242453
            # else:
            print sym
            ticks = data_processor.get_ticks_by_date(sym, ipo_date, ipo_date)
            analyzer = TickDataAnalyzer(ticks)
            open_vol = analyzer.get_open_vol()
            hi = HistoricalIPO.query.filter(
                HistoricalIPO.company_id == ipo.Company.id).first()
            if hi is None:  # should not happen
                continue
            hi.open_vol = open_vol
            hi.first_opening_price = analyzer.get_opening_price()
            hi.first_closing_price = analyzer.get_closing_price()
            hi.first_trade_time = analyzer.get_first_trade_time()
            hi.first_day_high = analyzer.get_high_price()
            hi.first_day_low = analyzer.get_low_price()
            hi.first_day_high_percent_change = analyzer.get_high_percent_change()
            hi.first_day_low_percent_change = analyzer.get_low_percent_change()
            hi.first_day_volume = analyzer.get_volume()
            #print open_vol
            session.commit()
def get_company_overview(symbol):
    existing = Company.query.filter_by(symbol=symbol).first()
    if existing:
        return existing
    # Prefer NASDAQ data; fall back to (or supplement with) Yahoo.
    data = scrape_nasdaq(symbol)
    in_nas = 1
    if not data:
        return None
    elif len(data.keys()) == 1:
        in_nas = 2
        data.update(scrape_yahoo(symbol, full=True))
    else:
        in_nas = 3
        data.update(scrape_yahoo(symbol))
    if len(data) == 1:
        return None
    if data["symbol"] == 'AMBI':  # debugging leftover for a problem symbol
        print symbol, in_nas
    #existing = Company.query.filter_by(name=data["name"]).first()
    #if existing:
    #    return existing
    # Normalize exchange/industry/sector names into their own tables and
    # replace them with foreign keys.
    if "exchange" in data:
        exch = Exchange.query.filter_by(name=data["exchange"]).first()
        if not exch:
            exch = Exchange(name=data["exchange"])
            session.add(exch)
            session.commit()
        del data["exchange"]
        data["exchange_id"] = exch.id
    if "industry" in data:
        indus = Industry.query.filter_by(name=data["industry"]).first()
        if not indus:
            indus = Industry(name=data["industry"])
            session.add(indus)
            session.commit()
        del data["industry"]
        data["industry_id"] = indus.id
    if "sector" in data:
        sect = Sector.query.filter_by(name=data["sector"]).first()
        if not sect:
            sect = Sector(name=data["sector"])
            session.add(sect)
            session.commit()
        del data["sector"]
        data["sector_id"] = sect.id
    comp = Company(**data)
    return comp
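# Hedged usage sketch for get_company_overview: the function returns the
# existing row when one is present, so callers only need to persist genuinely
# new companies (this mirrors the population loop further down).
# comp = get_company_overview("VJET")
# if comp and not Company.query.filter_by(symbol=comp.symbol).first():
#     session.add(comp)
#     session.commit()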
def patch_ipo_date():
    # Symbols with known-bad IPO dates; an empty string means the event was
    # not a real IPO (possibly a secondary offering) and should be purged.
    wrong_ipo = {
        'HHC': '20101105',
        'CHKR': '20111111',
        'NLNK': '20111111',
        # 'WLH': '20130516',
        # 'RTRX': '20140110',
        # 'RXDX': '20140314',
        'VGGL': '',
        # 'FCAU': '20141212',
        'BLMT': '20111005',
        'XELB': ''
    }
    res = session.query(
        Company,
        HistoricalIPO).filter(Company.id == HistoricalIPO.company_id).filter(
            Company.symbol.in_(wrong_ipo.keys())).all()
    tickdata_dir = settings.DOWNLOAD_DIR
    ticker = TickDataProcessor()
    for r in res:
        symbol = r.Company.symbol
        id = r.Company.id
        # Remove the tick data recorded under the wrong date.
        datestr = r.HistoricalIPO.ipo_date.strftime('%Y%m%d')
        try:
            paths = ticker.get_ticks_paths_by_date(r.Company.symbol, datestr)
            for path in paths:
                if path != "":
                    os.remove(path)
        except OSError:
            print "cannot find the file", path
        if wrong_ipo[symbol] == "":
            # Not an actual IPO (might be an SPO): remove the symbol from all
            # IPO-related tables.
            HistoricalIPO.query.filter(HistoricalIPO.company_id == id).delete(
                synchronize_session='fetch')
            session.commit()
            IPOInfoUrl.query.filter(IPOInfoUrl.symbol == symbol).delete(
                synchronize_session='fetch')
            session.commit()
            PostIPOPrice.query.filter(PostIPOPrice.company_id == id).delete(
                synchronize_session='fetch')
            session.commit()
        else:
            # Correct the date, then fetch tick data for the right day.
            hi = HistoricalIPO.query.filter(
                HistoricalIPO.company_id == id).first()
            hi.ipo_date = datetime.strptime(wrong_ipo[symbol], '%Y%m%d').date()
            session.commit()
            ipo_data_dir = os.path.join(tickdata_dir, wrong_ipo[symbol])
            ipo_data_path = os.path.join(ipo_data_dir,
                                         "%s_markethours.csv.gz" % symbol)
            if os.path.exists(ipo_data_dir) and os.path.exists(ipo_data_path):
                print "IPO data found"
            else:
                request = {
                    "command": "get",
                    "symbol": symbol,
                    "date": wrong_ipo[symbol],
                    "gettrade": "true",
                    "getquote": "true"
                }
                print request
                print cjson.encode(request)
                fetcher_caller = fetcher.FetcherCaller()
                fetcher_caller.set_request(cjson.encode(request))
                try:
                    response = fetcher_caller.send_request()
                    fetcher_caller.close()
                except:
                    print "Unable to send fetch request"
                    continue
                # Poll up to 60 seconds for the fetcher to deliver the file.
                count_down = 60
                fetched = False
                while count_down > 0:
                    if os.path.exists(ipo_data_path):
                        print "IPO data fetched:", symbol
                        fetched = True
                        time.sleep(5)  # let the fetcher finish writing
                        break
                    time.sleep(1)
                    count_down -= 1
                if not fetched:
                    print "Unable to download data for", symbol
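# The download-polling loop in patch_ipo_date (and populate_ipo_table below)
# could be factored into a helper like this; wait_for_file is a hypothetical
# name, shown as a sketch assuming only the os and time imports above.
def wait_for_file(path, timeout=60, settle=5):
    """Poll for `path` up to `timeout` seconds; pause `settle` once it appears."""
    for _ in range(timeout):
        if os.path.exists(path):
            time.sleep(settle)  # let the writer finish flushing the file
            return True
        time.sleep(1)
    return False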
# Module-level population loop; assumes `symbols` was built earlier in the
# script (e.g. via scraper.get_symbols).
#unwr_dict = scraper.get_underwriters()
count = 0
for symbol in symbols:
    count += 1
    if count % 10 == 0:
        print count
    if "-" in symbol:
        continue
    comp = scraper.get_company_overview(symbol)
    # if comp and not Company.query.filter_by(symbol=comp.symbol).first() and not Company.query.filter_by(name=comp.name).first():
    if comp:
        if not Company.query.filter_by(symbol=comp.symbol).first():
            session.add(comp)
            session.commit()
        else:
            print "Company exists in db"
    """
    if symbol in unwr_dict:
        underwriters = [u.strip() for u in unwr_dict[symbol].split("/")]
        for u in underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
                session.add(unwr)
                session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=True)
            comp.underwriters.append(a)
    """
def populate_ipo_table():
    ipo_urls = IPOInfoUrl.query.all()
    known_unwrs = set()
    for url in ipo_urls:
        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            # session.add(comp)
            # session.commit()
            continue
        if HistoricalIPO.query.filter_by(company_id=comp.id).first():
            print "Data exists for:", url.symbol
            continue
        # comp = get_company_overview(url.symbol)
        # if not comp:
        #     log.warning("Cannot get company info for %s" % url.symbol)
        #     continue
        ipo_data = scrape_ipo(url.url)
        if ipo_data == {}:
            continue
        log.info("IPO data from NASDAQ.com:\n%s" % cjson.encode(ipo_data))
        underwriters = ipo_data["underwriters"]
        lead_underwriters = ipo_data["lead_underwriters"]
        del ipo_data["underwriters"]
        del ipo_data["lead_underwriters"]
        ipo_date = ipo_data["ipo_date"]
        try:
            # NASDAQ reports the date as MM/DD/YYYY.
            month, day, year = [int(i) for i in ipo_date.split("/")]
            ipo_date = datetime.date(year, month, day).strftime("%Y%m%d")
            #ipo_data["ipo_date"] = datetime.date(year, month, day).strftime("%Y-%m-%d")
            ipo_data["ipo_date"] = datetime.date(year, month, day)
        except:
            log.error("Error in IPO date:%s" % url.symbol)
            continue
        ipo_data_dir = os.path.join(tickdata_dir, ipo_date)
        ipo_data_path = os.path.join(ipo_data_dir,
                                     "%s_markethours.csv.gz" % url.symbol)
        exist = False
        if os.path.exists(ipo_data_dir) and os.path.exists(ipo_data_path):
            exist = True
            log.info("IPO data found")
        else:
            request = {
                "command": "get",
                "symbol": url.symbol,
                "date": ipo_date,
                "gettrade": "true",
                "getquote": "true"
            }
            try:
                fetcher_caller = fetcher.FetcherCaller()
                fetcher_caller.set_request(cjson.encode(request))
                response = fetcher_caller.send_request()
                fetcher_caller.close()
            except:
                log.error("Unable to send fetch request")
                continue
            # Poll up to 60 seconds for the fetcher to deliver the file.
            count_down = 60
            fetched = False
            while count_down > 0:
                if os.path.exists(ipo_data_path):
                    log.info("IPO data fetched: %s" % url.symbol)
                    fetched = True
                    time.sleep(5)  # let the fetcher finish writing
                    break
                time.sleep(1)
                count_down -= 1
            if not fetched:
                log.error("Unable to download data for %s" % url.symbol)
        if exist or fetched:
            # Was process_ipo_tick_data(symbol, ...): `symbol` is undefined
            # in this scope, so pass url.symbol instead.
            itd = process_ipo_tick_data(url.symbol, ipo_date)
            ipo_data["open_vol"] = itd["open_vol"]
            ipo_data["first_opening_price"] = itd["first_opening_price"]
            ipo_data["first_closing_price"] = itd["first_closing_price"]
            ipo_data["first_trade_time"] = itd["first_trade_time"]
            ipo_data["first_day_high"] = itd["first_day_high"]
            ipo_data["first_day_low"] = itd["first_day_low"]
            ipo_data["first_day_high_percent_change"] = itd["first_day_high_percent_change"]
            ipo_data["first_day_low_percent_change"] = itd["first_day_low_percent_change"]
            ipo_data["first_day_volume"] = itd["first_day_volume"]
        else:
            ipo_data["open_vol"] = None
            ipo_data["first_opening_price"] = None
            ipo_data["first_closing_price"] = None
            ipo_data["first_trade_time"] = None
            ipo_data["first_day_high"] = None
            ipo_data["first_day_low"] = None
            ipo_data["first_day_high_percent_change"] = None
            ipo_data["first_day_low_percent_change"] = None
            ipo_data["first_day_volume"] = None
        ipo_data["scoop_rating"] = 0
        ipo_data["company_id"] = comp.id
        log.info("Final IPO data for %s:\n%s" % (url.symbol, ipo_data))
        """
        for u in underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
                session.add(unwr)
                session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=False)
            comp.underwriters.append(a)
            session.commit()
        for u in lead_underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
                session.add(unwr)
                session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=True)
            comp.underwriters.append(a)
            session.commit()
        """
        historical_ipo = HistoricalIPO(**ipo_data)
        session.add(historical_ipo)
        session.commit()