import datetime
import logging
import os
import sys
import time
from datetime import date

import cjson

# Project-specific dependencies (the SQLAlchemy session, model classes such as
# Company/IPOInfoUrl/HistoricalIPO/Underwriter, and the scraper/fetcher helpers)
# are assumed to be imported from elsewhere in the package and are not shown here.


def manual_add_ipo_url():
    # Manually record a handful of IPO detail-page URLs and their symbols.
    ipo_url = IPOInfoUrl(
        'VOXELJET AG', 'VJET',
        'http://www.nasdaq.com/markets/ipos/company/voxeljet-ag-915787-73505')
    session.add(ipo_url)
    session.commit()
    ipo_url = IPOInfoUrl(
        'AERIE PHARMACEUTICALS INC', 'AERI',
        'http://www.nasdaq.com/markets/ipos/company/aerie-pharmaceuticals-inc-684178-73508')
    session.add(ipo_url)
    session.commit()
    ipo_url = IPOInfoUrl(
        'ENDOCHOICE HOLDINGS, INC.', 'GI',
        'http://www.nasdaq.com/markets/ipos/company/endochoice-holdings-inc-948923-78332')
    session.add(ipo_url)
    session.commit()
    ipo_url = IPOInfoUrl(
        'STG GROUP, INC.', 'GDEF',
        'http://www.nasdaq.com/markets/ipos/company/global-defense-national-security-systems-inc-915709-73501')
    session.add(ipo_url)
    session.commit()
    session.close()
def get_ipo_url():
    """Walk NASDAQ's monthly pricing pages backwards from today to 2010-01 and
    record each IPO's company name, symbol, and detail-page URL."""
    logging.basicConfig()
    log = logging.getLogger("savant")
    base_url = "http://www.nasdaq.com/markets/ipos/activity.aspx?tab=pricings&month="
    # Anchor on the first of the month so stepping back a month never produces
    # an invalid day-of-month.
    cur_date = date.today().replace(day=1)
    oldest_date = date(2010, 1, 1)
    while cur_date >= oldest_date:
        month_str = cur_date.strftime("%Y-%m")
        log.info("Getting IPO urls for %s", month_str)
        print "Getting IPO urls for", month_str
        url = base_url + month_str
        # Step back one month before the next iteration.
        if cur_date.month != 1:
            cur_date = cur_date.replace(month=cur_date.month - 1)
        else:
            cur_date = cur_date.replace(year=cur_date.year - 1, month=12)
        try:
            soup = scraper.get_soup(url)
        except Exception:
            log.info("Could not reach %s", url)
            continue
        table = soup.find("div", {"class": "genTable"})
        if "no data" in table.text:
            log.info("No data for %s", month_str)
            continue
        rows = table.tbody.find_all("tr")
        for row in rows:
            tds = row.find_all("td")
            name = tds[0].text
            url = tds[0].a["href"]
            symbol = tds[1].text
            # Skip companies already recorded under the same name or symbol.
            if IPOInfoUrl.query.filter_by(name=name).first() is not None:
                continue
            if IPOInfoUrl.query.filter_by(symbol=symbol).first() is not None:
                continue
            ipo_url = IPOInfoUrl(name, symbol, url)
            session.add(ipo_url)
            session.commit()
    session.close()
def populate_ipo_underwriter():
    """Scrape the 'experts' tab of each IPO's detail page and associate the
    company with its underwriters."""
    ipo_urls = IPOInfoUrl.query.all()
    for url in ipo_urls:
        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            continue
        hi = HistoricalIPO.query.filter_by(company_id=comp.id).first()
        if not hi:
            continue
        assoc = CompanyUnderwriterAssociation.query.filter_by(company_id=comp.id).first()
        if assoc:
            print "underwriter for", url.symbol, "already in database"
            continue
        exp_url = url.url + '?tab=experts'
        uw_data = scrape_ipo_underwriter(exp_url)
        if uw_data is None:
            continue
        print url.symbol, uw_data
        # Entries from uw_data[0] are stored with lead=True, those from
        # uw_data[1] with lead=False.
        for i in [0, 1]:
            for uw_item in uw_data[i]:
                if not uw_item:
                    continue
                uw = Underwriter.query.filter_by(name=uw_item).first()
                if not uw:
                    uw = Underwriter(name=uw_item)
                    session.add(uw)
                    session.commit()
                assoc = CompanyUnderwriterAssociation.query.filter_by(
                    company_id=comp.id).filter_by(underwriter_id=uw.id).first()
                if assoc:
                    continue
                lead = (i == 0)
                assoc = CompanyUnderwriterAssociation(comp.id, uw.id, lead)
                session.add(assoc)
                session.commit()
def get_company_overview(symbol):
    """Return a Company for the symbol, scraping NASDAQ first and falling back
    to Yahoo when the NASDAQ page yields almost nothing."""
    existing = Company.query.filter_by(symbol=symbol).first()
    if existing:
        return existing
    data = scrape_nasdaq(symbol)
    in_nas = 1
    if not data:
        return None
    elif len(data.keys()) == 1:
        in_nas = 2
        data.update(scrape_yahoo(symbol, full=True))
    else:
        in_nas = 3
        data.update(scrape_yahoo(symbol))
    if len(data) == 1:
        return None
    if data["symbol"] == 'AMBI':
        print symbol, in_nas
    #existing = Company.query.filter_by(name=data["name"]).first()
    #if existing:
    #    return existing
    # Replace the exchange/industry/sector strings with foreign-key ids,
    # creating the lookup rows if they do not exist yet.
    if "exchange" in data:
        exch = Exchange.query.filter_by(name=data["exchange"]).first()
        if not exch:
            exch = Exchange(name=data["exchange"])
            session.add(exch)
            session.commit()
        del data["exchange"]
        data["exchange_id"] = exch.id
    if "industry" in data:
        indus = Industry.query.filter_by(name=data["industry"]).first()
        if not indus:
            indus = Industry(name=data["industry"])
            session.add(indus)
            session.commit()
        del data["industry"]
        data["industry_id"] = indus.id
    if "sector" in data:
        sect = Sector.query.filter_by(name=data["sector"]).first()
        if not sect:
            sect = Sector(name=data["sector"])
            session.add(sect)
            session.commit()
        del data["sector"]
        data["sector_id"] = sect.id
    comp = Company(**data)
    return comp
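# Usage sketch for get_company_overview (an illustration, not part of the
# original module): add_company_if_missing is a hypothetical helper name and
# relies on the same Company model and session assumed above.
def add_company_if_missing(symbol):
    comp = get_company_overview(symbol)
    if comp is None:
        return None
    # get_company_overview returns an existing row when one is found, so only
    # persist genuinely new companies.
    if not Company.query.filter_by(symbol=comp.symbol).first():
        session.add(comp)
        session.commit()
    return comp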
#unwr_dict = scraper.get_underwriters()
count = 0
for symbol in symbols:
    count += 1
    if count % 10 == 0:
        print count
    if "-" in symbol:
        continue
    comp = scraper.get_company_overview(symbol)
    # if comp and not Company.query.filter_by(symbol=comp.symbol).first() and not Company.query.filter_by(name=comp.name).first():
    if comp:
        if not Company.query.filter_by(symbol=comp.symbol).first():
            session.add(comp)
            session.commit()
        else:
            print "Company exists in db"
    """
    if symbol in unwr_dict:
        underwriters = [u.strip() for u in unwr_dict[symbol].split("/")]
        for u in underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
                session.add(unwr)
                session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=True)
def populate_ipo_table():
    """Scrape each IPO's detail page, pull (or fetch) its first-day tick data,
    and store the result as a HistoricalIPO row."""
    ipo_urls = IPOInfoUrl.query.all()
    known_unwrs = set()
    for url in ipo_urls:
        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            # session.add(comp)
            # session.commit()
            continue
        if HistoricalIPO.query.filter_by(company_id=comp.id).first():
            print "Data exists for:", url.symbol
            continue
        # comp = get_company_overview(url.symbol)
        # if not comp:
        #     log.warning("Cannot get company info for %s" % url.symbol)
        #     continue
        ipo_data = scrape_ipo(url.url)
        if ipo_data == {}:
            continue
        log.info("IPO data from NASDAQ.com:\n%s" % cjson.encode(ipo_data))
        underwriters = ipo_data["underwriters"]
        lead_underwriters = ipo_data["lead_underwriters"]
        del ipo_data["underwriters"]
        del ipo_data["lead_underwriters"]
        ipo_date = ipo_data["ipo_date"]
        try:
            month, day, year = [int(i) for i in ipo_date.split("/")]
            ipo_date = datetime.date(year, month, day).strftime("%Y%m%d")
            #ipo_data["ipo_date"] = datetime.date(year, month, day).strftime("%Y-%m-%d")
            ipo_data["ipo_date"] = datetime.date(year, month, day)
        except Exception:
            log.error("Error in IPO date:%s" % url.symbol)
            continue
        # Use previously downloaded tick data if present; otherwise ask the
        # fetcher service to retrieve it.
        ipo_data_dir = os.path.join(tickdata_dir, ipo_date)
        ipo_data_path = os.path.join(ipo_data_dir, "%s_markethours.csv.gz" % url.symbol)
        exist = False
        if os.path.exists(ipo_data_dir) and os.path.exists(ipo_data_path):
            exist = True
            log.info("IPO data found")
        else:
            request = {
                "command": "get",
                "symbol": url.symbol,
                "date": ipo_date,
                "gettrade": "true",
                "getquote": "true"
            }
            try:
                fetcher_caller = fetcher.FetcherCaller()
                fetcher_caller.set_request(cjson.encode(request))
                response = fetcher_caller.send_request()
                fetcher_caller.close()
            except Exception:
                log.error("Unable to send fetch request")
                continue
            # Poll for up to 60 seconds while the fetcher writes the file.
            count_down = 60
            fetched = False
            while count_down > 0:
                if os.path.exists(ipo_data_path):
                    log.info("IPO data fetched: %s" % url.symbol)
                    fetched = True
                    time.sleep(5)
                    break
                time.sleep(1)
                count_down -= 1
            if not fetched:
                log.error("Unable to download data for %s" % url.symbol)
        if exist or fetched:
            itd = process_ipo_tick_data(url.symbol, ipo_date)
            ipo_data["open_vol"] = itd["open_vol"]
            ipo_data["first_opening_price"] = itd["first_opening_price"]
            ipo_data["first_closing_price"] = itd["first_closing_price"]
            ipo_data["first_trade_time"] = itd["first_trade_time"]
            ipo_data["first_day_high"] = itd["first_day_high"]
            ipo_data["first_day_low"] = itd["first_day_low"]
            ipo_data["first_day_high_percent_change"] = itd["first_day_high_percent_change"]
            ipo_data["first_day_low_percent_change"] = itd["first_day_low_percent_change"]
            ipo_data["first_day_volume"] = itd["first_day_volume"]
        else:
            ipo_data["open_vol"] = None
            ipo_data["first_opening_price"] = None
            ipo_data["first_closing_price"] = None
            ipo_data["first_trade_time"] = None
            ipo_data["first_day_high"] = None
            ipo_data["first_day_low"] = None
            ipo_data["first_day_high_percent_change"] = None
            ipo_data["first_day_low_percent_change"] = None
            ipo_data["first_day_volume"] = None
        ipo_data["scoop_rating"] = 0
        ipo_data["company_id"] = comp.id
        log.info("Final IPO data for %s:\n%s" % (url.symbol, ipo_data))
        """
        for u in underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
                session.add(unwr)
                session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=False)
            comp.underwriters.append(a)
            session.commit()
        for u in lead_underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
                session.add(unwr)
                session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=True)
            comp.underwriters.append(a)
            session.commit()
        """
        historical_ipo = HistoricalIPO(**ipo_data)
        session.add(historical_ipo)
        session.commit()
#else:
#    data.update(scraper.scrape_yahoo(symbol))
if len(data) == 1:
    continue
if Company.query.filter_by(name=data["name"]).first():
    continue
print data
print known_exchs
if "exchange" in data:
    if data["exchange"] not in known_exchs:
        print >> sys.stderr, data["exchange"]
        exch = Exchange(name=data["exchange"])
        session.add(exch)
        session.commit()
        known_exchs.add(data["exchange"])
    else:
        exch = Exchange.query.filter_by(name=data["exchange"]).first()
    del data["exchange"]
    data["exchange_id"] = exch.id
if "industry" in data:
    if data["industry"] not in known_industries:
        indus = Industry(name=data["industry"])
        session.add(indus)
        session.commit()
        known_industries.add(data["industry"])
    else:
        indus = Industry.query.filter_by(name=data["industry"]).first()
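# Driver sketch (an assumption about how the pieces are wired together, not
# code from the original module): collect the IPO detail-page URLs first, then
# build the historical IPO table, then attach underwriters scraped from each
# company's "experts" tab.
if __name__ == "__main__":
    get_ipo_url()
    populate_ipo_table()
    populate_ipo_underwriter()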