def remove_invalid_sym():
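    # Purge companies whose symbols are no longer listed on NYSE or NASDAQ:
    # delete their tick data files, then their IPO, URL, underwriter, and
    # price rows, and finally the Company rows themselves.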
    valid = savant.scraper.get_symbols("NYSE")
    valid += savant.scraper.get_symbols("NASDAQ")
    comps = Company.query.filter(~Company.symbol.in_(valid)).all()
    invalid_id = [x.id for x in comps]
    invalid_symbol = [x.symbol for x in comps]
    #print len(invalid_symbol)
    #iius = IPOInfoUrl.query.filter(IPOInfoUrl.symbol.in_(invalid_symbol)).all()
    if len(invalid_symbol) == 0:
        return
    his = HistoricalIPO.query.filter(
        HistoricalIPO.company_id.in_(invalid_id)).all()
    ticker = TickDataProcessor()
    for hi in his:
        datestr = hi.ipo_date.strftime('%Y%m%d')
        try:
            #remove them from tick data
            paths = ticker.get_ticks_paths_by_date(hi.company.symbol, datestr)
            for path in paths:
                if path != "":
                    os.remove(path)
        except OSError:
            print "cannot find the file", path

    HistoricalIPO.query.filter(
        HistoricalIPO.company_id.in_(invalid_id)).delete(
            synchronize_session='fetch')
    session.commit()
    IPOInfoUrl.query.filter(IPOInfoUrl.symbol.in_(invalid_symbol)).delete(
        synchronize_session='fetch')
    session.commit()
    CompanyUnderwriterAssociation.query.filter(
        CompanyUnderwriterAssociation.company_id.in_(invalid_id)).delete(
            synchronize_session='fetch')
    session.commit()
    PostIPOPrice.query.filter(PostIPOPrice.company_id.in_(invalid_id)).delete(
        synchronize_session='fetch')
    session.commit()
    Company.query.filter(
        Company.id.in_(invalid_id)).delete(synchronize_session='fetch')
    session.commit()
def populate_ipo_finance():
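    # Backfill financial fields on HistoricalIPO (revenue, net income,
    # assets, liabilities, equity) by scraping each IPO's "financials" tab.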
    ipo_urls = IPOInfoUrl.query.all()
    for url in ipo_urls:
        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            continue
        hi = HistoricalIPO.query.filter_by(company_id=comp.id).first()
        if not hi:
            continue
        if hi.revenue is not None:
            print "Finance data already exists for", url.symbol
            continue
        fin_url = url.url + '?tab=financials'
        fin_data = scrape_ipo_finance(fin_url)
        #print fin_data
        if len(fin_data) < 5:
            print comp.symbol, 'is not updated due to missing financial data', len(fin_data)
            continue
        hi.revenue = fin_data['revenue']
        hi.net_income = fin_data['net_income']
        hi.total_assets = fin_data['total_assets']
        hi.total_liability = fin_data['total_liabilities']
        hi.stakeholder_equity = fin_data['stockholders_equity']
        session.commit()
def manual_add_ipo_url():
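    # Hand-added IPOInfoUrl rows for companies not picked up by get_ipo_url().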
    ipo_url = IPOInfoUrl(
        'VOXELJET AG', 'VJET',
        'http://www.nasdaq.com/markets/ipos/company/voxeljet-ag-915787-73505')
    session.add(ipo_url)
    session.commit()

    ipo_url = IPOInfoUrl(
        'AERIE PHARMACEUTICALS INC', 'AERI',
        'http://www.nasdaq.com/markets/ipos/company/aerie-pharmaceuticals-inc-684178-73508'
    )
    session.add(ipo_url)
    session.commit()

    ipo_url = IPOInfoUrl(
        'ENDOCHOICE HOLDINGS, INC.', 'GI',
        'http://www.nasdaq.com/markets/ipos/company/endochoice-holdings-inc-948923-78332'
    )
    session.add(ipo_url)
    session.commit()

    ipo_url = IPOInfoUrl(
        'STG GROUP, INC.', 'GDEF',
        'http://www.nasdaq.com/markets/ipos/company/global-defense-national-security-systems-inc-915709-73501'
    )
    session.add(ipo_url)
    session.commit()

    session.close()
def check_ipo_data_validity():
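    # Flag questionable IPO tick data by cross-checking against the AT and
    # Yahoo post-IPO price tables. validity codes: 1 = tick file missing,
    # 2 = incomplete tick data, 3 = suspicious open volume, 4 = open price
    # mismatch between the two sources.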
    ipos = session.query(
        Company,
        HistoricalIPO).filter(Company.id == HistoricalIPO.company_id).all()
    for ipo in ipos:
        symbol = ipo.Company.symbol
        date = str(ipo.HistoricalIPO.ipo_date).replace('-', '')
        tick_gz_path = settings.DATA_HOME + '/data/' + date + '/' + symbol + '_markethours.csv.gz'
        if not os.path.exists(tick_gz_path):
            hi = HistoricalIPO.query.filter(
                HistoricalIPO.company_id == ipo.Company.id).first()
            if hi is not None:
                hi.validity = 1
                session.commit()
        else:
            o_at = o_yh = v_at = v_yh = 0
            pips_at = PostIPOPriceAT.query.filter(
                PostIPOPriceAT.company_id == ipo.HistoricalIPO.company_id
            ).filter(PostIPOPriceAT.date == ipo.HistoricalIPO.ipo_date).all()
            pips_yh = PostIPOPriceYahoo.query.filter(
                PostIPOPriceYahoo.company_id == ipo.HistoricalIPO.company_id
            ).filter(
                PostIPOPriceYahoo.date == ipo.HistoricalIPO.ipo_date).all()
            if len(pips_at) > 0:
                o_at = pips_at[0].open
                v_at = pips_at[0].volume
            if len(pips_yh) > 0:
                o_yh = pips_yh[0].open
                v_yh = pips_yh[0].volume
            open_vol = ipo.HistoricalIPO.open_vol
            if v_at < v_yh / 1.2:
                print 'incomplete tick--', symbol, 'at:', o_at, v_at, 'yh:', o_yh, v_yh, 'open_vol:', open_vol
                hi = HistoricalIPO.query.filter(
                    HistoricalIPO.company_id == ipo.Company.id).first()
                if hi is not None:
                    hi.validity = 2
                    session.commit()
                continue
            if ipo.HistoricalIPO.open_vol < 5000:
                #only if one of at or yh data is not present
                # float() must wrap the numerator; otherwise Python 2 integer
                # division truncates the ratio before the comparison
                if min(v_at, v_yh) == 0 or float(max(v_at, v_yh)) / min(
                        v_at, v_yh) > 1.2 or abs(o_at - o_yh) < 0.02:
                    print 'suspicious volume--', symbol, 'at:', o_at, v_at, 'yh:', o_yh, v_yh, 'open_vol:', open_vol
                    hi = HistoricalIPO.query.filter(
                        HistoricalIPO.company_id == ipo.Company.id).first()
                    if hi is not None:
                        hi.validity = 3
                    session.commit()
                    continue
            #if float(max(v_at, v_yh))/min(v_at, v_yh) > 1.5 and float(max(v_at, v_yh)/min(v_at, v_yh))< 2.0:
            #if float(max(v_at, v_yh))/min(v_at, v_yh) < 1.2 :   #vol match, does not matter
                if abs(o_at - o_yh) > 0.02:
                    hi = HistoricalIPO.query.filter(
                        HistoricalIPO.company_id == ipo.Company.id).first()
                    if hi is not None:
                        hi.validity = 4
                    session.commit()
                    print 'mismatch open--', symbol, 'at:', o_at, v_at, 'yh:', o_yh, v_yh, 'open_vol:', open_vol
                    continue  # open price match
def update_scoop_rate():
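    # Backfill SCOOP ratings for IPOs that still have the default rating of
    # 0; 'N/A' and 'N/C' responses are stored as 0.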
    ipos = session.query(Company, HistoricalIPO).filter(Company.id == HistoricalIPO.company_id).all()
    for ipo in ipos:
        if ipo.HistoricalIPO.scoop_rating != 0:
            continue
        sym = ipo.Company.symbol
        rate = rate_finder(sym)
        if rate is None:
            continue
        if rate in ('N/A', 'N/C'):
            rate = 0
        hi = HistoricalIPO.query.filter(HistoricalIPO.company_id == ipo.Company.id).first()
        if hi is None:
            continue  # should not happen
        hi.scoop_rating = rate
    session.commit()
def get_ipo_url():
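    # Walk NASDAQ's monthly IPO "pricings" pages backwards from the current
    # month to January 2010, recording each company's name, symbol, and
    # detail-page URL as an IPOInfoUrl row.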
    logging.basicConfig()
    log = logging.getLogger("savant")
    
    base_url = "http://www.nasdaq.com/markets/ipos/activity.aspx?tab=pricings&month="
    
    count = 0
    cur_date = date.today()
    oldest_date = date(2010, 1, 1)
    
    while cur_date >= oldest_date:
        log.info("Getting IPO urls for", cur_date.strftime("%Y-%m"))
        print "Getting IPO urls for", cur_date.strftime("%Y-%m")
        url = base_url + cur_date.strftime("%Y-%m")
        if cur_date.month != 1:
            cur_date = cur_date.replace(month=cur_date.month-1)
        else:
            cur_date = cur_date.replace(year=cur_date.year-1, month=12)
    
        try:
            soup = scraper.get_soup(url)
        except Exception:
            log.info("Could not reach %s" % url)
            continue
    
        table = soup.find("div", {"class": "genTable"})
        if "no data" in table.text:
            log.info("No data for %s" % cur_date.strftime("%Y-%m"))
            continue
    
        rows = table.tbody.find_all("tr")
        for row in rows:
            tds = row.find_all("td")
            name = tds[0].text
            url = tds[0].a["href"]
            symbol = tds[1].text
            ipo_url = IPOInfoUrl(name, symbol, url)
            if IPOInfoUrl.query.filter_by(name=name).first() is not None:
                continue
            if IPOInfoUrl.query.filter_by(symbol=symbol).first() is not None:
                continue
            session.add(ipo_url)
    
        session.commit()
    session.close()
def populate_ipo_underwriter():
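    # Scrape each IPO's "experts" tab and record underwriters in the
    # association table: uw_data[0] holds lead underwriters, uw_data[1]
    # the rest.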
    ipo_urls = IPOInfoUrl.query.all()
    for url in ipo_urls:
        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            continue
        hi = HistoricalIPO.query.filter_by(company_id=comp.id).first()
        if not hi:
            continue
        assoc = CompanyUnderwriterAssociation.query.filter_by(
            company_id=comp.id).first()
        if assoc:
            print "underwriter for", url.symbol, "already in database"
            continue

        exp_url = url.url + '?tab=experts'
        uw_data = scrape_ipo_underwriter(exp_url)
        if uw_data is None:
            continue
        print url.symbol, uw_data
        for i in [0, 1]:
            for uw_item in uw_data[i]:
                if not uw_item:
                    continue
                uw = Underwriter.query.filter_by(name=uw_item).first()
                if not uw:
                    uw = Underwriter(name=uw_item)
                    session.add(uw)
                    session.commit()

                assoc = CompanyUnderwriterAssociation.query.filter_by(
                    company_id=comp.id).filter_by(
                        underwriter_id=uw.id).first()
                if assoc:
                    continue

                if i == 0:
                    lead = True
                else:
                    lead = False
                assoc = CompanyUnderwriterAssociation(comp.id, uw.id, lead)
                session.add(assoc)
                session.commit()
def update_ipo_tick_info():
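    # Derive first-day trading statistics from tick data for IPOs whose
    # open_vol is still NULL and store them on the HistoricalIPO row.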
    ipos = session.query(
        Company,
        HistoricalIPO).filter(Company.id == HistoricalIPO.company_id).filter(
            HistoricalIPO.open_vol == None).all()
    for ipo in ipos:
        sym = ipo.Company.symbol
        ipo_date = ipo.HistoricalIPO.ipo_date.strftime('%Y%m%d')
        ipo_data_dir = os.path.join(tickdata_dir, ipo_date)
        ipo_data_path = os.path.join(ipo_data_dir,
                                     "%s_markethours.csv.gz" % sym)
        if os.path.exists(ipo_data_dir) and os.path.exists(ipo_data_path):
            # Handle exceptions: WLH has trades prior to the IPO and lacks a
            # market-open signal.
            # if sym == 'WLH':
            #     open_vol = 1188834
            # elif sym == 'FCAU':
            #     open_vol = 242453
            # else:
            print sym
            ticks = data_processor.get_ticks_by_date(sym, ipo_date, ipo_date)
            analyzer = TickDataAnalyzer(ticks)
            open_vol = analyzer.get_open_vol()
            hi = HistoricalIPO.query.filter(
                HistoricalIPO.company_id == ipo.Company.id).first()
            if hi is None:
                continue  # should not happen
            hi.open_vol = open_vol
            hi.first_opening_price = analyzer.get_opening_price()
            hi.first_closing_price = analyzer.get_closing_price()
            hi.first_trade_time = analyzer.get_first_trade_time()
            hi.first_day_high = analyzer.get_high_price()
            hi.first_day_low = analyzer.get_low_price()
            hi.first_day_high_percent_change = analyzer.get_high_percent_change()
            hi.first_day_low_percent_change = analyzer.get_low_percent_change()
            hi.first_day_volume = analyzer.get_volume()
            #print open_vol
    session.commit()
def get_company_overview(symbol):
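    # Assemble a Company row for `symbol` from NASDAQ and Yahoo overview
    # pages, creating Exchange/Industry/Sector lookup rows on demand.
    # Returns the existing row when the symbol is already in the database.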
    existing = Company.query.filter_by(symbol=symbol).first()
    if existing:
        return existing

    data = scrape_nasdaq(symbol)
    in_nas = 1 
    if not data:
        return None
    elif len(data.keys()) == 1:
        in_nas = 2 
        data.update(scrape_yahoo(symbol, full=True))
    else:
        in_nas = 3 
        data.update(scrape_yahoo(symbol))

    if len(data) == 1:
        return None

    if data["symbol"]=='AMBI':
        print symbol, in_nas

    #existing = Company.query.filter_by(name=data["name"]).first()
    #if existing:
    #    return existing

    if "exchange" in data:
        exch = Exchange.query.filter_by(name=data["exchange"]).first()
        if not exch:
            exch = Exchange(name=data["exchange"])
            session.add(exch)
            session.commit()
        del data["exchange"]
        data["exchange_id"] = exch.id

    if "industry" in data:
        indus = Industry.query.filter_by(name=data["industry"]).first()
        if not indus:
            indus = Industry(name=data["industry"])
            session.add(indus)
            session.commit()
        del data["industry"]
        data["industry_id"] = indus.id

    if "sector" in data:
        sect = Sector.query.filter_by(name=data["sector"]).first()
        if not sect:
            sect = Sector(name=data["sector"])
            session.add(sect)
            session.commit()
        del data["sector"]
        data["sector_id"] = sect.id

    comp = Company(**data)
    return comp
def patch_ipo_date():
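    # Fix known-bad IPO dates. An empty date string marks an entry that is
    # not a real IPO (possibly a secondary offering): its rows are deleted.
    # Otherwise the date is corrected and tick data re-fetched for that day.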
    wrong_ipo = {
        'HHC': '20101105',
        'CHKR': '20111111',
        'NLNK': '20111111',
        #             'WLH': '20130516',
        #             'RTRX': '20140110',
        #             'RXDX': '20140314',
        'VGGL': '',
        #             'FCAU': '20141212',
        'BLMT': '20111005',
        'XELB': ''
    }

    res = session.query(
        Company,
        HistoricalIPO).filter(Company.id == HistoricalIPO.company_id).filter(
            Company.symbol.in_(wrong_ipo.keys())).all()
    tickdata_dir = settings.DOWNLOAD_DIR
    ticker = TickDataProcessor()
    for r in res:
        symbol = r.Company.symbol
        id = r.Company.id
        #remove them from tick data
        datestr = r.HistoricalIPO.ipo_date.strftime('%Y%m%d')
        try:
            paths = ticker.get_ticks_paths_by_date(r.Company.symbol, datestr)
            for path in paths:
                if path != "":
                    os.remove(path)
        except OSError:
            print "cannot find the file", path
        if wrong_ipo[symbol] == "":
            # remove the data and remove the symbol from ipo related tables as this is not an actual IPO, might be SPO
            HistoricalIPO.query.filter(HistoricalIPO.company_id == id).delete(
                synchronize_session='fetch')
            session.commit()
            IPOInfoUrl.query.filter(IPOInfoUrl.symbol == symbol).delete(
                synchronize_session='fetch')
            session.commit()
            PostIPOPrice.query.filter(PostIPOPrice.company_id == id).delete(
                synchronize_session='fetch')
            session.commit()
        else:
            hi = HistoricalIPO.query.filter(
                HistoricalIPO.company_id == id).first()
            hi.ipo_date = datetime.strptime(wrong_ipo[symbol], '%Y%m%d').date()

            session.commit()
            # fetch tick data for the corrected date
            ipo_data_dir = os.path.join(tickdata_dir, wrong_ipo[symbol])
            ipo_data_path = os.path.join(ipo_data_dir,
                                         "%s_markethours.csv.gz" % symbol)
            if os.path.exists(ipo_data_dir) and os.path.exists(ipo_data_path):
                print "IPO data found"
            else:
                request = {
                    "command": "get",
                    "symbol": symbol,
                    "date": wrong_ipo[symbol],
                    "gettrade": "true",
                    "getquote": "true"
                }
                print request
                print cjson.encode(request)
                fetcher_caller = fetcher.FetcherCaller()
                fetcher_caller.set_request(cjson.encode(request))
                try:
                    response = fetcher_caller.send_request()
                    fetcher_caller.close()
                except Exception:
                    print "Unable to send fetch request"
                    continue

                count_down = 60
                fetched = False
                while count_down > 0:
                    if os.path.exists(ipo_data_path):
                        print "IPO data fetched:", symbol
                        fetched = True
                        time.sleep(5)
                        break
                    time.sleep(1)
                    count_down -= 1
                if not fetched:
                    print "Unable to download data for", symbol
#unwr_dict = scraper.get_underwriters()
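# Module-level driver: scrape an overview for every symbol in `symbols` and
# insert companies that are not yet in the database; symbols containing a
# dash are skipped.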
count = 0

for symbol in symbols:
    count += 1
    if count % 10 == 0:
        print count
    if "-" in symbol:
        continue

    comp = scraper.get_company_overview(symbol)
    #    if comp and not Company.query.filter_by(symbol=comp.symbol).first() and not Company.query.filter_by(name=comp.name).first():
    if comp:
        if not Company.query.filter_by(symbol=comp.symbol).first():
            session.add(comp)
            session.commit()
        else:
            print "Company exists in db"
    """
    if symbol in unwr_dict:
        underwriters = [u.strip() for u in unwr_dict[symbol].split("/")]
        for u in underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
            session.add(unwr)
            session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=True)
            comp.underwriters.append(a)
def populate_ipo_table():
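    # Core population pass: scrape IPO details for every IPOInfoUrl, fetch
    # first-day tick data (requesting it from the fetcher service when the
    # file is absent), compute first-day statistics, and insert the
    # HistoricalIPO row.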
    ipo_urls = IPOInfoUrl.query.all()

    known_unwrs = set()

    for url in ipo_urls:

        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            #        session.add(comp)
            #        session.commit()
            continue

        if HistoricalIPO.query.filter_by(company_id=comp.id).first():
            print "Data exists for:", url.symbol
            continue

    #    comp = get_company_overview(url.symbol)
    #    if not comp:
    #        log.warning("Cannot get company info for %s" % url.symbol)
    #        continue

        ipo_data = scrape_ipo(url.url)
        if ipo_data == {}:
            continue
        log.info("IPO data from NASDAQ.com:\n%s" % cjson.encode(ipo_data))
        underwriters = ipo_data["underwriters"]
        lead_underwriters = ipo_data["lead_underwriters"]
        del ipo_data["underwriters"]
        del ipo_data["lead_underwriters"]

        ipo_date = ipo_data["ipo_date"]
        try:
            month, day, year = [int(i) for i in ipo_date.split("/")]
            ipo_date = datetime.date(year, month, day).strftime("%Y%m%d")
            #ipo_data["ipo_date"] = datetime.date(year, month, day).strftime("%Y-%m-%d")
            ipo_data["ipo_date"] = datetime.date(year, month, day)
        except Exception:
            log.error("Error in IPO date:%s" % url.symbol)
            continue

        ipo_data_dir = os.path.join(tickdata_dir, ipo_date)
        ipo_data_path = os.path.join(ipo_data_dir,
                                     "%s_markethours.csv.gz" % url.symbol)
        exist = False
        if os.path.exists(ipo_data_dir) and os.path.exists(ipo_data_path):
            exist = True
            log.info("IPO data found")
        else:
            request = {
                "command": "get",
                "symbol": url.symbol,
                "date": ipo_date,
                "gettrade": "true",
                "getquote": "true"
            }
            try:
                fetcher_caller = fetcher.FetcherCaller()
                fetcher_caller.set_request(cjson.encode(request))
                response = fetcher_caller.send_request()
                fetcher_caller.close()
            except Exception:
                log.error("Unable to send fetch request")
                continue

            count_down = 60
            fetched = False
            while count_down > 0:
                if os.path.exists(ipo_data_path):
                    log.info("IPO data fetched: %s" % url.symbol)
                    fetched = True
                    time.sleep(5)
                    break
                time.sleep(1)
                count_down -= 1
            if not fetched:
                log.error("Unable to download data for %s" % url.symbol)

        if exist or fetched:
            itd = process_ipo_tick_data(url.symbol, ipo_date)
            ipo_data["open_vol"] = itd["open_vol"]
            ipo_data["first_opening_price"] = itd["first_opening_price"]
            ipo_data["first_closing_price"] = itd["first_closing_price"]
            ipo_data["first_trade_time"] = itd["first_trade_time"]
            ipo_data["first_day_high"] = itd["first_day_high"]
            ipo_data["first_day_low"] = itd["first_day_low"]
            ipo_data["first_day_high_percent_change"] = itd[
                "first_day_high_percent_change"]
            ipo_data["first_day_low_percent_change"] = itd[
                "first_day_low_percent_change"]
            ipo_data["first_day_volume"] = itd["first_day_volume"]
        else:
            ipo_data["open_vol"] = None
            ipo_data["first_opening_price"] = None
            ipo_data["first_closing_price"] = None
            ipo_data["first_trade_time"] = None
            ipo_data["first_day_high"] = None
            ipo_data["first_day_low"] = None
            ipo_data["first_day_high_percent_change"] = None
            ipo_data["first_day_low_percent_change"] = None
            ipo_data["first_day_volume"] = None

        ipo_data["scoop_rating"] = 0
        ipo_data["company_id"] = comp.id
        log.info("Final IPO data for %s:\n%s" % (url.symbol, ipo_data))
        """
        for u in underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
            session.add(unwr)
            session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=False)
            comp.underwriters.append(a)
            session.commit()
    
        for u in lead_underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
            session.add(unwr)
            session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=True)
            comp.underwriters.append(a)
            session.commit()
        """

        historical_ipo = HistoricalIPO(**ipo_data)
        session.add(historical_ipo)
        session.commit()
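
# A minimal, hypothetical run order for the routines above (a sketch only;
# it assumes `session` is already bound and that each step is idempotent, as
# the existence checks inside each function suggest):
#
#   get_ipo_url()                # discover IPO detail-page URLs
#   manual_add_ipo_url()         # patch in URLs the scrape missed
#   populate_ipo_table()         # scrape IPO details + first-day tick stats
#   populate_ipo_underwriter()   # attach underwriters
#   populate_ipo_finance()       # attach financials
#   update_ipo_tick_info()       # backfill tick stats fetched late
#   update_scoop_rate()          # attach SCOOP ratings
#   check_ipo_data_validity()    # flag questionable rows
#   remove_invalid_sym()         # purge delisted symbols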