def manual_add_ipo_url():
    ipo_url = IPOInfoUrl(
        'VOXELJET AG', 'VJET',
        'http://www.nasdaq.com/markets/ipos/company/voxeljet-ag-915787-73505')
    session.add(ipo_url)
    session.commit()

    ipo_url = IPOInfoUrl(
        'AERIE PHARMACEUTICALS INC', 'AERI',
        'http://www.nasdaq.com/markets/ipos/company/aerie-pharmaceuticals-inc-684178-73508'
    )
    session.add(ipo_url)
    session.commit()

    ipo_url = IPOInfoUrl(
        'ENDOCHOICE HOLDINGS, INC.', 'GI',
        'http://www.nasdaq.com/markets/ipos/company/endochoice-holdings-inc-948923-78332'
    )
    session.add(ipo_url)
    session.commit()

    ipo_url = IPOInfoUrl(
        'STG GROUP, INC.', 'GDEF',
        'http://www.nasdaq.com/markets/ipos/company/global-defense-national-security-systems-inc-915709-73501'
    )
    session.add(ipo_url)
    session.commit()

    session.close()
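
# The functions in this listing assume a SQLAlchemy session and an IPOInfoUrl
# model along these lines. This is only a minimal sketch for context -- the
# real declarations (table name, column sizes, etc.) live in the project's
# models module and may differ.
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()
engine = create_engine("sqlite:///savant.db")   # placeholder connection string
session = scoped_session(sessionmaker(bind=engine))

class IPOInfoUrl(Base):
    __tablename__ = "ipo_info_url"        # assumed table name
    query = session.query_property()      # enables IPOInfoUrl.query.filter_by(...)

    id = Column(Integer, primary_key=True)
    name = Column(String(256))
    symbol = Column(String(16))
    url = Column(String(512))

    def __init__(self, name, symbol, url):
        self.name = name
        self.symbol = symbol
        self.url = url
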
def get_ipo_url():
    logging.basicConfig()
    log = logging.getLogger("savant")
    
    base_url = "http://www.nasdaq.com/markets/ipos/activity.aspx?tab=pricings&month="
    
    count = 0
    cur_date = date.today()
    oldest_date = date(2010, 1, 1)
    
    while cur_date >= oldest_date:
        month_str = cur_date.strftime("%Y-%m")
        log.info("Getting IPO urls for %s", month_str)
        print "Getting IPO urls for", month_str
        url = base_url + month_str
        # Step back one month for the next iteration.
        if cur_date.month != 1:
            cur_date = cur_date.replace(month=cur_date.month-1)
        else:
            cur_date = cur_date.replace(year=cur_date.year-1, month=12)

        try:
            soup = scraper.get_soup(url)
        except Exception:
            log.info("Could not reach %s", url)
            continue

        table = soup.find("div", {"class": "genTable"})
        if table is None or "no data" in table.text:
            log.info("No data for %s", month_str)
            continue
    
        rows = table.tbody.find_all("tr")
        for row in rows:
            tds = row.find_all("td")
            name = tds[0].text
            url = tds[0].a["href"]
            symbol = tds[1].text
            # Skip entries already recorded under the same name or symbol.
            if IPOInfoUrl.query.filter_by(name=name).first() is not None:
                continue
            if IPOInfoUrl.query.filter_by(symbol=symbol).first() is not None:
                continue
            session.add(IPOInfoUrl(name, symbol, url))
    
        session.commit()
    session.close()
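
# get_ipo_url() above relies on a scraper.get_soup() helper. A minimal sketch of
# what such a helper might look like is below (hypothetical -- the real helper
# may add retries, headers, or rate limiting); any network error it raises is
# caught by the caller, which then skips that month.
import urllib2
from bs4 import BeautifulSoup

def get_soup(url):
    html = urllib2.urlopen(url, timeout=30).read()
    return BeautifulSoup(html, "html.parser")
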
def populate_ipo_underwriter():
    ipo_urls = IPOInfoUrl.query.all()
    for url in ipo_urls:
        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            continue
        hi = HistoricalIPO.query.filter_by(company_id=comp.id).first()
        if not hi:
            continue
        assoc = CompanyUnderwriterAssociation.query.filter_by(
            company_id=comp.id).first()
        if assoc:
            print "underwriter for", url.symbol, "already in database"
            continue

        exp_url = url.url + '?tab=experts'
        uw_data = scrape_ipo_underwriter(exp_url)
        if uw_data is None:
            continue
        print url.symbol, uw_data
        for i in [0, 1]:
            for uw_item in uw_data[i]:
                if not uw_item:
                    continue
                uw = Underwriter.query.filter_by(name=uw_item).first()
                if not uw:
                    uw = Underwriter(name=uw_item)
                    session.add(uw)
                    session.commit()

                assoc = CompanyUnderwriterAssociation.query.filter_by(
                    company_id=comp.id).filter_by(
                        underwriter_id=uw.id).first()
                if assoc:
                    continue

                # Index 0 of uw_data holds the lead underwriters.
                lead = (i == 0)
                assoc = CompanyUnderwriterAssociation(comp.id, uw.id, lead)
                session.add(assoc)
                session.commit()
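
# Note: populate_ipo_underwriter() assumes scrape_ipo_underwriter() returns a
# pair of name lists, with index 0 holding the lead underwriters (hence
# lead=True for i == 0) and index 1 the rest of the syndicate, e.g.
# (hypothetical values):
#
#     (["Goldman, Sachs & Co."], ["Stifel", "RBC Capital Markets"])
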
def get_company_overview(symbol):
    existing = Company.query.filter_by(symbol=symbol).first()
    if existing:
        return existing

    data = scrape_nasdaq(symbol)
    in_nas = 1
    if not data:
        return None
    elif len(data.keys()) == 1:
        # NASDAQ.com returned only a single field; fall back to a full Yahoo scrape.
        in_nas = 2
        data.update(scrape_yahoo(symbol, full=True))
    else:
        # NASDAQ.com had an overview; fill in the remaining fields from Yahoo.
        in_nas = 3
        data.update(scrape_yahoo(symbol))

    # Still only one field after the Yahoo fallback: nothing usable was scraped.
    if len(data) == 1:
        return None

    # Debug print for a symbol that was giving trouble.
    if data["symbol"] == 'AMBI':
        print symbol, in_nas

    #existing = Company.query.filter_by(name=data["name"]).first()
    #if existing:
    #    return existing

    # Get-or-create the Exchange, Industry, and Sector rows referenced by the
    # scraped data, replacing each scraped name with its foreign-key id.
    if "exchange" in data:
        exch = Exchange.query.filter_by(name=data["exchange"]).first()
        if not exch:
            exch = Exchange(name=data["exchange"])
            session.add(exch)
            session.commit()
        del data["exchange"]
        data["exchange_id"] = exch.id

    if "industry" in data:
        indus = Industry.query.filter_by(name=data["industry"]).first()
        if not indus:
            indus = Industry(name=data["industry"])
            session.add(indus)
            session.commit()
        del data["industry"]
        data["industry_id"] = indus.id

    if "sector" in data:
        sect = Sector.query.filter_by(name=data["sector"]).first()
        if not sect:
            sect = Sector(name=data["sector"])
            session.add(sect)
            session.commit()
        del data["sector"]
        data["sector_id"] = sect.id

    comp = Company(**data)
    return comp
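
# get_company_overview() assumes scrape_nasdaq() / scrape_yahoo() return a flat
# dict of Company fields keyed by column name -- typically including "symbol"
# and "name", plus optional "exchange", "industry", and "sector" entries that
# are swapped for foreign-key ids above. A hypothetical example:
#
#     {"symbol": "VJET", "name": "VOXELJET AG", "exchange": "NYSE",
#      "industry": "Industrial Machinery/Components", "sector": "Capital Goods"}
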
#unwr_dict = scraper.get_underwriters()
count = 0

for symbol in symbols:
    count += 1
    if count % 10 == 0:
        print count
    if "-" in symbol:
        continue

    comp = scraper.get_company_overview(symbol)
#    if comp and not Company.query.filter_by(symbol=comp.symbol).first() and not Company.query.filter_by(name=comp.name).first():
    if comp:
        if not Company.query.filter_by(symbol=comp.symbol).first():
            session.add(comp)
            session.commit()
        else:
            print "Company exists in db"


    """
    if symbol in unwr_dict:
        underwriters = [u.strip() for u in unwr_dict[symbol].split("/")]
        for u in underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
            session.add(unwr)
            session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=True)
    """
def populate_ipo_table():
    ipo_urls = IPOInfoUrl.query.all()

    known_unwrs = set()

    for url in ipo_urls:

        comp = Company.query.filter_by(symbol=url.symbol).first()
        if not comp:
            #        session.add(comp)
            #        session.commit()
            continue

        if HistoricalIPO.query.filter_by(company_id=comp.id).first():
            print "Data exists for:", url.symbol
            continue

    #    comp = get_company_overview(url.symbol)
    #    if not comp:
    #        log.warning("Cannot get company info for %s" % url.symbol)
    #        continue

        ipo_data = scrape_ipo(url.url)
        if ipo_data == {}:
            continue
        log.info("IPO data from NASDAQ.com:\n%s" % cjson.encode(ipo_data))
        underwriters = ipo_data["underwriters"]
        lead_underwriters = ipo_data["lead_underwriters"]
        del ipo_data["underwriters"]
        del ipo_data["lead_underwriters"]

        ipo_date = ipo_data["ipo_date"]
        try:
            month, day, year = [int(i) for i in ipo_date.split("/")]
            ipo_date = datetime.date(year, month, day).strftime("%Y%m%d")
            #ipo_data["ipo_date"] = datetime.date(year, month, day).strftime("%Y-%m-%d")
            ipo_data["ipo_date"] = datetime.date(year, month, day)
        except ValueError:
            log.error("Error in IPO date: %s" % url.symbol)
            continue

        ipo_data_dir = os.path.join(tickdata_dir, ipo_date)
        ipo_data_path = os.path.join(ipo_data_dir,
                                     "%s_markethours.csv.gz" % url.symbol)
        exist = False
        if os.path.exists(ipo_data_dir) and os.path.exists(ipo_data_path):
            exist = True
            log.info("IPO data found")
        else:
            request = {
                "command": "get",
                "symbol": url.symbol,
                "date": ipo_date,
                "gettrade": "true",
                "getquote": "true"
            }
            try:
                fetcher_caller = fetcher.FetcherCaller()
                fetcher_caller.set_request(cjson.encode(request))
                response = fetcher_caller.send_request()
                fetcher_caller.close()
            except Exception:
                log.error("Unable to send fetch request")
                continue

            # Poll for up to a minute for the fetcher to write the tick file.
            count_down = 60
            fetched = False
            while count_down > 0:
                if os.path.exists(ipo_data_path):
                    log.info("IPO data fetched: %s" % url.symbol)
                    fetched = True
                    time.sleep(5)
                    break
                time.sleep(1)
                count_down -= 1
            if not fetched:
                log.error("Unable to download data for %s" % url.symbol)

        # Copy the tick-derived first-day statistics when data is available;
        # otherwise leave those columns NULL.
        first_day_fields = [
            "open_vol", "first_opening_price", "first_closing_price",
            "first_trade_time", "first_day_high", "first_day_low",
            "first_day_high_percent_change", "first_day_low_percent_change",
            "first_day_volume",
        ]
        if exist or fetched:
            itd = process_ipo_tick_data(url.symbol, ipo_date)
            for field in first_day_fields:
                ipo_data[field] = itd[field]
        else:
            for field in first_day_fields:
                ipo_data[field] = None

        ipo_data["scoop_rating"] = 0
        ipo_data["company_id"] = comp.id
        log.info("Final IPO data for %s:\n%s" % (url.symbol, ipo_data))
        """
        for u in underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
            session.add(unwr)
            session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=False)
            comp.underwriters.append(a)
            session.commit()
    
        for u in lead_underwriters:
            if u in known_unwrs:
                unwr = Underwriter.query.filter_by(name=u).first()
            else:
                unwr = Underwriter(u)
                known_unwrs.add(u)
            session.add(unwr)
            session.commit()
            a = CompanyUnderwriterAssociation(company_id=comp.id, underwriter_id=unwr.id, lead=True)
            comp.underwriters.append(a)
            session.commit()
        """

        historical_ipo = HistoricalIPO(**ipo_data)
        session.add(historical_ipo)
        session.commit()
    #else:
    #    data.update(scraper.scrape_yahoo(symbol))

    if len(data) == 1:
        continue

    if Company.query.filter_by(name=data["name"]).first():
        continue

    print data
    print known_exchs
    if "exchange" in data:
        if data["exchange"] not in known_exchs:
            print >> sys.stderr, data["exchange"]
            exch = Exchange(name=data["exchange"])
            session.add(exch)
            session.commit()
            known_exchs.add(data["exchange"])
        else:
            exch = Exchange.query.filter_by(name=data["exchange"]).first()
        del data["exchange"]
        data["exchange_id"] = exch.id
 
    if "industry" in data:
        if data["industry"] not in known_industries:
            indus = Industry(name=data["industry"])
            session.add(indus)
            session.commit()
            known_industries.add(data["industry"])
        else:
            indus = Industry.query.filter_by(name=data["industry"]).first()