def estimatesOK(datatype, warn, stepSize=100000):
    """Scan the CO_ESTIMATES table for `datatype` via a MySQL HANDLER and
    verify each primary-key-ordered batch with __verify.

    datatype: suffix appended to database.CO_ESTIMATES to form the table name.
    warn:     forwarded to __verify (controls warning strictness).
    stepSize: rows fetched per HANDLER READ.

    Returns a (warnings, errors) tuple of totals accumulated by __verify.
    """
    #get the database
    database = newdb.get_db()
    warnings = 0
    errors = 0
    ###############################
    util.info("\nChecking " + database.CO_ESTIMATES + datatype)
    database.execute("HANDLER {} OPEN AS foobar".format(database.CO_ESTIMATES + datatype))
    try:
        command = "HANDLER foobar READ `PRIMARY` NEXT LIMIT {}".format(stepSize)
        keys = ("coid", "type", "date", "brokerid", "orig")
        stats = Counter()
        while True:
            batch = __getNextBatchFromHandler(keys, database, command)
            if batch is None:
                break
            __verify(batch, 0, len(batch), ("value", "backfill", "currency"),
                     warn, stats)

        # Fold __verify's counters into the totals.  Counter.__getitem__
        # returns 0 for a missing key WITHOUT inserting it, so the original
        # `del stats["warnings"]` raised KeyError when nothing was recorded;
        # pop(..., 0) removes the key safely.
        warnings = warnings + stats.pop("warnings", 0)
        errors = errors + stats.pop("errors", 0)
        util.info("Errors={}, Warnings={}".format(errors, warnings))
        for k, v in stats.iteritems():
            util.info("{} = {}".format(k, v))
    finally:
        # Always release the server-side handler, even if verification fails.
        database.execute("HANDLER foobar CLOSE")
    return (warnings, errors)
def priceOK(warn, stepSize=100000):
    """Scan PRICE_FULL_TABLE via a MySQL HANDLER and verify each
    primary-key-ordered batch with __verify.

    warn:     forwarded to __verify (controls warning strictness).
    stepSize: rows fetched per HANDLER READ.

    Returns a (warnings, errors) tuple of totals accumulated by __verify.
    """
    #get the database
    database = newdb.get_db()
    warnings = 0
    errors = 0
    ###############################
    util.info("\nChecking " + database.PRICE_FULL_TABLE)
    database.execute("HANDLER {} OPEN AS foobar".format(database.PRICE_FULL_TABLE))
    try:
        command = "HANDLER foobar READ `PRIMARY` NEXT LIMIT {}".format(stepSize)
        keys = ("secid", "date")
        stats = Counter()
        while True:
            batch = __getNextBatchFromHandler(keys, database, command)
            if batch is None:
                break
            __verify(batch, 0, len(batch),
                     ("open", "high", "low", "close", "volume", "adj",
                      "adrrc", "cond", "backfill", "currency"), warn, stats)

        # Fold __verify's counters into the totals.  Counter.__getitem__
        # returns 0 for a missing key WITHOUT inserting it, so the original
        # `del stats["warnings"]` raised KeyError when nothing was recorded;
        # pop(..., 0) removes the key safely.
        warnings = warnings + stats.pop("warnings", 0)
        errors = errors + stats.pop("errors", 0)
        util.info("Errors={}, Warnings={}".format(errors, warnings))
        for k, v in stats.iteritems():
            util.info("{} = {}".format(k, v))
    finally:
        # Always release the server-side handler, even if verification fails.
        database.execute("HANDLER foobar CLOSE")
    return (warnings, errors)
def xrefsOK():
    """Verify the xref table for internal consistency in two passes:
    once keyed by (secid, xref_type, source) and once keyed by
    (value, xref_type, source) joined against the stock table.

    Returns True iff no errors were found in either pass.
    """
    #get the database
    database = newdb.get_db()
    warnings = 0
    errors = 0

    def __flushStats(stats):
        # Log and drain one pass's counters; returns (warnings, errors).
        # pop(..., 0) instead of del: Counter lookups do not insert missing
        # keys, so del would raise KeyError when nothing was recorded.
        w = stats.pop("warnings", 0)
        e = stats.pop("errors", 0)
        util.info("Errors={}, Warnings={}".format(e, w))
        for k, v in stats.iteritems():
            util.info("{} = {}".format(k, v))
        return w, e

    ###############################
    util.info("\nChecking xrefs based on SecIds")
    cursor = database.execute(
        "SELECT * FROM {} ORDER BY source,secid,xref_type,born".format(
            database.XREF_TABLE))
    buffer = []
    keys = ("secid", "xref_type", "source")
    stats = Counter()
    while True:
        batch = __getNextBatchFromCursor(keys, cursor, buffer)
        if batch is None:
            break
        __verify(batch, 0, len(batch), ("value", ), False, stats)
    w, e = __flushStats(stats)
    warnings = warnings + w
    errors = errors + e

    ###################################
    util.info("\nChecking xrefs based on Values")
    cursor = database.execute(
        "SELECT xf.secid,xf.xref_type,xf.value,xf.source,cs.coid,cs.issueid,cs.country,xf.born,xf.died FROM {} as xf, {} as cs WHERE xf.secid=cs.secid ORDER BY xf.source,xf.xref_type,xf.value,xf.born"
        .format(database.XREF_TABLE, database.STOCK_TABLE))
    buffer = []
    keys = ("value", "xref_type", "source")
    stats = Counter()
    while True:
        batch = __getNextBatchFromCursor(keys, cursor, buffer)
        if batch is None:
            break
        __verifySymbols(batch, 0, len(batch), ("secid", ), False, stats)
    w, e = __flushStats(stats)
    warnings = warnings + w
    errors = errors + e

    return (errors == 0)
# NOTE(review): fragment — the first line continues a parser.add_option(...)
# call whose opening is outside this chunk; indentation is reconstructed.
dest="ignore_mod_time", type=int, default=0)
parser.add_option("-l", "--process_lag", dest="lag", type=float)
(options, args) = parser.parse_args()
# Only 0/1/2 are meaningful for ignore_mod_time.  NOTE(review): assert is
# stripped under `python -O` — presumably acceptable for this script.
assert options.ignore_mod_time in (0, 1, 2)
if options.debug:
    util.set_debug()
else:
    util.set_log_file("all", True)
# Select primary or secondary database configuration; anything else aborts.
if options.db == "pri":
    newdb.init_db()
    database = newdb.get_db()
elif options.db == "sec":
    newdb.init_db(os.environ["SEC_DB_CONFIG_FILE"])
    database = newdb.get_db()
else:
    # util.error logs but does not terminate, hence the explicit exit.
    util.error("Valid database choices are [pri|sec]")
    sys.exit(1)
# Check for previously running instance
if not database.getProcessedFilesLock():
    util.warning("Not processing, previous instance running")
    sys.exit(1)
#XXX may want to precache seen files for speed in loading
try:
    # Sources are '+'-separated on the command line; the loop body and the
    # matching except/finally are outside this chunk.
    for source in options.source.split("+"):
def main():
    """Distribute the tradeable universe across trading-server instances by
    data volume, then write one sorted symbol file per instance.

    Exits with status 2 on missing required options or missing instances.
    """
    global database
    parser = OptionParser()
    parser.add_option('-d', '--date', dest='date',
                      help='Date to get data distribution')
    parser.add_option('-g', '--groups', dest='groups',
                      help='groups.ports file')
    parser.add_option('-t', '--tickers_file', dest='tickers_file',
                      help='tickers file')
    parser.add_option('-k', '--keeptogether', dest='keeptogether',
                      help='list of stocks to keep on the same server')
    # Typo fix: help string previously ended with a stray '"'.
    parser.add_option('-a', '--addtoall', dest='addtoall',
                      help='add symbols to all universes (SHOULD NOT BE TRADED)',
                      default="SPY")
    parser.add_option('-s', '--secmaster', dest='secmaster',
                      help='Security master file for list of valid symbols')
    parser.add_option('-m', '--overflow_mult', dest='overflow_mult',
                      help='Multiply the overflow bucket\'s volume',
                      default=1.0)
    opt, insts = parser.parse_args()
    # Shuffle so repeated runs don't always favor the same instances.
    random.shuffle(insts)

    if opt.date is None or opt.groups is None or opt.tickers_file is None or opt.secmaster is None:
        util.error("All options must be set:")
        exit(2)
    if len(insts) < 1:
        util.error("Must specify at least one instance.")
        # BUG FIX: util.error does not terminate; previously execution fell
        # through with zero instances.  Exit explicitly, as the check above does.
        exit(2)

    newdb.init_db()
    database = newdb.get_db()
    secmaster = get_symlist(opt.secmaster)
    secid2tickers = get_secid2tickers(opt.tickers_file)
    # Drop secids whose ticker is absent from the security master.  Iterate a
    # snapshot of the items so deletion during iteration is safe.
    for secid, ticker in list(secid2tickers.items()):
        if ticker not in secmaster:
            del secid2tickers[secid]
    universe = set(secid2tickers.values())
    massive = get_massive(opt.groups)

    dist = get_dist(opt.date, secid2tickers, massive, float(opt.overflow_mult))
    nodist = universe - set(dist.keys())
    util.info("%d symbols without data distribution: %s" %
              (len(nodist), " ".join(nodist)))
    # Give undistributed symbols zero volume so they still get assigned.
    dist.update((sym, 0.0) for sym in nodist)

    if opt.keeptogether is not None:
        keep = get_symlist(opt.keeptogether)
    else:
        keep = set()
    # Renamed from 'all' to avoid shadowing the builtin.
    addtoall = set(opt.addtoall.split(",")) if opt.addtoall is not None else set()

    assign = distribute(dist, universe, massive, insts, keep, addtoall)
    for (vol, symset, instname) in assign:
        util.info("Instance %s sees %4.2f%% volume, trades %d symbols" %
                  (instname, vol * 100, len(symset)))
        # 'with' guarantees the symbol file is closed even if a write fails.
        with open(instname, 'w') as symfile:
            symfile.writelines(s + "\n" for s in sorted(symset))
# NOTE(review): fragment — 'parser' and the other argument definitions begin
# outside this chunk; the flow also continues past the end of it.
parser.add_argument("--email",
                    action="store_const",
                    const=True,
                    dest="email",
                    default=False)
args = parser.parse_args()
#Set debug
if args.debug:
    util.set_debug()
else:
    util.set_log_file()
newdb.init_db()
backoffice.database = newdb.get_db()
#Figure out from-to dates
dayDelta = datetime.timedelta(days=1)
# Three mutually exclusive ways to pick the half-open range [fromDate, toDate):
# a single day, an explicit from/to pair, or "recent" (yesterday -> today UTC).
if args.singleDate is not None:
    fromDate = datetime.datetime.strptime(args.singleDate, "%Y%m%d")
    toDate = fromDate + dayDelta
elif args.fromDate is not None and args.toDate is not None:
    fromDate = datetime.datetime.strptime(args.fromDate, "%Y%m%d")
    toDate = datetime.datetime.strptime(args.toDate, "%Y%m%d")
elif args.recent is True:
    toDate = datetime.datetime.utcnow()
    # Round down to midnight UTC by formatting and reparsing the date part.
    toDate = datetime.datetime.strptime(toDate.strftime("%Y%m%d"), "%Y%m%d")  #Get only date
    fromDate = toDate - dayDelta
# NOTE(review): fragment — the opening 'attrs[i + 1])' closes an assert whose
# start is outside this chunk; indentation below is reconstructed.
attrs[i + 1])
# Consecutive history rows: if one interval ends exactly where the next
# begins, the attribute value must actually have changed.
if attrs[i]['died'] == attrs[i + 1]['born']:
    assert attrs[i]['value'] != attrs[i + 1]['value'], (
        attrs[i], attrs[i + 1])
else:
    # Otherwise each interval must be open-ended (died=None) or well-ordered.
    assert attrs[i]['died'] is None or attrs[i][
        'died'] > attrs[i]['born'], attrs[i]
# Report the ten most frequent grouping-key values ('key' and 'table' are
# defined upstream of this chunk).
print "Top 10 company attribute counts"
db.execute(
    "SELECT %s, COUNT(*) AS count FROM %s GROUP BY %s ORDER BY count DESC LIMIT 10"
    % (key, table, key))
for row in db._curs.fetchall():
    print row
# Report the ten most frequent attribute types by human-readable name.
print "Top 10 attribute counts"
db.execute(
    "SELECT a.name, COUNT(*) AS count FROM " + table +
    " JOIN attribute_type a on type = a.code GROUP BY a.name ORDER BY count DESC LIMIT 10"
)
for row in db._curs.fetchall():
    print row
if __name__ == "__main__":
    util.set_debug()
    newdb.init_db()
    db = newdb.get_db()
    main()
def __init__(self):
    """Fetch Yahoo quote fields for all live compustat_idhist tickers and
    stage them as a timestamped zipped CSV in a fresh temp directory,
    recorded in self._remote_dir.

    Raises DataSourceError if the zip step fails (temp dir is removed).
    """
    newdb.init_db()
    database = newdb.get_db()
    # Live TIC xrefs from compustat_idhist as of "now" (born<=now<died).
    rows = database.execute(
        "SELECT value FROM {} WHERE xref_type=%(type)s AND source=%(source)s AND born<=%(now)s AND (died>%(now)s OR died is NULL)"
        .format(database.XREF_TABLE), {
            "type": database.getXrefType("TIC"),
            "now": util.now(),
            "source": database.getSourceType("compustat_idhist")
        }).fetchall()
    # Skip tickers that start with a digit; 'is None' replaces '== None'.
    tickers = [
        row['value'] for row in rows
        if re.match("[0-9].+", row["value"]) is None
    ]
    util.info("Retrieving info on %d tickers" % len(tickers))
    database.close()
    fields = [
        "symbol",
        "name",
        "exchange",
        "error_flag",
        "market_cap",
        "avg_daily_volume",
        "ex_dividend_date",
        "dividend_pay_date",
        "dividend_share_ratio",
        "dividend_yield",
        #"ebitda",
        "earnings_share_ratio",
        "eps_est_cur_year",
        "eps_est_next_qtr",
        "eps_est_next_year",
        "pe_ratio",
        "peg_ratio",
        "price_book_ratio",
        "price_eps_est_cur_year_ratio",
        "price_eps_est_next_year_ratio",
        "price_sales_ratio",
        "short_ratio",
    ]
    # Grab data
    data = ystockquote.get_symbols(tickers, fields)
    # Save data to temp dir; 'with' closes the handle even if a write fails
    # (the original leaked the descriptor on exception).
    tempdir = tempfile.mkdtemp(dir=os.environ['TMP_DIR'])
    with open("%s/yahoo.csv" % tempdir, "w") as f:
        writer = csv.DictWriter(f, fields)
        # First row maps each field name to itself, i.e. the CSV header.
        header_and_data = [dict(zip(fields, fields))]
        header_and_data.extend(data.values())
        writer.writerows(header_and_data)
    # Zip with a timestamped name; -j drops directory paths in the archive.
    result = os.system(
        "zip -j %s/yahoo-%s.csv.zip %s/yahoo.csv 1>/dev/null" %
        (tempdir, datetime.datetime.now().strftime("%Y%m%d%H%M"), tempdir))
    if (result != 0):
        shutil.rmtree(tempdir)
        raise DataSourceError("Could not zip file")
    # Keep only the zip; the plain CSV is no longer needed.
    os.remove("%s/yahoo.csv" % tempdir)
    self._remote_dir = tempdir