Beispiel #1
0
def estimatesOK(datatype, warn, stepSize=100000):
    """Scan the co_estimates table for ``datatype`` and report data problems.

    Walks the table in batches of ``stepSize`` rows through a MySQL
    HANDLER cursor and verifies the value columns of each batch.

    Returns a ``(warnings, errors)`` tuple with the total counts found.
    """
    #get the database
    database = newdb.get_db()

    ###############################

    util.info("\nChecking " + database.CO_ESTIMATES + datatype)
    database.execute("HANDLER {} OPEN AS foobar".format(database.CO_ESTIMATES +
                                                        datatype))
    command = "HANDLER foobar READ `PRIMARY` NEXT LIMIT {}".format(stepSize)
    keys = ("coid", "type", "date", "brokerid", "orig")
    stats = Counter()
    while True:
        batch = __getNextBatchFromHandler(keys, database, command)
        if batch is None:
            break
        __verify(batch, 0, len(batch), ("value", "backfill", "currency"), warn,
                 stats)

    # stats accumulates across all batches, so read the totals once after
    # the loop.  (Adding stats["warnings"] on every iteration, as before,
    # re-counted earlier batches; xrefsOK already uses this corrected
    # pattern.)
    warnings = stats["warnings"]
    errors = stats["errors"]

    util.info("Errors={}, Warnings={}".format(stats["errors"],
                                              stats["warnings"]))
    del stats["warnings"]
    del stats["errors"]
    for k, v in stats.iteritems():
        util.info("{} = {}".format(k, v))

    database.execute("HANDLER foobar CLOSE")

    return (warnings, errors)
Beispiel #2
0
def priceOK(warn, stepSize=100000):
    """Scan the full price table and report data problems.

    Walks the table in batches of ``stepSize`` rows through a MySQL
    HANDLER cursor and verifies the price/volume columns of each batch.

    Returns a ``(warnings, errors)`` tuple with the total counts found.
    """
    #get the database
    database = newdb.get_db()

    ###############################

    util.info("\nChecking " + database.PRICE_FULL_TABLE)
    database.execute("HANDLER {} OPEN AS foobar".format(
        database.PRICE_FULL_TABLE))
    command = "HANDLER foobar READ `PRIMARY` NEXT LIMIT {}".format(stepSize)
    keys = ("secid", "date")
    stats = Counter()
    while True:
        batch = __getNextBatchFromHandler(keys, database, command)
        if batch is None:
            break
        __verify(batch, 0, len(batch),
                 ("open", "high", "low", "close", "volume", "adj", "adrrc",
                  "cond", "backfill", "currency"), warn, stats)

    # stats accumulates across all batches, so read the totals once after
    # the loop.  (Adding stats["warnings"] on every iteration, as before,
    # re-counted earlier batches; xrefsOK already uses this corrected
    # pattern.)
    warnings = stats["warnings"]
    errors = stats["errors"]

    util.info("Errors={}, Warnings={}".format(stats["errors"],
                                              stats["warnings"]))
    del stats["warnings"]
    del stats["errors"]
    for k, v in stats.iteritems():
        util.info("{} = {}".format(k, v))

    database.execute("HANDLER foobar CLOSE")

    return (warnings, errors)
Beispiel #3
0
def xrefsOK():
    """Verify the xref table in two passes (by secid, then by value).

    Returns True when no errors were found.
    """
    database = newdb.get_db()

    warnings = 0
    errors = 0

    def _report(counts):
        # Log the totals, then dump every remaining per-key tally.
        util.info("Errors={}, Warnings={}".format(counts["errors"],
                                                  counts["warnings"]))
        del counts["warnings"]
        del counts["errors"]
        for key, tally in counts.iteritems():
            util.info("{} = {}".format(key, tally))

    ###############################

    util.info("\nChecking xrefs based on SecIds")
    cursor = database.execute(
        "SELECT * FROM {} ORDER BY source,secid,xref_type,born".format(
            database.XREF_TABLE))
    pending = []
    keys = ("secid", "xref_type", "source")
    stats = Counter()
    while True:
        batch = __getNextBatchFromCursor(keys, cursor, pending)
        if batch is None:
            break
        __verify(batch, 0, len(batch), ("value", ), False, stats)
    warnings += stats["warnings"]
    errors += stats["errors"]
    _report(stats)

    ###################################

    util.info("\nChecking xrefs based on Values")
    cursor = database.execute(
        "SELECT xf.secid,xf.xref_type,xf.value,xf.source,cs.coid,cs.issueid,cs.country,xf.born,xf.died FROM {} as xf, {} as cs WHERE xf.secid=cs.secid ORDER BY xf.source,xf.xref_type,xf.value,xf.born"
        .format(database.XREF_TABLE, database.STOCK_TABLE))
    pending = []
    keys = ("value", "xref_type", "source")
    stats = Counter()
    while True:
        batch = __getNextBatchFromCursor(keys, cursor, pending)
        if batch is None:
            break
        __verifySymbols(batch, 0, len(batch), ("secid", ), False, stats)
    warnings += stats["warnings"]
    errors += stats["errors"]
    _report(stats)

    return (errors == 0)
Beispiel #4
0
                      dest="ignore_mod_time",
                      type=int,
                      default=0)
    parser.add_option("-l", "--process_lag", dest="lag", type=float)
    (options, args) = parser.parse_args()

    assert options.ignore_mod_time in (0, 1, 2)

    if options.debug:
        util.set_debug()
    else:
        util.set_log_file("all", True)

    if options.db == "pri":
        newdb.init_db()
        database = newdb.get_db()
    elif options.db == "sec":
        newdb.init_db(os.environ["SEC_DB_CONFIG_FILE"])
        database = newdb.get_db()
    else:
        util.error("Valid database choices are [pri|sec]")
        sys.exit(1)

    # Check for previously running instance
    if not database.getProcessedFilesLock():
        util.warning("Not processing, previous instance running")
        sys.exit(1)

        #XXX may want to precache seen files for speed in loading
    try:
        for source in options.source.split("+"):
Beispiel #5
0
def main():
    """Distribute tickers across trading instances by data volume.

    Reads the security master, ticker map, and per-symbol volume
    distribution, assigns each symbol to one of the instance names given
    on the command line, and writes one sorted symbol file per instance.

    Exits with status 2 when a required option is missing.
    """
    global database
    parser = OptionParser()
    parser.add_option('-d',
                      '--date',
                      dest='date',
                      help='Date to get data distribution')
    parser.add_option('-g',
                      '--groups',
                      dest='groups',
                      help='groups.ports file')
    parser.add_option('-t',
                      '--tickers_file',
                      dest='tickers_file',
                      help='tickers file')
    parser.add_option('-k',
                      '--keeptogether',
                      dest='keeptogether',
                      help='list of stocks to keep on the same server')
    parser.add_option(
        '-a',
        '--addtoall',
        dest='addtoall',
        help='add symbols to all universes (SHOULD NOT BE TRADED)"',
        default="SPY")
    parser.add_option('-s',
                      '--secmaster',
                      dest='secmaster',
                      help='Security master file for list of valid symbols')
    parser.add_option('-m',
                      '--overflow_mult',
                      dest='overflow_mult',
                      help='Multiply the overflow bucket\'s volume',
                      default=1.0)
    opt, insts = parser.parse_args()
    # Shuffle so assignment order does not systematically favor the first
    # instance listed.
    random.shuffle(insts)
    if opt.date is None or opt.groups is None or opt.tickers_file is None or opt.secmaster is None:
        util.error("All options must be set:")
        exit(2)

    if len(insts) < 1:
        # NOTE(review): this only logs and falls through; confirm whether
        # a hard exit was intended here.
        util.error("Must specify at least one instance.")

    newdb.init_db()
    database = newdb.get_db()

    secmaster = get_symlist(opt.secmaster)
    secid2tickers = get_secid2tickers(opt.tickers_file)
    # Iterate over a snapshot so deleting entries while iterating is safe
    # (mutating a live dict view during iteration fails on Python 3).
    for secid, ticker in list(secid2tickers.items()):
        if ticker not in secmaster:
            del secid2tickers[secid]
    universe = set(secid2tickers.values())
    massive = get_massive(opt.groups)
    dist = get_dist(opt.date, secid2tickers, massive, float(opt.overflow_mult))
    nodist = universe - set(dist.keys())
    util.info("%d symbols without data distribution: %s" %
              (len(nodist), " ".join(nodist)))
    # Symbols with no recorded distribution get zero volume.
    dist.update((sym, 0.0) for sym in nodist)
    if opt.keeptogether is not None:
        keep = get_symlist(opt.keeptogether)
    else:
        keep = set()
    if opt.addtoall is not None:
        # Renamed from "all" to avoid shadowing the builtin.
        addtoall = set(opt.addtoall.split(","))
    else:
        addtoall = set()
    assign = distribute(dist, universe, massive, insts, keep, addtoall)
    for (vol, symset, instname) in assign:
        util.info("Instance %s sees %4.2f%% volume, trades %d symbols" %
                  (instname, vol * 100, len(symset)))

        # One sorted symbol file per instance; "with" guarantees the file
        # is closed even if a write fails.
        with open(instname, 'w') as symfile:
            symfile.writelines(s + "\n" for s in sorted(symset))
Beispiel #6
0
# --email: store True when the flag is present (default False).
parser.add_argument("--email",
                    action="store_const",
                    const=True,
                    dest="email",
                    default=False)

args = parser.parse_args()

# Set debug logging, or log to the "all" file otherwise.
if args.debug:
    util.set_debug()
else:
    util.set_log_file()

newdb.init_db()
backoffice.database = newdb.get_db()

# Figure out the [fromDate, toDate) processing window from the options.
# NOTE(review): if none of --singleDate, --fromDate/--toDate, or --recent
# is given, fromDate/toDate are never bound and later use raises
# NameError -- confirm whether the parser enforces one of them.
dayDelta = datetime.timedelta(days=1)
if args.singleDate is not None:
    # Single day: window is [date, date + 1 day).
    fromDate = datetime.datetime.strptime(args.singleDate, "%Y%m%d")
    toDate = fromDate + dayDelta
elif args.fromDate is not None and args.toDate is not None:
    fromDate = datetime.datetime.strptime(args.fromDate, "%Y%m%d")
    toDate = datetime.datetime.strptime(args.toDate, "%Y%m%d")
elif args.recent is True:
    toDate = datetime.datetime.utcnow()
    # Round-trip through "%Y%m%d" to truncate to midnight (drop the time
    # component).
    toDate = datetime.datetime.strptime(toDate.strftime("%Y%m%d"),
                                        "%Y%m%d")  #Get only date

    fromDate = toDate - dayDelta
Beispiel #7
0
                                                                      attrs[i +
                                                                            1])
                    if attrs[i]['died'] == attrs[i + 1]['born']:
                        assert attrs[i]['value'] != attrs[i + 1]['value'], (
                            attrs[i], attrs[i + 1])
                    else:
                        assert attrs[i]['died'] is None or attrs[i][
                            'died'] > attrs[i]['born'], attrs[i]

        print "Top 10 company attribute counts"
        db.execute(
            "SELECT %s, COUNT(*) AS count FROM %s GROUP BY %s ORDER BY count DESC LIMIT 10"
            % (key, table, key))
        for row in db._curs.fetchall():
            print row

        print "Top 10 attribute counts"
        db.execute(
            "SELECT a.name, COUNT(*) AS count FROM " + table +
            " JOIN attribute_type a on type = a.code GROUP BY a.name ORDER BY count DESC LIMIT 10"
        )
        for row in db._curs.fetchall():
            print row


if __name__ == "__main__":
    # Script entry point: enable debug logging, open the database used by
    # the module-level helpers above, and run the checks.
    util.set_debug()
    newdb.init_db()
    db = newdb.get_db()
    main()
Beispiel #8
0
    def __init__(self):
        """Fetch Yahoo quote data for all live compustat tickers and stage
        it as a zipped CSV in a fresh temp directory.

        On success ``self._remote_dir`` points at the directory holding
        the zip; raises DataSourceError when zipping fails.
        """
        newdb.init_db()
        database = newdb.get_db()

        # Tickers currently alive (born <= now < died) from the
        # compustat_idhist xref source.
        rows = database.execute(
            "SELECT value FROM {} WHERE xref_type=%(type)s AND source=%(source)s AND born<=%(now)s AND (died>%(now)s OR died is NULL)"
            .format(database.XREF_TABLE), {
                "type": database.getXrefType("TIC"),
                "now": util.now(),
                "source": database.getSourceType("compustat_idhist")
            }).fetchall()
        # Drop tickers that start with a digit ("is None" instead of
        # "== None" -- identity check is the correct idiom).
        tickers = [
            row['value'] for row in rows
            if re.match("[0-9].+", row["value"]) is None
        ]
        util.info("Retrieving info on %d tickers" % len(tickers))
        database.close()

        # Quote fields requested from Yahoo; also the CSV column order.
        fields = [
            "symbol",
            "name",
            "exchange",
            "error_flag",
            "market_cap",
            "avg_daily_volume",
            "ex_dividend_date",
            "dividend_pay_date",
            "dividend_share_ratio",
            "dividend_yield",

            #"ebitda",
            "earnings_share_ratio",
            "eps_est_cur_year",
            "eps_est_next_qtr",
            "eps_est_next_year",
            "pe_ratio",
            "peg_ratio",
            "price_book_ratio",
            "price_eps_est_cur_year_ratio",
            "price_eps_est_next_year_ratio",
            "price_sales_ratio",
            "short_ratio",
        ]
        # Grab data
        data = ystockquote.get_symbols(tickers, fields)
        # Save data to temp dir; "with" closes the file even on error.
        tempdir = tempfile.mkdtemp(dir=os.environ['TMP_DIR'])
        with open("%s/yahoo.csv" % tempdir, "w") as f:
            writer = csv.DictWriter(f, fields)
            # Header row of field names (replaces the old
            # dict(zip(fields, fields)) trick).
            writer.writeheader()
            writer.writerows(data.values())
        # Zip file, then remove the plain CSV so only the zip remains.
        result = os.system(
            "zip -j %s/yahoo-%s.csv.zip %s/yahoo.csv 1>/dev/null" %
            (tempdir, datetime.datetime.now().strftime("%Y%m%d%H%M"), tempdir))
        if (result != 0):
            shutil.rmtree(tempdir)
            raise DataSourceError("Could not zip file")
        os.remove("%s/yahoo.csv" % tempdir)
        self._remote_dir = tempdir