def __processCSHOC(command, keys, attributes, timestamp, source, backfill,
                   processCoid, processSecid):
    if "CSHOC" not in attributes:
        return

    secid = database.getSecidFromCsid(keys["GVKEY"], keys["IID"], timestamp)
    if not processSecid(secid):
        return
    if secid is None:
        secid = database.createNewCsid(keys["GVKEY"], keys["IID"], timestamp)
        util.warning("Created new secid: {}.{}=>{}".format(
            keys['GVKEY'], keys['IID'], secid))

    date = util.convert_date_to_millis(keys["DATADATE"])
    if backfill:
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    if command in ("I", "C"):
        database.insertAttribute("sec", "n", secid, date, source, "CSHOC",
                                 attributes["CSHOC"], timestamp, None,
                                 backfill, False, True, __CSHOCEquals)
    elif command in ("R", "D"):
        database.deleteAttribute("sec", "n", secid, date, source, "CSHOC",
                                 timestamp, False)
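# Nearly every processor in this section advances the backfill timestamp via
# __datePlusOne, whose definition lies outside this section. A minimal sketch,
# assuming DATADATE arrives as a compact "YYYYMMDD" string (the
# int(keys["DATADATE"]) calls elsewhere suggest that format) and relying on
# the module's existing datetime import; the real helper may differ:
def __datePlusOne(datadate):
    #parse the compact date, add one calendar day, return the same format
    d = datetime.datetime.strptime(str(datadate), "%Y%m%d")
    return (d + datetime.timedelta(days=1)).strftime("%Y%m%d")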
def _parseFile(filepath):
    #this should only happen when we process the first file ever
    if filepath is None:
        return set(), None, None

    data = set()
    info = datafiles.read_info_file(filepath)
    if info['date_last_absent'] is None:
        timestamp = util.convert_date_to_millis(info['date_modified'])
    else:
        timestamp = util.convert_date_to_millis(info['date_first_present'])

    csvfile = open(filepath)
    dialect = csv.Sniffer().sniff(csvfile.read(1024))
    csvfile.seek(0)
    reader = csv.DictReader(csvfile, dialect=dialect)
    for row in reader:
        secid = database.getSecidFromCsid(row['GVKEY'], row['IID'], timestamp)
        if secid is None:
            secid = database.createNewCsid(row['GVKEY'], row['IID'],
                                           timestamp, None, None, True)
            util.warning("Created new secid: {}.{}=>{}".format(
                row['GVKEY'], row['IID'], secid))
        data.add((secid, int(row["SPLITDATE"]), float(row["SPLITRATE"])))

    #get the file start date from the filename
    startDate = os.path.normpath(filepath).split("/")[-1][0:8]  #split the filepath
    startDate = int(startDate)

    return data, startDate, timestamp
def __processMkt(command, keys, attributes, timestamp, source, backfill,
                 processCoid, processSecid):
    if keys['CFFLAG'] != 'F':
        return

    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill:
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    for n, v in attributes.iteritems():
        if n in ('MKVALT', 'PRCC', 'PRCH', 'PRCL', 'MKVALTQ', 'PRCCQ',
                 'PRCHQ', 'PRCLQ'):
            datatype = "n"
            value = float(v) if v is not None else None
        elif n in ('CLSM', 'CLSMQ'):
            datatype = "s"
            value = v
        else:
            continue

        if command in ("C", "I"):
            database.insertAttribute("co", datatype, coid, date, source, n,
                                     value, timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", datatype, coid, date, source, n,
                                     timestamp)
def __processFundamental(command, keys, attributes, timestamp, source,
                         backfill, global_cs):
    if not global_cs and not (keys['INDFMT'] == 'INDL'
                              and keys['DATAFMT'] == "STD"
                              and keys['POPSRC'] == "D"
                              and keys["CONSOL"] == "C"):
        return
    elif global_cs and not (keys['INDFMT'] == 'INDL'
                            and keys['DATAFMT'] == "HIST_STD"
                            and keys['POPSRC'] == "I"
                            and keys["CONSOL"] == "C"):
        return

    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill:
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    coid = int(keys["GVKEY"])
    for n, v in attributes.iteritems():
        if n[-3:] == "_DC":
            continue
        if global_cs and n not in ("ATQ", "IBQ", "SALEQ", "OANCFY"):
            continue
        #value = float(v) if v is not None else None
        if command in ("C", "I"):
            database.insertAttribute("co", "n", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("D", "R"):
            database.deleteAttribute("co", "n", coid, date, source, n,
                                     timestamp)
def __processDesind(command, keys, attributes, timestamp, source, backfill,
                    processCoid, processSecid):
    if keys['INDFMT'] != 'INDL' or keys['DATAFMT'] != "STD":
        return

    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill == 1:
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    for n, v in attributes.iteritems():
        if n in ('AJEXQ', 'AJPQ', 'AJEX', 'AJP'):
            datatype = "n"
            value = float(v) if v is not None else None
        elif n in ('APDEDATEQ', 'FDATEQ', 'PDATEQ', 'RDQ', 'APDEDATE',
                   'FDATE', 'PDATE'):
            datatype = "d"
            value = util.convert_date_to_millis(v) if v is not None else None
        elif n in ('ACCTSTDQ', 'COMPSTQ', 'CURCDQ', 'CURNCDQ', 'DATACQTR',
                   'DATAFQTR', 'FQTR', 'FYEARQ', 'UPDQ', 'ACCTSTD', 'COMPST',
                   'CURCD', 'CURNCD', 'FYEAR', 'UPD'):
            datatype = "s"
            value = v
        else:
            continue

        if command in ("C", "I"):
            database.insertAttribute("co", datatype, coid, date, source, n,
                                     value, timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", datatype, coid, date, source, n,
                                     timestamp)
def __processHgic(command, keys, attributes, timestamp, source, backfill,
                  processCoid, processSecid):
    if keys['INDTYPE'] != 'GICS':
        return

    date = -1L
    dateFrom = util.convert_date_to_millis(keys['INDFROM'])
    if "INDTHRU" in attributes and attributes["INDTHRU"] is not None:
        dateTo = util.convert_date_to_millis(
            __datePlusOne(attributes["INDTHRU"]))
    else:
        dateTo = None

    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    #born = dateval #XXX why do we do this???
    for n, v in attributes.iteritems():
        if n not in ("GSUBIND", "GGROUP", "GIND", "GSECTOR"):
            continue
        if command in ("C", "I"):
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     dateFrom, True)
            database.insertAttribute("co", "s", coid, date, source, n, v,
                                     dateFrom, dateTo, backfill, True)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     dateFrom, True)
def singleDayReconcile(date, old):
    morgan = MorganStanleyPositions()
    morganPositions = morgan.getPositions(date)

    if old:
        us = OldSystemPositions()
    else:
        us = NewSystemPositions()
    usPositions = us.getPositions(date)

    #maps = getIdMaps(date)
    result = reconcile(usPositions, morganPositions,
                       util.convert_date_to_millis(date))
    return __beautify(*result, timestamp=util.convert_date_to_millis(date))
def __processExchange(command, keys, attributes, timestamp, source, backfill):
    if backfill:
        timestamp = min(
            timestamp,
            util.convert_date_to_millis(__datePlusOne(keys["DATADATE"])))

    code = database.getAttributeType("EXRATE", source, "n", database.EXRATE)
    updates = (0, 0)
    if command in ("I", "C") and "EXRATD" in attributes:
        updates = database.insertTimelineRow(
            database.EXRATE, {
                "currency": database.getCurrencyType(keys["TOCURD"]),
                "date": int(keys["DATADATE"])
            }, {
                "backfill": backfill,
                "rate": float(attributes["EXRATD"])
            }, timestamp)
    elif command in ("D", "R"):
        updates = database.killOrDeleteTimelineRow(
            database.EXRATE, {
                "currency": database.getCurrencyType(keys["TOCURD"]),
                "date": int(keys["DATADATE"])
            }, timestamp)
    database.updateAttributeStats(code, *updates)
def __processDividend(command, keys, attributes, timestamp, source, backfill,
                      processCoid, processSecid):
    secid = database.getSecidFromCsid(keys["GVKEY"], keys["IID"], timestamp)
    if not processSecid(secid):
        return
    if secid is None:
        secid = database.createNewCsid(keys["GVKEY"], keys["IID"], timestamp)
        util.warning("Created new secid: {}.{}=>{}".format(
            keys['GVKEY'], keys['IID'], secid))

    if backfill == 1:
        timestamp = min(
            timestamp,
            util.convert_date_to_millis(__datePlusOne(keys["DATADATE"])))

    code = database.getAttributeType("DIVIDEND", source, "n",
                                     database.DIVIDEND)
    updates = (0, 0)
    if command == "I":
        data = {
            "backfill": backfill,
            "currency": database.getCurrencyType(keys["CURCDDV"])
        }
        #get the data that we track and translate them to our own names.
        #make sure that you also get the attribute types right
        for k, v in __usToCsDividendTranslate.iteritems():
            value = attributes.get(v, None)
            if value is not None:
                data[k] = float(value)
            else:
                data[k] = value  #i.e., None
        #finally do the insertion
        updates = database.insertTimelineRow(database.DIVIDEND, {
            "secid": secid,
            "date": int(keys["DATADATE"])
        }, data, timestamp)
    elif command == "R":
        updates = database.killOrDeleteTimelineRow(
            database.DIVIDEND, {
                "secid": secid,
                "date": int(keys["DATADATE"])
            }, timestamp)
    elif command in ("C", "D"):
        data = {
            "backfill": backfill,
            "currency": database.getCurrencyType(keys["CURCDDV"])
        }
        for n, v in attributes.iteritems():  #for each attribute from the compustat line
            if n in __csToUsDividendTranslate:  #if it is among the ones we track
                ourName = __csToUsDividendTranslate[n]
                if v is not None:
                    data[ourName] = float(v)
                else:
                    data[ourName] = None  #i.e., None
        updates = database.updateTimelineRow(database.DIVIDEND, {
            "secid": secid,
            "date": int(keys["DATADATE"])
        }, data, timestamp)
    database.updateAttributeStats(code, *updates)
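# __processDividend above assumes two module-level dictionaries, defined
# outside this section, that translate between our internal dividend field
# names and the Compustat ones. A hedged sketch of their shape only; the
# entries below are illustrative placeholders, not the actual field list:
__usToCsDividendTranslate = {
    "dividend": "DIV",          #hypothetical internal name => Compustat name
    "cashEquivalent": "CHEQV",  #hypothetical
}
#the inverse map, used when updating rows from a Compustat line
__csToUsDividendTranslate = dict(
    (v, k) for k, v in __usToCsDividendTranslate.iteritems())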
def get_dist(date, secid2tickers, massive, overflow_mult):
    global database
    total = 0.0
    dist = {}

    volsumFile = "/".join(
        (os.environ["DATA_DIR"], "bars", str(date), "volsum.txt"))
    if not os.path.isfile(volsumFile):
        raise Exception(
            "Problem finding historical volume data in file {}".format(
                volsumFile))

    with open(volsumFile, "r") as f:
        #skip the header
        f.readline()
        for line in f:
            tokens = line.strip().split("|")
            secid = tokens[0]
            ticker = secid2tickers.get(secid, None)
            if ticker is None:
                ticker = database.getXrefFromSecid(
                    "TIC", int(secid), util.convert_date_to_millis(date))
            if ticker is None:
                continue
            mult = 1
            if ticker not in massive:
                mult = overflow_mult
            vol = mult * sum([float(field) for field in tokens[1:]])
            total += vol
            if vol > 0:
                dist[ticker] = vol

    for ticker in dist.keys():
        dist[ticker] = dist[ticker] / total
    return dist
def __processCredit(command, keys, attributes, timestamp, source, backfill):
    date = util.convert_date_to_millis(keys["DATADATE"])
    if backfill:
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    coid = int(keys["GVKEY"])
    for n, v in attributes.iteritems():
        if n not in ('SPLTICRM', 'SPSTICRM', 'SPSDRM'):
            continue
        if command in ("C", "I"):
            database.insertAttribute("co", "s", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     timestamp)
def process(filepath, source):
    info = datafiles.read_info_file(filepath)
    if info["date_last_absent"] is not None:
        backfill = 0
        timestamp = util.convert_date_to_millis(info["date_first_present"])
    else:
        backfill = 1
        timestamp = util.convert_date_to_millis(info["date_modified"])

    database.setAttributeAutoCreate(True)

    bad = 0
    data = util.csvdict(open(filepath))
    for row in data:
        ticker = row["Symbol"]
        secid = database.getSecidFromXref("TIC", ticker, timestamp,
                                          "compustat_idhist",
                                          newdb.xrefsolve.preferUS)
        if secid is None:
            continue

        try:
            date = util.convert_date_to_millis(row["Record_Date"])
        except:
            util.warning("Bad date for row: " + str(row))
            bad += 1
            if bad > 20:
                util.error(str(bad) + " bad lines found. Raising exception. "
                           "Go check file " + filepath)
                raise Exception(str(bad) + " bad lines found. Raising "
                                "exception. Go check file " + filepath)
            continue  #skip the row; its date is unusable

        for sqAtt, ourAtt in attributeMap.iteritems():
            name = ourAtt[0]
            compareWithRecent = ourAtt[1]
            value = row[sqAtt]
            if value == '':
                value = None
            database.insertAttribute("sec", "n", secid, date, source, name,
                                     value, timestamp, None, backfill, False,
                                     compareWithRecent, approximatelyEqual)
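# insertAttribute above is given approximatelyEqual so that a re-reported
# value close to the most recent stored one does not spawn a new timeline
# row. Its definition is not in this section; a minimal sketch, assuming a
# small relative tolerance (the cutoff is an assumption):
def approximatelyEqual(a, b, tolerance=0.001):
    #treat two Nones as equal, None vs a number as different
    if a is None or b is None:
        return a is b
    denominator = float(max(abs(a), abs(b)))
    return denominator == 0 or abs(a - b) / denominator <= tolerance

# attributeMap (also defined outside this section) maps a source column name
# to a (our_attribute_name, compare_with_recent) pair; e.g., hypothetically:
#attributeMap = {"ShortInterest": ("SHORTINT", True)}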
def __processIndustry(command, keys, attributes, timestamp, source, backfill,
                      processCoid, processSecid):
    date = util.convert_date_to_millis(keys["DATADATE"])
    if backfill:
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    for n, v in attributes.iteritems():
        if n not in ('NAICSH', 'SICH'):
            continue
        if command in ("C", "I"):
            database.insertAttribute("co", "s", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     timestamp)
def process(filepath, source):
    date = os.path.basename(filepath).split('.')[2]
    born = date + " 09:30 EST"
    date_millis = util.convert_date_to_millis(date)
    born_millis = util.convert_date_to_millis(born)

    # If we have acquisition times, use these for the real born_millis time
    info = datafiles.read_info_file(filepath)
    if info['date_last_absent'] is not None:
        born = util.convert_date_to_millis(info['date_first_present'])
        backfill = 0
    else:
        born = util.convert_date_to_millis(date + " 09:30 EST")
        backfill = 1

    database.setAttributeAutoCreate(True)
    for line in open(filepath):
        handle_htb(line, date_millis, born_millis, backfill)
def xrefChanges2(date1=None, date2=None):
    #read the secids we are interested in. we might want to change the source
    uni = set()
    #with open("/".join((os.environ["ROOT_DIR"], "run", os.environ["STRAT"], "old.secids.txt")), "r") as file:
    with open("/apps/ase/run/useq-live/old.secids.txt", "r") as file:
        for line in file:
            tokens = line.strip().split("|")
            uni.add(tokens[2])

    if date2 is None:
        date2 = util.now()
    if date1 is None:
        date1 = util.exchangeTradingOffset(
            os.environ["PRIMARY_EXCHANGE"],
            util.convert_millis_to_datetime(date2).strftime("%Y%m%d"), -1)
        date1 = util.convert_date_to_millis(str(date1))

    tickerChanges = []
    cusipChanges = []
    for secid in uni:
        ticker1 = database.execute(
            "SELECT value FROM xref WHERE secid={secid} AND xref_type=2 AND source=2 AND born<={date} AND (died IS NULL OR died>{date})"
            .format(secid=secid, date=date1)).fetchone()
        ticker1 = ticker1["value"] if ticker1 is not None else None
        ticker2 = database.execute(
            "SELECT value FROM xref WHERE secid={secid} AND xref_type=2 AND source=2 AND born<={date} AND (died IS NULL OR died>{date})"
            .format(secid=secid, date=date2)).fetchone()
        ticker2 = ticker2["value"] if ticker2 is not None else None
        cusip1 = database.execute(
            "SELECT value FROM xref WHERE secid={secid} AND xref_type=1 AND source=2 AND born<={date} AND (died IS NULL OR died>{date})"
            .format(secid=secid, date=date1)).fetchone()
        cusip1 = cusip1["value"] if cusip1 is not None else None
        cusip2 = database.execute(
            "SELECT value FROM xref WHERE secid={secid} AND xref_type=1 AND source=2 AND born<={date} AND (died IS NULL OR died>{date})"
            .format(secid=secid, date=date2)).fetchone()
        cusip2 = cusip2["value"] if cusip2 is not None else None

        if ticker1 != ticker2:
            tickerChanges.append((secid, ticker1, ticker2))
        if cusip1 != cusip2:
            cusipChanges.append((secid, cusip1, cusip2))

    report = []
    report.append("Xref changes between {} and {}".format(
        util.convert_millis_to_datetime(date1).strftime("%Y%m%d"),
        util.convert_millis_to_datetime(date2).strftime("%Y%m%d")))
    for secid, x1, x2 in tickerChanges:
        report.append("{}: {} => {}".format(secid, x1, x2))
    for secid, x1, x2 in cusipChanges:
        report.append("{}: {} => {}".format(secid, x1, x2))

    return "\n".join(report) if len(report) > 1 else None
def __processFiledate(command, keys, attributes, timestamp, source, backfill,
                      global_cs):
    if keys['SRCTYPE'] not in ('10Q', '10K'):
        return

    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill:
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    coid = int(keys["GVKEY"])
    for n, v in attributes.iteritems():
        if n != "FILEDATE":
            continue
        if command in ("C", "I"):
            database.insertAttribute("co", "d", coid, date, source, n,
                                     util.convert_date_to_millis(v),
                                     timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "d", coid, date, source, n,
                                     timestamp)
def __getBorrows(uni, date):
    borrows = {}
    args = {"secid": None, "date": util.convert_date_to_millis(date)}
    for secid in uni:
        args["secid"] = secid
        row = database.execute(
            "SELECT value FROM sec_attr_n WHERE secid=%(secid)s AND date=%(date)s AND type=2210 AND died IS NULL ORDER BY date DESC LIMIT 1",
            args).fetchone()
        if row is not None:
            borrows[secid] = row["value"]
    return borrows
def __processIndustry(command, keys, attributes, timestamp, source, backfill,
                      global_cs):
    if not global_cs and not (keys['POPSRC'] == "D"
                              and keys["CONSOL"] == "C"):
        return
    elif global_cs and not (keys['POPSRC'] == "I" and keys["CONSOL"] == "C"):
        return

    date = util.convert_date_to_millis(keys["DATADATE"])
    if backfill:
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    coid = int(keys["GVKEY"])
    for n, v in attributes.iteritems():
        if n not in ('NAICSH', 'SICH'):
            continue
        if command in ("C", "I"):
            database.insertAttribute("co", "s", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     timestamp)
def __processFundamental(command, keys, attributes, timestamp, source,
                         backfill, processCoid, processSecid):
    if keys['INDFMT'] != 'INDL' or keys['DATAFMT'] != "STD":
        return

    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill:
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    for n, v in attributes.iteritems():
        if n[-3:] == "_DC":
            continue
        #value = float(v) if v is not None else None
        if command in ("C", "I"):
            database.insertAttribute("co", "n", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("D", "R"):
            database.deleteAttribute("co", "n", coid, date, source, n,
                                     timestamp)
def process(filepath, source):
    sourceNameInDatabase = "onlineinvestor"

    info = datafiles.read_info_file(filepath)
    if "hist" in source:
        backfill = 1
        #timestamp will be data dependent
    else:
        backfill = 0
        timestamp = util.convert_date_to_millis(info["date_modified"])

    database.setAttributeAutoCreate(True)

    with open(filepath, "r") as file:
        for line in file:
            tokens = line.split("\t")
            date = util.convert_date_to_millis(tokens[0])
            ticker = tokens[1]
            notes = tokens[2]

            if backfill == 1:
                born = date
            else:
                born = timestamp

            secid = database.getSecidFromXref("TIC", ticker, date,
                                              "compustat",
                                              newdb.xrefsolve.preferUS)
            if secid is None:
                util.warning("Failed to map ticker {},{}".format(
                    ticker, tokens[0]))
                return

            coid, issueid = database.getCsidFromSecid(secid)
            assert coid is not None

            database.insertAttribute("co", "s", coid, date,
                                     sourceNameInDatabase, "BUYBACK", notes,
                                     born, None, backfill)
def getPositions(self, date):
    #for date the sod positions are in date+1
    #sodPortPath = os.environ["ROOT_DIR"] + "/run/" + os.environ["STRAT"] + "/" + str(util.exchangeTradingOffset(os.environ["PRIMARY_EXCHANGE"], date.strftime("%Y%m%d"), 1)) + "/sodPort.txt"
    sodPortPath = "/apps/ase/run/useq-live/" + str(
        util.exchangeTradingOffset(os.environ["PRIMARY_EXCHANGE"],
                                   date.strftime("%Y%m%d"),
                                   1)) + "/sodPort.txt"
    if not os.path.isfile(sodPortPath):
        raise PositionSourceError(
            "NewSystemPositions failed to locate sodPort.txt")

    positions = list()
    with open(sodPortPath, "r") as file:
        #skip header
        file.readline()
        for line in file:
            if len(line) == 0:
                continue
            tokens = line.strip().split("|")
            secid = int(tokens[1])
            size = int(tokens[2])
            price = float(tokens[3])
            cusip = database.getXrefFromSecid(
                "CUSIP", secid, util.convert_date_to_millis(date),
                "compustat_idhist")
            ticker = database.getXrefFromSecid(
                "TIC", secid, util.convert_date_to_millis(date),
                "compustat_idhist")
            isin = database.getXrefFromSecid(
                "ISIN", secid, util.convert_date_to_millis(date),
                "compustat_g_idhist")
            position = Position(secid, cusip, ticker, isin, size, price)
            positions.append(position)
    return positions
def __processSecurity(command, keys, attributes, timestamp, source, backfill,
                      processCoid, processSecid):
    secid = database.getSecidFromCsid(keys["GVKEY"], keys["IID"], timestamp)
    if not processSecid(secid):
        return
    if secid is None:
        secid = database.createNewCsid(keys["GVKEY"], keys["IID"], timestamp)
        util.warning("Created new secid: {}.{}=>{}".format(
            keys['GVKEY'], keys['IID'], secid))

    for n, v in attributes.iteritems():
        if n in ("CUSIP", "ISIN", "SEDOL", "TIC"):
            if command in ("C", "I"):
                #database.insertTimelineRow(database.XREF_TABLE, {"secid": secid, "xref_type": database.getXrefType(n), "source": database.getSourceType(source)}, {"value": v}, timestamp)
                database.insertXref(secid, source, n, v, timestamp)
            elif command in ("D", "R"):
                #database.killOrDeleteTimelineRow(database.XREF_TABLE, {"secid": secid, "xref_type": database.getXrefType(n), "source": database.getSourceType(source)}, timestamp)
                database.deleteXref(secid, source, n, timestamp)
        elif n in ("SECSTAT", "TPCI", "EXCNTRY"):
            date = 0L
            if command in ("C", "I"):
                database.insertAttribute("sec", "s", secid, date, source, n,
                                         v, timestamp, None, backfill)
            elif command in ("D", "R"):
                database.deleteAttribute("sec", "s", secid, date, source, n,
                                         timestamp)
        elif n == "EXCHG":
            date = 0L
            if command in ("C", "I"):
                database.insertAttribute("sec", "n", secid, date, source, n,
                                         v, timestamp, None, backfill)
            elif command in ("D", "R"):
                database.deleteAttribute("sec", "n", secid, date, source, n,
                                         timestamp)
        elif n == "DLDTEI":
            date = 0L
            if command in ("C", "I"):
                database.insertAttribute("sec", "d", secid, date, source, n,
                                         util.convert_date_to_millis(v),
                                         timestamp, None, backfill)
            elif command in ("D", "R"):
                database.deleteAttribute("sec", "d", secid, date, source, n,
                                         timestamp)
def parseCostLog(data):
    res = []
    eastern = pytz.timezone('US/Eastern')
    for line in data:
        tokens = line.strip().split()
        if tokens[3] != "REQ":
            continue
        long_ts = (tokens[0] + " " + tokens[1])[0:-7]
        long_ts = datetime.datetime.strptime(long_ts, "%Y/%m/%d %H:%M:%S")
        long_ts = eastern.localize(long_ts)
        ts = util.convert_date_to_millis(long_ts)
        m = re.match(r"\[orderID: (.*?)\]", tokens[-2] + " " + tokens[-1])
        if m is None:
            util.warning("Failed to parse REQ line: {}".format(line))
            continue
        orderid = long(m.group(1))
        if orderid > 0:
            res.append(str(orderid) + "|" + str(ts))
    return res
def __processSplit(command, keys, attributes, timestamp, source, backfill,
                   global_cs):
    if "SPLIT" not in attributes:
        return
    if global_cs and not keys["IID"].endswith("W"):
        return
    elif not global_cs and keys["IID"].endswith("W"):
        return

    secid = database.getSecidFromCsid(keys["GVKEY"], keys["IID"], timestamp)
    if secid is None:
        secid = database.createNewCsid(keys["GVKEY"], keys["IID"], timestamp)
        util.warning("Created new secid: {}.{}=>{}".format(
            keys['GVKEY'], keys['IID'], secid))

    if backfill:
        timestamp = min(
            timestamp,
            util.convert_date_to_millis(__datePlusOne(keys["DATADATE"])))

    code = database.getAttributeType("SPLIT", source, "n", database.SPLIT)
    updates = (0, 0)
    if command in ("I", "C"):
        updates = database.insertTimelineRow(database.SPLIT, {
            "secid": secid,
            "date": int(keys["DATADATE"])
        }, {
            "backfill": backfill,
            "rate": float(attributes["SPLIT"])
        }, timestamp)
    elif command in ("D", "R"):
        updates = database.killOrDeleteTimelineRow(
            database.SPLIT, {
                "secid": secid,
                "date": int(keys["DATADATE"])
            }, timestamp)
    database.updateAttributeStats(code, *updates)
file_path = os.path.normpath(file_path_info[0:-5])
#file_path_rel = file_path.replace("%s/%s/" % (os.environ["DATA_DIR"], sconfig['local_dir']), "")
file_path_rel = os.path.relpath(
    file_path, "/".join((os.environ["DATA_DIR"], sconfig["local_dir"])))
if file_path_rel not in seen:
    info = datafiles.read_info_file(file_path)
    # If we don't have reliable acquisition times (first fetch), use the
    # modified timestamp
    if info['date_last_absent'] is None:
        date_released = info['date_modified']
    else:
        date_released = info['date_first_present']
    #if we are processing using lag, do not add the file
    if options.lag is not None and (
            util.now() -
            util.convert_date_to_millis(datetime.timedelta(days=options.lag))
            < util.convert_date_to_millis(date_released)):
        continue
    util.info("Found new file: %s" % file_path)
    files.append({
        'path': file_path,
        'path_rel': file_path_rel,
        'date': (date_released, info['date_modified'])
    })

util.info("Found %d files" % len(files))
if len(files) == 0:
    util.warning("Done indexing, no files found")
    continue
def __processHistorical(filepath, source):
    #currentFileInfo = datafiles.read_info_file(filepath)
    #currentFileDate = currentFileInfo['date_first_present']
    #currentFileTimestamp = util.convert_date_to_millis(currentFileDate)

    #set autocreate attributes
    autocreate = database.getAttributeAutoCreate()
    database.setAttributeAutoCreate(True)

    (newData, removedData, firstLoad) = __getDeltas(filepath, source)

    ######process the deltas##########
    ####Important!!!! To maintain consistency they should be processed in
    ####ascending effective date
    removedData = list(removedData)
    removedData.sort(key=lambda x: x.split("|")[4])
    for line in removedData:
        info = __lineToDict(line)
        born = util.convert_date_to_millis(info["start"])
        died = None if info["end"] is None else util.convert_date_to_millis(
            info["end"])

        #check row quality
        if died is not None and died <= born:
            continue
        if info["attributeValue"] == "":
            continue

        util.debug("Getting/Inserting Security {}.{}, {}".format(
            info["coid"], info["issueid"], born))
        secid = __getOrCreateSecid(info["coid"], info["issueid"],
                                   info["country"], info["currency"], born)

        if info["attributeName"] == "EXCHG":
            __deleteExchangeHistorical(secid, info["attributeName"],
                                       info["attributeValue"], source, born)
        else:
            __deleteXrefHistorical(secid, info["attributeName"], source, born)

    ######process the deltas##########
    ####Important!!!! To maintain consistency they should be processed in
    ####ascending effective date
    newData = list(newData)
    newData.sort(key=lambda x: x.split("|")[4])
    for line in newData:
        info = __lineToDict(line)
        born = util.convert_date_to_millis(info["start"])
        died = None if info["end"] is None else util.convert_date_to_millis(
            info["end"])

        #check row quality
        if died is not None and died <= born:
            continue
        if info["attributeValue"] == "":
            continue

        util.debug("Getting/Inserting Security {}.{}, {}".format(
            info["coid"], info["issueid"], born))
        secid = __getOrCreateSecid(info["coid"], info["issueid"],
                                   info["country"], info["currency"], born)

        if info["attributeName"] == "EXCHG":
            __insertExchangeHistorical(secid, info["attributeName"],
                                       info["attributeValue"], source, born,
                                       died, 1)
        else:
            __insertXrefHistorical(secid, info["attributeName"],
                                   info["attributeValue"], source, born, died)

    #revert attribute autocreate
    database.setAttributeAutoCreate(autocreate)
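# __lineToDict above is assumed to name the fields of a pipe-delimited delta
# line; its definition and the exact column order sit outside this section.
# The sketch below is a guess consistent only with the sort key used above
# (x.split("|")[4], i.e. field 4 holds the effective start date); the real
# column order may differ:
def __lineToDict(line):
    tokens = line.strip().split("|")
    return {
        "coid": tokens[0],
        "issueid": tokens[1],
        "country": tokens[2],
        "currency": tokens[3],
        "start": tokens[4],
        "attributeName": tokens[5],
        "attributeValue": tokens[6],
        "end": tokens[7] if tokens[7] != "" else None,
    }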
if len(subdirs) == 0:
    errors.append("{}: Never received a file".format(sourceConfigFile[:-3]))
    continue
subdir = subdirs[-1]

acquireTimestamp = 0L
for node in os.walk(sourceLocalDir + "/" + subdir):
    dir = node[0]
    files = node[2]
    for file in files:
        if ".info" in file or ".time" in file or ".new" in file:
            continue
        info = datafiles.read_info_file(dir + "/" + file)
        timestamp = util.convert_date_to_millis(info["date_first_present"])
        if timestamp > acquireTimestamp:
            acquireTimestamp = timestamp

now = util.now()
checkTimestamp = util.convert_date_to_millis(
    cPickle.load(open(timeFile, 'rb')))

#get the frequency with which we expect new data
expectedNewDataFrequency = sc.get("new_data_frequency",
                                  defaultNewDataFrequency)

checkHours = (now - checkTimestamp) / (60 * 60 * 1000)
checkMins = ((now - checkTimestamp) % (60 * 60 * 1000)) / (60 * 1000)
acquireHours = (now - acquireTimestamp) / (60 * 60 * 1000)
acquireMins = ((now - acquireTimestamp) % (60 * 60 * 1000)) / (60 * 1000)
for mufile in sorted(os.listdir(musdir)):
    util.info("Processing file {}/{}".format(musdir, mufile))
    tokens = mufile.split(".")
    if not tokens[0] == "mus":
        continue

    ts_token = 1
    mus_type = 0
    if tokens[1] == "FULL":
        ts_token += 1
        mus_type = 2
    elif tokens[1] == "SHORT":
        ts_token += 1
        mus_type = 1
    if mus_type != 2:
        continue

    mus_ts = util.convert_date_to_millis(
        datetime.datetime.strptime(tokens[ts_token], "%Y%m%d_%H%M"))
    if (not args.force) and (mus_ts <= max_ts_in_db):
        continue

    row = {}
    with open(musdir + "/" + mufile, "r") as file:
        for line in file:
            tokens = line.strip().split("|")
            secid = int(tokens[0])
            fc = tokens[1]
            fc_type = database.getAttributeType(fc, "mus", "n", "mus")
            value = float(tokens[2])
            buffer.append((secid, fc_type, mus_ts, value))
fs = file_source.FileSource('/apps/exec/log/rts1/')
listing = fs.list_recursive('cost.log', sizes=False)

start = dateutil.parser.parse(sys.argv[1] + "01")
end = start + dateutil.relativedelta.relativedelta(months=1)

dt = start
rebates = 0.0
mils = float(sys.argv[2])
while dt < end:
    for row in listing:
        if row[0].find(dt.strftime("%Y%m%d")) == -1:
            continue
        for line in open(row[0]):
            if line.find("FILL") != -1:
                (date, type, sym, ecn, type, size, price, bid, ask,
                 liq) = line.split()
                date = util.convert_date_to_millis(date)
                type = int(type)
                size = int(size)
                price = float(price)
                bid = float(bid)
                ask = float(ask)
                if liq != 'remove':
                    if ecn == "ISLD":
                        rebates += mils * abs(size)
    dt += datetime.timedelta(days=1)
print start.strftime('%Y%m'), rebates
def process(filePath, source, verifyOnly=False):
    #process the RSK files for now
    if filePath.find(".RSK.") < 0:
        return

    file = open(filePath, "r")

    #The first 2 lines should be the pricedate and the modeldate
    tokens = file.readline().strip().split(":")
    if tokens[0] != "PriceDate":
        util.error("It doesn't seem like a barra daily format")
        raise Exception
    else:
        priceDate = __barraDateToCompact(tokens[1].strip())

    tokens = file.readline().strip().split(":")
    if tokens[0] != "ModelDate":
        util.error("It doesn't seem like a barra daily format")
        raise Exception
    else:
        modelDate = __barraDateToCompact(tokens[1].strip())

    # If we have acquisition times, use these for the real born time.
    # Else, use priceDate + 1 day.
    fileInfo = datafiles.read_info_file(filePath)
    if fileInfo['date_last_absent'] is not None:
        timestamp = util.convert_date_to_millis(
            fileInfo['date_first_present'])
        backfill = 0
        database.setAttributeAutoCreate(True)
    else:
        date = priceDate + datetime.timedelta(days=1)
        timestamp = util.convert_date_to_millis(date.strftime("%Y%m%d"))
        backfill = 1
        database.setAttributeAutoCreate(True)

    #get the header names. comma separated, surrounded by double quotes
    line = file.readline()
    headers = __getListFromBarraLine(line)

    #init the database
    #database.dropXrefCache()
    #database.addXrefCache(timestamp) #cache xrefs

    #######MAPPING VERIFICATION CODE########
    inconsistentMappings = []
    ########################################

    for line in file:
        data = __getListFromBarraLine(line)
        if len(data) != len(headers):
            util.warning("Skipping bad line: {}".format(line))
            continue
        data = dict(zip(headers, data))

        #######MAPPING VERIFICATION CODE########
        if verifyOnly:
            #mirror the getSecId call
            result = __verifyMapping(data["BARRID"],
                                     util.cusip8to9(data["CUSIP"]),
                                     data["TICKER"], source, timestamp,
                                     newdb.xrefsolve.preferUS)
            if result is not None:
                inconsistentMappings.append(result)
            continue
        ########################################

        secid = __getSecId(data["BARRID"], util.cusip8to9(data["CUSIP"]),
                           data["TICKER"], source, timestamp,
                           newdb.xrefsolve.preferUS, filePath)
        if secid is None:
            continue

        #Now, insert barra attributes and attribute values
        __removeUnwantedAttributes(data)
        for attributeName, attributeValue in data.iteritems():
            if isinstance(attributeValue, str):
                table = "s"
            elif isinstance(attributeValue, int):
                table = "n"
            elif isinstance(attributeValue, float):
                table = "n"
            else:
                util.error(
                    "Dude, attribute values should be either int, float or str")
                raise Exception
            #assert attributeName.startswith("INDNAME") and table == "s"

            #With the exception of capitalization and price, the other barra
            #attributes are evaluated monthly; for them, the date should be
            #the model date. Price we ignore, while for capitalization we
            #only create a new tuple if it has changed by more than a
            #threshold since the last date for which we have a tuple.
            if attributeName == "PRICE":
                continue
            elif attributeName == "CAPITALIZATION":
                database.insertAttribute(
                    "sec", "n", secid,
                    util.convert_date_to_millis(priceDate), source,
                    attributeName, attributeValue, timestamp, None, backfill,
                    False, True, __capEquals)
            else:
                database.insertAttribute(
                    "sec", table, secid,
                    util.convert_date_to_millis(modelDate), source,
                    attributeName, attributeValue, timestamp, None, backfill)

    file.close()

    #######MAPPING VERIFICATION CODE########
    if verifyOnly:
        return inconsistentMappings
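# The CAPITALIZATION branch above passes __capEquals so that a new tuple is
# created only when capitalization moves by more than a threshold, per the
# comment in the loop. Its definition is outside this section; a minimal
# sketch, assuming a 5% relative threshold (the actual cutoff is an
# assumption):
def __capEquals(old, new):
    #two Nones compare equal; None vs a number does not
    if old is None or new is None:
        return old is new
    if old == 0:
        return new == 0
    return abs(new - old) / float(abs(old)) < 0.05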