def mergeAndAdjust(symbol):
    with ZipFile(pjoin(const.RAW_DIR, '_' + symbol + '.csv.zip'),
                 'r', ZIP_DEFLATED) as _zip:
        files = _zip.namelist()
        files.sort()
        files.reverse()
        # Adjust hist. prices in 2 steps: first adjust the Adj Close
        # backward to the oldest data...
        merged = []
        carried_mult = 1.0
        for f in files:
            ld = list(csv.DictReader(strio(_zip.read(f))))
            if ld:
                if carried_mult != 1.0:
                    for sess in ld:
                        sess['Adj Close'] = float(sess['Adj Close']) * carried_mult
                carried_mult = float(ld[-1]['Adj Close']) / float(ld[-1]['Close'])
                merged += ld
    # ...and then use Adj Close to adjust the remaining values
    # (map is used here for its side effect on each row dict).
    map(yAdjust, merged)
    name = '_' + symbol + '.json'
    zname = name + '.zip'
    with ZipFile(pjoin(const.ADJ_DIR, zname), 'w', ZIP_DEFLATED) as out:
        out.writestr(name, json.dumps(merged))
    print zname

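# yAdjust is defined elsewhere in the module. Below is a minimal sketch of
# the second adjustment step it presumably performs, ASSUMING the
# conventional Yahoo-style adjustment in which each price field is scaled
# by that session's Adj Close / Close ratio. The field list and the name
# yAdjustSketch are assumptions for illustration, not the confirmed helper.
def yAdjustSketch(sess):
    mult = float(sess['Adj Close']) / float(sess['Close'])
    for field in ('Open', 'High', 'Low', 'Close'):
        sess[field] = float(sess[field]) * mult
    return sess
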
def backupSymbols():
    '''Rotate the current symbol files to .bak ahead of a fresh download
    from the Nasdaq FTP.'''
    files = os.listdir(SYM_DIR)
    for f in filter(lambda n: n.endswith('.bak'), files):
        os.remove(pjoin(SYM_DIR, f))
    for f in filter(lambda n: n.endswith('.txt'), files):
        os.rename(pjoin(SYM_DIR, f), pjoin(SYM_DIR, f + '.bak'))

def reloadSymbols():
    '''Load symbols and metadata into sqlite'''
    # see http://www.nasdaqtrader.com/trader.aspx?id=symboldirdefs
    with sqlite3.connect(pjoin(DB_DIR, "symbols.db")) as conn:
        curs = conn.cursor()
        tables = ('nasdaqlisted', 'otherlisted')
        for f in filter(lambda n: n.endswith('.txt'), os.listdir(SYM_DIR)):
            table = f.split('_')[0]
            if table not in tables:
                continue
            with open(pjoin(SYM_DIR, f), 'rb') as csvfile:
                reader = csv.DictReader(csvfile, delimiter="|")
                # Build the schema from the header line so no data row is
                # consumed before the insert loop below.
                cols = map(lambda s: s.replace(' ', '_').upper(),
                           sorted(reader.fieldnames))
                params = (table, ','.join(cols))
                curs.execute("drop table if exists %s" % params[0])
                create = "create table %s (%s)" % params
                print create
                curs.execute(create)
                for row in reader:
                    qms = ','.join('?' * len(row.keys()))
                    ins = 'insert into %s (%s) values (%s)' % (params[0], params[1], qms)
                    curs.execute(ins, [str(row[key]) for key in sorted(row.keys())])
        conn.commit()

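# A hypothetical sketch of reading back what reloadSymbols builds. The
# column names follow the header mangling above (spaces to underscores,
# uppercased); SYMBOL and SECURITY_NAME come from the nasdaqlisted.txt
# header, but treat the exact query as illustrative, not canonical.
def _exampleSymbolQuery():
    with sqlite3.connect(pjoin(DB_DIR, "symbols.db")) as conn:
        curs = conn.cursor()
        for sym, secname in curs.execute(
                "select SYMBOL, SECURITY_NAME from nasdaqlisted limit 5"):
            print sym, secname
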
def updateEodData():
    # Collect candidate symbols from the two Nasdaq listing files...
    txts = filter(lambda fn: fnmatch(fn, '*listed_*.txt'),
                  os.listdir(const.SYM_DIR))
    nsdl = filter(lambda fn: fnmatch(fn, 'nasdaqlisted_*.txt'), txts)[0]
    allnsd = list(csv.DictReader(open(pjoin(const.SYM_DIR, nsdl), 'rb'),
                                 delimiter="|"))
    nsdsyms = filter(lambda s: s.isalpha(), map(lambda d: d['Symbol'], allnsd))
    othl = filter(lambda fn: fnmatch(fn, 'otherlisted_*.txt'), txts)[0]
    allother = list(csv.DictReader(open(pjoin(const.SYM_DIR, othl), 'rb'),
                                   delimiter="|"))
    othsyms = filter(lambda s: s.isalpha(),
                     map(lambda d: d['ACT Symbol'], allother))
    txt_syms = nsdsyms + othsyms
    # ...plus anything already present in the raw archive.
    raw_syms = map(lambda n: n.split('.')[0][1:], os.listdir(const.RAW_DIR))
    syms = list(set(txt_syms + raw_syms))
    for sym in BAD_SYMS:
        if sym in syms:
            syms.remove(sym)
    syms.sort()
    # # TEMPORARY TRUNCATION, REMOVE
    # LAST = 'OHAI'
    # syms = syms[syms.index(LAST) + 1:]
    rrobin = {}
    for symbol in syms:
        mo, dy, yr, seq = getStartDate(symbol)
        ip2host = dllib.getIpMap()
        # Get target ip in round-robin fashion: unseen ips start at zero,
        # otherwise pick the least-used ip still present in the map.
        for ip in ip2host:
            if ip not in rrobin:
                rrobin[ip] = 0
                targetip = ip
            else:
                ld = [{c: i} for i, c in rrobin.items()]
                ld = filter(lambda d: d.values()[0] in ip2host, ld)
                ld.sort()
                targetip = ld[0].values()[0]
        params = (targetip, symbol, mo, dy, yr)
        url = "http://%s/table.csv?s=%s&a=%s&b=%s&c=%s&d=11&e=31&f=2099&g=d" % params
        loc = urllib2.Request(url)
        loc.add_header('Accept-Encoding', 'gzip, deflate')
        loc.add_header('Host', ip2host[targetip])
        opener = urllib2.build_opener()
        print 'requesting', url
        try:
            csv_txt = dllib.tryDecompress(opener.open(loc).read())
            if list(csv.DictReader(strio(csv_txt))):
                rrobin[targetip] += 1
                _name = '_' + symbol
                zname = _name + '.csv.zip'
                with ZipFile(pjoin(const.RAW_DIR, zname), 'a', ZIP_DEFLATED) as _zip:
                    _zip.writestr(_name + '_' + seq + '.csv', csv_txt)
                print 'success', symbol
                mergeAndAdjust(symbol)
        except urllib2.HTTPError:
            print 'FAIL', symbol

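# dllib.tryDecompress lives in another module. Given the
# 'Accept-Encoding: gzip, deflate' header above, a plausible sketch is a
# decoder that tries gzip, then raw deflate, then falls back to the bytes
# as-is. This is an assumption about dllib, not its actual code.
import gzip
import zlib
from StringIO import StringIO

def tryDecompressSketch(payload):
    try:
        return gzip.GzipFile(fileobj=StringIO(payload)).read()
    except IOError:
        pass  # not gzip-framed
    try:
        return zlib.decompress(payload, -zlib.MAX_WBITS)  # raw deflate
    except zlib.error:
        return payload  # already plain text
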
def getHistory(*args):
    '''Return {symbol: row count} pairs, biggest first, rebuilding
    history.json only when a source file is newer than the cache.'''
    pool = args[0]["pool"]
    histpath = pjoin(const.JSON_DIR, "history.json")
    # Set the file's [a|m]time back to epoch to force rewrite of histpath.
    # os.utime(histpath, (0, 0))
    base = os.path.getmtime(histpath)
    newer = filter(lambda f: os.path.getmtime(os.path.join(SRC, f)) > base,
                   os.listdir(SRC))
    with open(histpath, "rb") as hfile:
        if newer:
            hist = []
            for symbol in map(lambda f: f.split(".")[0][1:], newer):
                pool.apply_async(numLines, [symbol],
                                 callback=lambda tup: hist.append({tup[0]: tup[1]}))
            # Poll until every async callback has delivered its count.
            while 1:
                if len(hist) < len(newer):
                    sleep(0.5)
                    continue
                else:
                    pool.close()
                    update = json.dumps(
                        map(lambda t: dict(t),
                            sorted(hist, key=lambda x: x.values()[0],
                                   reverse=True)))
                    open(histpath, "wb").write(update)
                    return update
        else:
            pool.close()
            return hfile.read()

def getMacd(params):
    symbol = params['symbol'][0]
    fast = int(params['fast'][0])
    slow = int(params['slow'][0])
    signal = int(params['signal'][0])
    depth = int(params['depth'][0])
    dpc = int(params['dpc'][0])
    name = '_' + symbol + '.json'
    zname = name + '.zip'
    with ZipFile(pjoin(SRC, zname), 'r', ZIP_DEFLATED).open(name) as f:
        lst = json.loads(f.read())
    lst.reverse()  # reverse rows into timeline (earliest -> most recent) order
    # Fetch extra history so the slow and signal EMAs have data to settle on.
    fdepth = depth + ((slow + signal) * dpc)
    if fdepth < len(lst):
        lst = lst[len(lst) - fdepth:]
    rows = []
    for row in lst:
        rows.append({'Date': row['Date'], 'Close': row['Close']})
    if dpc > 1:
        data = aggregate(dpc, rows)
    elif dpc == 1:
        data = rows
    return json.dumps(calcMacd(fast=fast, slow=slow, signal=signal, data=data))

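# calcMacd is defined elsewhere. Below is a minimal sketch of the standard
# MACD computation it presumably performs: MACD line = EMA(fast) -
# EMA(slow), signal line = EMA(signal) of the MACD line. The output field
# names and EMA seeding are assumptions for illustration.
def calcMacdSketch(fast, slow, signal, data):
    def ema(values, span):
        k = 2.0 / (span + 1)
        out = [values[0]]  # seed with the first value
        for v in values[1:]:
            out.append(v * k + out[-1] * (1 - k))
        return out
    closes = [float(row['Close']) for row in data]
    macd = [f - s for f, s in zip(ema(closes, fast), ema(closes, slow))]
    sig = ema(macd, signal)
    return [{'Date': row['Date'], 'MACD': m, 'Signal': s, 'Histogram': m - s}
            for row, m, s in zip(data, macd, sig)]
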
def numLines(symbol):
    '''Return (symbol, number of rows) for a symbol's adjusted JSON file.'''
    name = '_' + symbol + '.json'
    zname = name + '.zip'
    _zip = ZipFile(pjoin(SRC, zname), 'r', ZIP_DEFLATED)
    with _zip.open(name, 'r') as f:
        count = len(json.load(f))
    return (symbol, count)

def downloadSymbols():
    '''Download the latest listing files from the Nasdaq FTP, naming each
    local copy after its server-side modification time.'''
    files = ('nasdaqlisted.txt', 'otherlisted.txt')
    ftp = FTP('ftp.nasdaqtrader.com')
    ftp.login()
    ftp.cwd("SymbolDirectory")
    for f in files:
        mtime = ftp.sendcmd('MDTM ' + f).split()[1]
        name = f.split('.')[0] + '_' + mtime + '.txt'
        with open(pjoin(SYM_DIR, name), 'wb') as out:
            ftp.retrbinary("RETR %s" % f, out.write)
    ftp.quit()

def getObv(params):
    '''Compute On-Balance Volume: a running total that adds volume on up
    days and subtracts it on down days.'''
    symbol = params['symbol'][0]
    depth = int(params['depth'][0])
    dpc = int(params['dpc'][0])
    name = '_' + symbol + '.json'
    zname = name + '.zip'
    with ZipFile(pjoin(SRC, zname), 'r', ZIP_DEFLATED).open(name) as f:
        lst = json.loads(f.read())
    lst.reverse()  # flip to timeline order, oldest first
    if depth < len(lst):
        lst = lst[len(lst) - depth:]
    if dpc > 1:
        return json.dumps(aggregate(dpc, lst))
    elif dpc == 1:
        out = []
        firstClose = float(lst[0]['Close'])
        lst = lst[1:]
        for n in xrange(len(lst)):
            obv = {'Date': lst[n]['Date']}
            thisClose = float(lst[n]['Close'])
            lastClose = float(lst[n - 1]['Close'])
            if out:
                if thisClose > lastClose:
                    obv['OBV'] = out[-1]['OBV'] + int(lst[n]['Volume'])
                elif thisClose < lastClose:
                    obv['OBV'] = out[-1]['OBV'] - int(lst[n]['Volume'])
                else:
                    obv['OBV'] = out[-1]['OBV']
            else:
                # First output row: compare against the close just before
                # the window.
                if thisClose > firstClose:
                    obv['OBV'] = int(lst[n]['Volume'])
                elif thisClose < firstClose:
                    obv['OBV'] = int(lst[n]['Volume']) * -1
                else:
                    obv['OBV'] = 0
            out.append(obv)
        return json.dumps(out)

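# The get* handlers all take a parse_qs-style dict (each value is a list),
# as produced by urlparse.parse_qs on a request's query string. A
# hypothetical invocation, assuming '_IBM.json.zip' exists under SRC:
def _exampleObvCall():
    from urlparse import parse_qs
    return getObv(parse_qs('symbol=IBM&depth=200&dpc=1'))
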
def getStartDate(sym):
    '''Return (month, day, year, sequence) for the next download request.
    The month is 0-based to match the Yahoo CSV API's 'a' parameter.'''
    loc = pjoin(const.RAW_DIR, '_' + sym + '.csv.zip')
    if not pexists(loc):
        # No archive yet: start from January 1st 1900, sequence 0.
        return (0, 1, 1900, '0')
    else:
        with ZipFile(loc, 'r', ZIP_DEFLATED) as _zip:
            files = _zip.namelist()
            files.sort()
            # Member names look like '_SYM_<seq>.csv'; bump the sequence.
            seq = re.split(r"[_\.]", files[-1])[2]
            nseq = str(int(seq) + 1)
            # Resume the day after the newest session already on disk.
            ldate = list(csv.DictReader(strio(_zip.read(files[-1]))))[0]['Date']
            yr, mo, dy = map(int, ldate.split('-'))
            nxt = date(yr, mo, dy) + timedelta(days=1)
            return (nxt.month - 1, nxt.day, nxt.year, nseq)

def getSyms(params):
    if params:
        order = params['order'][0]
        if order == 'alpha':
            symbols = map(lambda f: f.split('.')[0].lstrip('_'),
                          os.listdir(RAW_DIR))
            symbols.sort()  # os.listdir order is arbitrary
        elif order == 'biggest':
            hfile = pjoin(JSON_DIR, "history.json")
            hist = json.load(open(hfile, 'rb'))
            symbols = map(lambda m: m.keys()[0],
                          sorted(hist, key=lambda x: x.values()[0],
                                 reverse=True))
        return json.dumps(symbols)
    else:
        return map(lambda f: f.split('.')[0].lstrip('_'), os.listdir(RAW_DIR))

def getEod(params):
    symbol = params['symbol'][0]
    depth = int(params['depth'][0])
    dpc = int(params['dpc'][0])
    name = '_' + symbol + '.json'
    zname = name + '.zip'
    with ZipFile(pjoin(SRC, zname), 'r', ZIP_DEFLATED).open(name) as f:
        lst = json.loads(f.read())
    lst.reverse()  # flip to timeline order, oldest first
    if depth < len(lst):
        lst = lst[len(lst) - depth:]
    if dpc > 1:
        return json.dumps(aggregate(dpc, lst))
    elif dpc == 1:
        return json.dumps(lst)

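# aggregate(dpc, rows) is defined elsewhere; getEod, getMacd and getObv all
# use it to collapse daily rows into dpc-day candles. A minimal sketch under
# that assumption: chunk the timeline into groups of dpc sessions and keep
# one summary row per chunk (here the last session's Date and Close; the
# real helper may also fold OHLC and Volume).
def aggregateSketch(dpc, rows):
    out = []
    # stop before any trailing partial chunk
    for i in xrange(0, len(rows) - len(rows) % dpc, dpc):
        chunk = rows[i:i + dpc]
        out.append({'Date': chunk[-1]['Date'], 'Close': chunk[-1]['Close']})
    return out
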