def findDupnrename(parentFolder):
    """Walk ``parentFolder`` and group its files by the value of ``hashfile``.

    For every file a flattened name is also built and printed: the directory
    path (with ``stemdir`` removed and backslashes turned into spaces) is
    upper-cased, split into words, passed through ``rpu_rp.uniqArray``
    (presumably dropping repeated words -- confirm), and each word is followed
    by a space before the original file name is appended.  The copy of the
    file to ``outputtest`` is currently disabled.

    Relies on the module-level names ``os``, ``stemdir``, ``hashfile`` and
    ``rpu_rp``.

    Args:
        parentFolder: root directory handed to ``os.walk``.

    Returns:
        dict mapping each ``hashfile(path)`` value to the list of paths
        (with ``stemdir`` removed) that produced it.
    """
    # Dups in format {hash:[names]}
    dups = {}
    for dirName, subdirs, fileList in os.walk(parentFolder):
        print('Scanning %s...' % dirName)
        # The name prefix depends only on the directory, so build it once
        # per directory instead of once per file.
        dirdetails = dirName.replace(stemdir, '').replace('\\', ' ').upper().split()
        prefix = ''.join(part + ' ' for part in rpu_rp.uniqArray(dirdetails))
        for filename in fileList:
            path = os.path.join(dirName, filename)
            newfname = prefix + filename
            print(newfname)
            # Disabled copy step, kept for reference:
            ## shutil.copyfile(path, outputtest + newfname)
            file_hash = hashfile(path)
            # Add or append the file path
            dups.setdefault(file_hash, []).append(path.replace(stemdir, ''))
    return dups
def findDupnrename(parentFolder):
    """Walk ``parentFolder`` and group its files by the value of ``hashfile``.

    For every file a flattened name is also built and printed: the directory
    path (with ``stemdir`` removed and backslashes turned into spaces) is
    upper-cased, split into words, passed through ``rpu_rp.uniqArray``
    (presumably dropping repeated words -- confirm), and each word is followed
    by a space before the original file name is appended.  The copy of the
    file to ``outputtest`` is currently disabled.

    Relies on the module-level names ``os``, ``stemdir``, ``hashfile`` and
    ``rpu_rp``.

    Args:
        parentFolder: root directory handed to ``os.walk``.

    Returns:
        dict mapping each ``hashfile(path)`` value to the list of paths
        (with ``stemdir`` removed) that produced it.
    """
    # Dups in format {hash:[names]}
    dups = {}
    for dirName, subdirs, fileList in os.walk(parentFolder):
        print('Scanning %s...' % dirName)
        # The name prefix depends only on the directory, so build it once
        # per directory instead of once per file.
        dirdetails = dirName.replace(stemdir, '').replace('\\', ' ').upper().split()
        prefix = ''.join(part + ' ' for part in rpu_rp.uniqArray(dirdetails))
        for filename in fileList:
            path = os.path.join(dirName, filename)
            newfname = prefix + filename
            print(newfname)
            # Disabled copy step, kept for reference:
            ## shutil.copyfile(path, outputtest + newfname)
            file_hash = hashfile(path)
            # Add or append the file path
            dups.setdefault(file_hash, []).append(path.replace(stemdir, ''))
    return dups
def createVolHistogram(sym, fnums):
    """Print a text volume-by-price histogram from ``sym``'s tick file.

    Reads ``DataDown + date + '.' + sym + '.RTtickData.csv'`` and prints the
    histogram three times, for row limits of one third, two thirds and all
    of the file's row count (rows are selected with
    ``rpu_rp.head_array_to_array`` -- presumably the first N rows; confirm).
    Each pass prints the total row count, sleeps 4 seconds, prints 50 blank
    lines and then the histogram.

    Only rows whose text contains ``tickString`` and ``tickType=48`` are
    used.  Their third column is split on ``;`` and is expected to look like
    ``value=1984.50;1;1441796865858;207069;1982.49963539;true`` (price first,
    size second).  Rows with fewer than three fields are skipped.

    Relies on module-level ``DataDown``, ``date``, ``sleep`` and ``rpu_rp``.

    Args:
        sym: symbol used to build the input file name.
        fnums: unused; kept so existing callers keep working.

    Returns:
        None.  All output is printed.
    """
    filein = DataDown + date + '.' + sym + '.RTtickData.csv'
    # Parse the file once and reuse the rows for every pass.
    # NOTE(review): assumes head_array_to_array does not modify its input -- confirm.
    rows = rpu_rp.CsvToLines(filein)
    totlen = len(rows)
    thirdlen = totlen // 3
    ldiv = 1000      # volume represented by one '|' character
    minvol = 12000   # price levels at or below this volume are not printed
    for tailvalue in (thirdlen, thirdlen * 2, totlen):
        print(totlen)
        sleep(4)
        prices = []
        volume_by_price = {}
        for line in rpu_rp.head_array_to_array(rows, tailvalue):
            text = str(line)
            if 'tickString' not in text or 'tickType=48' not in text:
                continue
            fields = line[2].split(';')
            if len(fields) <= 2:
                continue
            tprice = float(fields[0].replace('value=', ''))
            tsize = float(fields[1])
            prices.append(tprice)
            # Non-positive prices never contribute volume.
            if tprice > 0.0:
                volume_by_price[tprice] = volume_by_price.get(tprice, 0.0) + tsize
        print("\n" * 50)
        chunks = []
        for price in rpu_rp.uniqArray(prices):
            lvol = volume_by_price.get(price, 0.0)
            if lvol > minvol:
                chunks.append('avg %6.2f ... %4d .. %s\n'
                              % (price, lvol, int(lvol / ldiv) * '|'))
        print(''.join(chunks))
def createVolHistogram(sym, fnums):
    """Print a text volume-by-price histogram from ``sym``'s tick file.

    Reads ``DataDown + date + '.' + sym + '.RTtickData.csv'`` and prints the
    histogram three times, for row limits of one third, two thirds and all
    of the file's row count (rows are selected with
    ``rpu_rp.head_array_to_array`` -- presumably the first N rows; confirm).
    Each pass prints the total row count, sleeps 4 seconds, prints 50 blank
    lines and then the histogram.

    Only rows whose text contains ``tickString`` and ``tickType=48`` are
    used.  Their third column is split on ``;`` and is expected to look like
    ``value=1984.50;1;1441796865858;207069;1982.49963539;true`` (price first,
    size second).  Rows with fewer than three fields are skipped.

    Relies on module-level ``DataDown``, ``date``, ``sleep`` and ``rpu_rp``.

    Args:
        sym: symbol used to build the input file name.
        fnums: unused; kept so existing callers keep working.

    Returns:
        None.  All output is printed.
    """
    filein = DataDown + date + '.' + sym + '.RTtickData.csv'
    # Parse the file once and reuse the rows for every pass.
    # NOTE(review): assumes head_array_to_array does not modify its input -- confirm.
    rows = rpu_rp.CsvToLines(filein)
    totlen = len(rows)
    thirdlen = totlen // 3
    ldiv = 1000      # volume represented by one '|' character
    minvol = 12000   # price levels at or below this volume are not printed
    for tailvalue in (thirdlen, thirdlen * 2, totlen):
        print(totlen)
        sleep(4)
        prices = []
        volume_by_price = {}
        for line in rpu_rp.head_array_to_array(rows, tailvalue):
            text = str(line)
            if 'tickString' not in text or 'tickType=48' not in text:
                continue
            fields = line[2].split(';')
            if len(fields) <= 2:
                continue
            tprice = float(fields[0].replace('value=', ''))
            tsize = float(fields[1])
            prices.append(tprice)
            # Non-positive prices never contribute volume.
            if tprice > 0.0:
                volume_by_price[tprice] = volume_by_price.get(tprice, 0.0) + tsize
        print("\n" * 50)
        chunks = []
        for price in rpu_rp.uniqArray(prices):
            lvol = volume_by_price.get(price, 0.0)
            if lvol > minvol:
                chunks.append('avg %6.2f ... %4d .. %s\n'
                              % (price, lvol, int(lvol / ldiv) * '|'))
        print(''.join(chunks))
def createVolHistogramdload(sym, fnums, ldiv, roundfactor, datefilter='2015-12-10'):
    """Print a text volume-by-price histogram from ``sym``'s 1-minute file.

    Reads ``DataDown + date + '.' + sym + '.1min.both.csv'`` and prints the
    histogram three times, for row limits of one third, two thirds and all
    of the file's row count (rows are selected with
    ``rpu_rp.head_array_to_array`` -- presumably the first N rows; confirm).
    Each pass prints the total and one-third row counts, sleeps 2 seconds,
    prints 50 blank lines and then the histogram.

    A row is used when its text contains ``datefilter`` and it has at least
    seven columns; column 5 is read as the price (rounded to ``roundfactor``
    decimals) and column 6 as the size.

    Relies on module-level ``DataDown``, ``date``, ``sleep`` and ``rpu_rp``.

    Args:
        sym: symbol used to build the input file name.
        fnums: unused; kept so existing callers keep working.
        ldiv: volume represented by one ``|`` character (must be non-zero).
        roundfactor: number of decimals passed to ``round`` for each price.
        datefilter: substring a row must contain to be counted.  Defaults to
            the value that used to be hard-coded.

    Returns:
        None.  All output is printed.
    """
    filein = DataDown + date + '.' + sym + '.1min.both.csv'
    # Parse the file once and reuse the rows for every pass.
    # NOTE(review): assumes head_array_to_array does not modify its input -- confirm.
    rows = rpu_rp.CsvToLines(filein)
    totlen = len(rows)
    thirdlen = totlen // 3
    for tailvalue in (thirdlen, thirdlen * 2, totlen):
        print('%s %s' % (totlen, thirdlen))
        sleep(2)
        prices = []
        volume_by_price = {}
        for line in rpu_rp.head_array_to_array(rows, tailvalue):
            # Columns 5 and 6 are read below, so shorter rows are skipped
            # instead of raising IndexError.
            if len(line) <= 6 or datefilter not in str(line):
                continue
            tprice = round(float(line[5].replace('value=', '')), roundfactor)
            tsize = float(line[6])
            prices.append(tprice)
            # Non-positive prices never contribute volume.
            if tprice > 0.0:
                volume_by_price[tprice] = volume_by_price.get(tprice, 0.0) + tsize
        print("\n" * 50)
        chunks = []
        for price in rpu_rp.uniqArray(prices):
            lvol = volume_by_price.get(price, 0.0)
            if lvol > 0:
                chunks.append('avg %6.2f ... %4d .. %s\n'
                              % (price, lvol, int(lvol / ldiv) * '|'))
        print(''.join(chunks))
def createVolHistogramdload(sym, fnums, ldiv, roundfactor, datefilter='2015-12-10'):
    """Print a text volume-by-price histogram from ``sym``'s 1-minute file.

    Reads ``DataDown + date + '.' + sym + '.1min.both.csv'`` and prints the
    histogram three times, for row limits of one third, two thirds and all
    of the file's row count (rows are selected with
    ``rpu_rp.head_array_to_array`` -- presumably the first N rows; confirm).
    Each pass prints the total and one-third row counts, sleeps 2 seconds,
    prints 50 blank lines and then the histogram.

    A row is used when its text contains ``datefilter`` and it has at least
    seven columns; column 5 is read as the price (rounded to ``roundfactor``
    decimals) and column 6 as the size.

    Relies on module-level ``DataDown``, ``date``, ``sleep`` and ``rpu_rp``.

    Args:
        sym: symbol used to build the input file name.
        fnums: unused; kept so existing callers keep working.
        ldiv: volume represented by one ``|`` character (must be non-zero).
        roundfactor: number of decimals passed to ``round`` for each price.
        datefilter: substring a row must contain to be counted.  Defaults to
            the value that used to be hard-coded.

    Returns:
        None.  All output is printed.
    """
    filein = DataDown + date + '.' + sym + '.1min.both.csv'
    # Parse the file once and reuse the rows for every pass.
    # NOTE(review): assumes head_array_to_array does not modify its input -- confirm.
    rows = rpu_rp.CsvToLines(filein)
    totlen = len(rows)
    thirdlen = totlen // 3
    for tailvalue in (thirdlen, thirdlen * 2, totlen):
        print('%s %s' % (totlen, thirdlen))
        sleep(2)
        prices = []
        volume_by_price = {}
        for line in rpu_rp.head_array_to_array(rows, tailvalue):
            # Columns 5 and 6 are read below, so shorter rows are skipped
            # instead of raising IndexError.
            if len(line) <= 6 or datefilter not in str(line):
                continue
            tprice = round(float(line[5].replace('value=', '')), roundfactor)
            tsize = float(line[6])
            prices.append(tprice)
            # Non-positive prices never contribute volume.
            if tprice > 0.0:
                volume_by_price[tprice] = volume_by_price.get(tprice, 0.0) + tsize
        print("\n" * 50)
        chunks = []
        for price in rpu_rp.uniqArray(prices):
            lvol = volume_by_price.get(price, 0.0)
            if lvol > 0:
                chunks.append('avg %6.2f ... %4d .. %s\n'
                              % (price, lvol, int(lvol / ldiv) * '|'))
        print(''.join(chunks))
def createVolHistogram(sym, fnums, tailrows=444):
    """Print the heavily traded price levels from ``sym``'s tick file.

    Reads ``DataDown + today + '.' + sym + '.RTtickData.csv'``, keeps the rows
    returned by ``rpu_rp.tail_array_to_array(rows, tailrows)`` (presumably the
    last ``tailrows`` rows -- confirm), and prints ``price volume`` for every
    price whose summed size exceeds 6000.

    Only rows whose text contains ``tickString`` and ``tickType=48`` are
    used.  Their third column is split on ``;`` and is expected to look like
    ``value=1984.50;1;1441796865858;207069;1982.49963539;true`` (price first,
    size second).  Rows with fewer than three fields are skipped.

    Relies on module-level ``DataDown``, ``today`` and ``rpu_rp``.

    Args:
        sym: symbol used to build the input file name.
        fnums: unused; kept so existing callers keep working.
        tailrows: row count handed to ``tail_array_to_array``.  Defaults to
            the value that used to be hard-coded.

    Returns:
        None.  All output is printed.
    """
    filein = DataDown + today + '.' + sym + '.RTtickData.csv'
    minvol = 6000  # price levels at or below this volume are not printed
    prices = []
    volume_by_price = {}
    for line in rpu_rp.tail_array_to_array(rpu_rp.CsvToLines(filein), tailrows):
        text = str(line)
        if 'tickString' not in text or 'tickType=48' not in text:
            continue
        fields = line[2].split(';')
        if len(fields) <= 2:
            continue
        tprice = float(fields[0].replace('value=', ''))
        tsize = float(fields[1])
        prices.append(tprice)
        volume_by_price[tprice] = volume_by_price.get(tprice, 0.0) + tsize
    for price in rpu_rp.uniqArray(prices):
        lvol = volume_by_price.get(price, 0.0)
        if lvol > minvol:
            print('%s %s' % (price, lvol))
def createVolHistogram(sym, fnums, tailrows=4440000):
    """Print the heavily traded price levels from ``sym``'s tick file.

    Reads ``DataDown + today + '.' + sym + '.RTtickData.csv'``, keeps the rows
    returned by ``rpu_rp.tail_array_to_array(rows, tailrows)`` (presumably the
    last ``tailrows`` rows -- confirm), and prints ``price volume`` for every
    price whose summed size exceeds 6000.

    Only rows whose text contains ``tickString`` and ``tickType=48`` are
    used.  Their third column is split on ``;`` and is expected to look like
    ``value=1984.50;1;1441796865858;207069;1982.49963539;true`` (price first,
    size second).  Rows with fewer than three fields are skipped.

    Relies on module-level ``DataDown``, ``today`` and ``rpu_rp``.

    Args:
        sym: symbol used to build the input file name.
        fnums: unused; kept so existing callers keep working.
        tailrows: row count handed to ``tail_array_to_array``.  Defaults to
            the value that used to be hard-coded.

    Returns:
        None.  All output is printed.
    """
    filein = DataDown + today + '.' + sym + '.RTtickData.csv'
    minvol = 6000  # price levels at or below this volume are not printed
    prices = []
    volume_by_price = {}
    for line in rpu_rp.tail_array_to_array(rpu_rp.CsvToLines(filein), tailrows):
        text = str(line)
        if 'tickString' not in text or 'tickType=48' not in text:
            continue
        fields = line[2].split(';')
        if len(fields) <= 2:
            continue
        tprice = float(fields[0].replace('value=', ''))
        tsize = float(fields[1])
        prices.append(tprice)
        volume_by_price[tprice] = volume_by_price.get(tprice, 0.0) + tsize
    for price in rpu_rp.uniqArray(prices):
        lvol = volume_by_price.get(price, 0.0)
        if lvol > minvol:
            print('%s %s' % (price, lvol))
def tallystocktrades(arrayin):
    """Print a running tally of the trade records in ``arrayin`` per symbol.

    Each record is indexed as ``rec[1]`` = symbol, ``rec[2]`` = price string,
    ``rec[3]`` = side (``'sell'``, ``'long'``, ``'coverhalf'``,
    ``'coverfull'``; anything else is not a recognised transition) and
    ``rec[4]`` = sort key.  For every symbol returned by
    ``rpu_rp.uniqArray`` the symbol is printed, then its records are walked
    in ascending ``rec[4]`` order, skipping a record equal to the one
    immediately before it in that order, and ``record total quantity`` is
    printed after each step.

    Price strings of eight or more characters are treated as 0.0.

    Args:
        arrayin: sequence of trade records as described above.

    Returns:
        None.  All output is printed.

    Raises:
        ValueError: if a price string shorter than eight characters is not
            a valid float.
    """
    # (status, side) -> (signed quantity, next status).
    # A quantity of None means "negate the previous quantity".
    transitions = {
        ('flat', 'sell'): (2, 'short'),
        ('flat', 'long'): (-2, 'long'),
        ('short', 'coverhalf'): (-1, 'short'),
        ('long', 'coverhalf'): (1, 'long'),
        ('short', 'coverfull'): (None, 'flat'),
        ('long', 'coverfull'): (None, 'flat'),
    }
    # The ordering does not depend on the symbol, so sort once.
    ordered = sorted(arrayin, key=lambda rec: rec[4])
    for sym in rpu_rp.uniqArray([rec[1] for rec in arrayin]):
        status = 'flat'
        value2 = 0
        netpos = 0
        total = 0
        print(sym)
        prevt = []
        for t in ordered:
            if t[1] == sym and t != prevt:
                step = transitions.get((status, t[3]))
                if step is not None:
                    quantity, status = step
                    value2 = -netpos if quantity is None else quantity
                # NOTE(review): when no transition matches, the previous
                # quantity is reused and added to the total again -- confirm
                # this is intended.
                netpos = value2
                price = float(t[2]) if len(t[2]) < 8 else 0.0
                total += price * netpos
                print('%s %s %s' % (t, total, netpos))
            prevt = t
def parse_merc_input(filein):
    """Extract trade lines from the text file ``filein`` and print a tally.

    Steps:

    1. Scan the file.  A line containing ``<div class="post-byline">``
       updates the current date text (the text right after that tag).  A
       line containing ``triggered``, ``covered``, ``executed`` or
       ``hit our risk point`` is recorded as ``line + '...' + date text``.
    2. Collect the second and third whitespace-separated tokens of every
       recorded line as candidate symbols and pass them through
       ``rpu_rp.uniqArray``.
    3. For every candidate of 3 or 4 characters that does not contain
       ``AMTD``, build a record ``[n, ' ' + SYMBOL, price, side, date]`` for
       each recorded line containing ``' ' + SYMBOL``.
    4. Walk the records from last built to first and print
       ``record total`` after each one; the tally restarts whenever the
       symbol changes.

    Args:
        filein: path of the text file to parse.

    Returns:
        None.  All output is printed.

    Raises:
        IndexError: if a recorded line has fewer than three tokens.
        ValueError: if a record's price text is not a valid float (this
            includes the initial placeholder ``'xx'``).
    """
    max_lines = 3000000  # lines from this number onwards are ignored
    trigger_words = ('triggered', 'covered', 'executed', 'hit our risk point')
    trades = []
    newdate = 'need this need | | | '
    # The context manager closes the file even if parsing raises.
    with open(filein, 'r') as f:
        for count, line in enumerate(f, 1):
            if count >= max_lines:
                break
            ## <div class="post-byline">9:02 am - January 13, 2014 <meta http-equi
            if '<div class="post-byline">' in line:
                newdate = line.replace('<', '>').split('>')[2]
            if any(word in line for word in trigger_words):
                trades.append(line.strip() + '...' + newdate)

    stocklist = []
    for entry in trades:
        tokens = entry.split()
        stocklist.append(tokens[2])
        stocklist.append(tokens[1])

    # The price carries over from the previous matching line when a line
    # has no '@'.
    price = 'xx'
    linenum = 0
    tradelist = []
    for s in rpu_rp.uniqArray(stocklist):
        if 'AMTD' in s or not 2 < len(s) < 5:
            continue
        searchp = ' ' + s.upper()
        print(' ====== ' + searchp + ' ======')
        for line2 in trades:
            if searchp not in line2:
                continue
            # Most specific wording first; this matches the result of the
            # original chain of overriding ``if`` statements.
            if 'covered half' in line2:
                tradeside = 'coverhalf'
            elif 'covered' in line2:
                tradeside = 'coverfull'
            elif '5-min rule' in line2:
                tradeside = 'notrade'
            elif 'short' in line2 or 'sell' in line2 or 'sold' in line2:
                tradeside = 'sell'
            else:
                tradeside = 'long'
            if '@' in line2:
                price = line2.replace('</li>', '@').split('@')[1]
            tradedate = line2.replace('...', '|').split('|')[1]
            tradelist.append([linenum, searchp, price, tradeside, tradedate])
            linenum += 1

    prevstocksym = 'bla'
    for t in sorted(tradelist, key=lambda rec: rec[0], reverse=True):
        stocksym = t[1]
        if prevstocksym != stocksym:
            status = 'flat'
            value2 = 0
            netpos = 0
            total = 0
        side = t[3]
        # NOTE(review): status is never reset to 'flat' after a cover here,
        # unlike tallystocktrades -- confirm which behaviour is intended.
        if status == 'flat' and side == 'sell':
            value2 = 2
            status = 'short'
        elif status == 'flat' and side == 'long':
            value2 = -2
            status = 'long'
        elif side == 'coverhalf' and status == 'short':
            value2 = -1
        elif side == 'coverhalf' and status == 'long':
            value2 = 1
        elif side == 'coverfull' and status in ('short', 'long'):
            value2 = -netpos
        netpos = value2
        total += float(t[2]) * netpos
        print('%s %s' % (t, total))
        prevstocksym = stocksym
def parse_merc_input(filein):
    """Extract trade records from ``filein`` and write them to a CSV file.

    Steps:

    1. Read the lines with ``rpu_rp.TxtToLines``.  A line containing
       ``<div class="post-byline">`` updates the current date text (the text
       right after that tag, e.g. ``12:52 pm - September 25, 2012``).  A
       line containing a trigger word (case-insensitive) is recorded, in
       upper case, as ``line + '...' + date text``.
    2. Collect the second and third whitespace-separated tokens of every
       recorded line as candidate symbols and pass them through
       ``rpu_rp.uniqArray``.
    3. For every candidate of 3 or 4 characters that does not contain
       ``AMTD``, build a record ``['1', symbol, price, side, yyyymmdd]`` for
       each recorded line whose second token (after dash characters are
       removed) equals the candidate.
    4. Sort the records by symbol in descending order, write them to
       ``blatrades.csv`` with ``rpu_rp.WriteArrayToCsvfile`` and return them.

    Args:
        filein: path of the text file to parse.

    Returns:
        list of ``['1', symbol, price, side, yyyymmdd]`` records sorted by
        symbol, descending.

    Raises:
        IndexError: if a recorded line has fewer than three tokens, or its
            date text has fewer than six tokens.
    """
    #### build the month dictionary from text to two-digit numbers ###
    month_names = ['January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November',
                   'December']
    monthdict = dict((name.upper(), '%02d' % number)
                     for number, name in enumerate(month_names, 1))

    ##################### create the trades database based on keywords for trades ####
    max_lines = 300000  # lines from this number onwards are ignored
    triggerwordlist = ['TRIGGERED', 'COVERED', 'EXECUTED',
                       'HIT OUR RISK POINT', 'BOUGHT', 'SOLD']
    trades = []
    newdate = 'need this need | | | '
    for count, line in enumerate(rpu_rp.TxtToLines(filein), 1):
        if count >= max_lines:
            break
        ## <div class="post-byline">9:02 am - January 13, 2014 <meta http-equi
        if '<div class="post-byline">' in line:
            newdate = line.replace('<', '>').split('>')[2]
        upperline = str(line).upper()
        matches = sum(1 for word in triggerwordlist if word in upperline)
        if matches:
            # NOTE(review): a line is recorded once per matching trigger
            # word, so it can appear several times -- confirm this is wanted.
            datedline = line.upper().strip() + '...' + newdate.upper()
            trades.extend([datedline] * matches)

    ##################### create stock list
    stocklist = []
    for entry in trades:
        # Diagnostics only.  Both replacements are kept from the original;
        # the two literals may differ in encoding.
        parts = entry.replace('–', '|').replace('–', '|').split('|')
        if len(parts) == 3 and "," in parts[0]:
            print('%s %s' % (len(parts), parts))
        if ';' in entry and '@' in entry and '–;;;;' in entry:
            print(entry)
        ## <p>LPX, YUM triggered – short LPX @ 13.67 – short YUM @ 67.61</p>...12:52 pm - September 25, 2012
        tokens = entry.split()
        stocklist.append(tokens[2])
        stocklist.append(tokens[1])

    # The price carries over from the previous matching line when a line
    # has no '@'.
    price = 'xx'
    tradelist = []
    for s in rpu_rp.uniqArray(stocklist):
        symbolfromlist = s.upper()
        print('%s %s' % (s, symbolfromlist))
        if 'AMTD' in s or not 2 < len(s) < 5:
            continue
        for lineraw in trades:
            line2 = lineraw.upper()
            symbol = line2.replace('€“', '').replace('–', '').split()[1]
            if symbolfromlist != symbol:
                continue
            # Most specific wording first; this matches the result of the
            # original chain of overriding ``if`` statements.
            if 'COVERED HALF' in line2:
                tradeside = 'coverhalf'
            elif 'COVERED' in line2:
                tradeside = 'coverfull'
            elif '5-MIN RULE' in line2:
                tradeside = 'notrade'
            elif 'SHORT' in line2 or 'SELL' in line2 or 'SOLD' in line2:
                tradeside = 'sell'
            else:
                tradeside = 'long'
            if '@' in line2:
                price = line2.replace('</LI>', '@').split('@')[1]

            # Date text looks like "12:52 PM - SEPTEMBER 25, 2012".
            newlinedate = line2.replace('...', '|').split('|')
            datetokens = newlinedate[1].split()
            txtdatemth = datetokens[3]
            try:
                unixdatemth = monthdict[txtdatemth]
            except KeyError:
                # Unknown month name: show the offending line and carry on
                # with a recognisable placeholder.
                print(newlinedate)
                print(line2)
                unixdatemth = 'BLAAA'
            unixdateyr = datetokens[5]
            # Drop the trailing comma and pad single-digit days to 2 digits.
            realday = datetokens[4].replace(',', '').zfill(2)

            tradelist.append(['1', symbol, price, tradeside,
                              unixdateyr + unixdatemth + realday])

    sortedtrades = sorted(tradelist, key=lambda rec: rec[1], reverse=True)
    rpu_rp.WriteArrayToCsvfile('blatrades.csv', sortedtrades)
    return sortedtrades