Example #1
0
def findDupnrename(parentFolder):
    """Walk ``parentFolder`` and group its files by ``hashfile(path)``.

    For every file a flattened name is also built: the unique, upper-cased
    tokens of the directory (with the module-level ``stemdir`` removed and
    backslashes turned into spaces), each followed by a space, then the file
    name. That name is printed and appended to the module-level
    ``outputtest`` prefix; the copy that would use it is disabled.

    Returns a dict ``{hash: [paths]}`` where each path has ``stemdir``
    removed.
    """
    by_hash = {}
    for dirName, subdirs, fileList in os.walk(parentFolder):
        print('Scanning %s...' % dirName)
        for filename in fileList:
            full_path = os.path.join(dirName, filename)

            # Directory tokens: strip the stem, split on backslashes/spaces.
            relative_dir = dirName.replace(stemdir, '').replace('\\', ' ')
            tokens = rpu_rp.uniqArray(relative_dir.upper().split())
            newfname = ''.join(token + ' ' for token in tokens) + filename
            print(newfname)

            # Destination for the (currently disabled) copy step.
            wdirname = outputtest + newfname

            file_hash = hashfile(full_path)
            by_hash.setdefault(file_hash, []).append(
                full_path.replace(stemdir, ''))
    return by_hash
def findDupnrename(parentFolder):
    """Group the files under ``parentFolder`` by ``hashfile(path)``.

    While walking, a flattened name is built for each file from the unique,
    upper-cased tokens of its directory (module-level ``stemdir`` removed,
    backslashes replaced by spaces), each followed by a space, then the file
    name. The name is printed and joined onto the module-level
    ``outputtest`` prefix; the copy step that would use it is disabled.

    Returns a dict ``{hash: [paths]}``; each path has ``stemdir`` removed.
    """
    groups = {}
    for dirName, subdirs, fileList in os.walk(parentFolder):
        print('Scanning %s...' % dirName)
        for filename in fileList:
            source_path = os.path.join(dirName, filename)

            stripped_dir = dirName.replace(stemdir, '')
            dir_tokens = stripped_dir.replace('\\', ' ').upper().split()
            unique_tokens = rpu_rp.uniqArray(dir_tokens)
            prefix = ''.join(token + ' ' for token in unique_tokens)
            newfname = prefix + filename
            print(newfname)

            # Target of the disabled copy; kept so the name is still built.
            wdirname = outputtest + newfname

            digest = hashfile(source_path)
            short_path = source_path.replace(stemdir, '')
            if digest not in groups:
                groups[digest] = [short_path]
            else:
                groups[digest].append(short_path)
    return groups
def createVolHistogram(sym, fnums):
    """Print a text volume-at-price histogram from a symbol's tick file.

    Reads ``DataDown + date + '.' + sym + '.RTtickData.csv'`` (``DataDown``
    and ``date`` are module-level names) and prints the histogram three
    times, passing one third, two thirds and the full row count to
    ``rpu_rp.head_array_to_array`` (presumably a prefix of that many rows --
    confirm against rpu_rp).

    Only rows whose text contains ``tickString`` and ``tickType=48`` are
    used. Column 2 is expected to look like
    ``value=1984.50;1;1441796865858;207069;1982.49963539;true``; the first
    two ``;``-separated fields are read as trade price and trade size.
    Price levels whose summed size exceeds 12000 are printed with one ``|``
    per 1000 units.

    ``fnums`` is unused and kept only for interface compatibility.
    """
    filein = DataDown + date + '.' + sym + '.RTtickData.csv'
    # Parse the file once instead of once per pass.
    # NOTE(review): assumes head_array_to_array does not modify its input.
    rows = rpu_rp.CsvToLines(filein)
    totlen = len(rows)
    thirdlen = int(totlen / 3)
    ldiv = 1000
    for tailvalue in (thirdlen, thirdlen * 2, totlen):
        print(totlen)
        sleep(4)
        prices = []
        volume_at = {}
        for line in rpu_rp.head_array_to_array(rows, tailvalue):
            text = str(line)
            if 'tickString' not in text or 'tickType=48' not in text:
                continue
            fields = line[2].split(';')
            if len(fields) <= 2:
                # Short payload: skip it. Previously the price/size of the
                # preceding trade was appended again, double-counting it.
                continue
            tprice = float(fields[0].replace('value=', ''))
            tsize = float(fields[1])
            prices.append(tprice)
            if tprice > 0.0:
                volume_at[tprice] = volume_at.get(tprice, 0.0) + tsize

        print("\n" * 50)
        bars = []
        for price in rpu_rp.uniqArray(prices):
            lvol = volume_at.get(price, 0.0)
            if lvol > 12000:
                bars.append('avg %6.2f ... %4d .. %s\n' %
                            (price, lvol, int(lvol / ldiv) * '|'))
        print(''.join(bars))
def createVolHistogram(sym, fnums):
    """Print a text volume-at-price histogram from a symbol's tick file.

    Reads ``DataDown + date + '.' + sym + '.RTtickData.csv'`` (``DataDown``
    and ``date`` are module-level names) and prints the histogram three
    times, passing one third, two thirds and the full row count to
    ``rpu_rp.head_array_to_array`` (presumably a prefix of that many rows --
    confirm against rpu_rp).

    Only rows whose text contains ``tickString`` and ``tickType=48`` are
    used. Column 2 is expected to look like
    ``value=1984.50;1;1441796865858;207069;1982.49963539;true``; the first
    two ``;``-separated fields are read as trade price and trade size.
    Price levels whose summed size exceeds 12000 are printed with one ``|``
    per 1000 units.

    ``fnums`` is unused and kept only for interface compatibility.
    """
    filein = DataDown + date + '.' + sym + '.RTtickData.csv'
    # Parse the file once instead of once per pass.
    # NOTE(review): assumes head_array_to_array does not modify its input.
    rows = rpu_rp.CsvToLines(filein)
    totlen = len(rows)
    thirdlen = int(totlen / 3)
    ldiv = 1000
    for tailvalue in (thirdlen, thirdlen * 2, totlen):
        print(totlen)
        sleep(4)
        prices = []
        volume_at = {}
        for line in rpu_rp.head_array_to_array(rows, tailvalue):
            text = str(line)
            if 'tickString' not in text or 'tickType=48' not in text:
                continue
            fields = line[2].split(';')
            if len(fields) <= 2:
                # Short payload: skip it. Previously the price/size of the
                # preceding trade was appended again, double-counting it.
                continue
            tprice = float(fields[0].replace('value=', ''))
            tsize = float(fields[1])
            prices.append(tprice)
            if tprice > 0.0:
                volume_at[tprice] = volume_at.get(tprice, 0.0) + tsize

        print("\n" * 50)
        bars = []
        for price in rpu_rp.uniqArray(prices):
            lvol = volume_at.get(price, 0.0)
            if lvol > 12000:
                bars.append('avg %6.2f ... %4d .. %s\n' %
                            (price, lvol, int(lvol / ldiv) * '|'))
        print(''.join(bars))
def createVolHistogramdload(sym, fnums, ldiv, roundfactor,
                            datefilter='2015-12-10'):
    """Print a text volume-at-price histogram from a downloaded 1-min file.

    Reads ``DataDown + date + '.' + sym + '.1min.both.csv'`` (``DataDown``
    and ``date`` are module-level names) and prints the histogram three
    times, passing one third, two thirds and the full row count to
    ``rpu_rp.head_array_to_array`` (presumably a prefix of that many rows --
    confirm against rpu_rp).

    Parameters:
        sym: symbol used to build the file name.
        fnums: unused; kept for interface compatibility.
        ldiv: volume units represented by one ``|`` in a bar.
        roundfactor: number of decimals prices are rounded to before
            being grouped into levels.
        datefilter: only rows whose text contains this string are used.
            Defaults to the previously hard-coded ``'2015-12-10'``.

    Column 5 is read as the price (an optional ``value=`` prefix is
    stripped) and column 6 as the size. Every level with positive summed
    size is printed.
    """
    filein = DataDown + date + '.' + sym + '.1min.both.csv'
    # Parse the file once instead of once per pass.
    # NOTE(review): assumes head_array_to_array does not modify its input.
    rows = rpu_rp.CsvToLines(filein)
    totlen = len(rows)
    thirdlen = int(totlen / 3)
    for tailvalue in (thirdlen, thirdlen * 2, totlen):
        print('%s %s' % (totlen, thirdlen))
        sleep(2)
        prices = []
        volume_at = {}
        for line in rpu_rp.head_array_to_array(rows, tailvalue):
            # Columns 5 and 6 are read below, so require them to exist
            # (the old ``len(line) > 2`` guard allowed an IndexError).
            if len(line) <= 6 or datefilter not in str(line):
                continue
            tprice = round(float(line[5].replace('value=', '')),
                           roundfactor)
            tsize = float(line[6])
            prices.append(tprice)
            if tprice > 0.0:
                volume_at[tprice] = volume_at.get(tprice, 0.0) + tsize

        print("\n" * 50)
        bars = []
        for price in rpu_rp.uniqArray(prices):
            lvol = volume_at.get(price, 0.0)
            if lvol > 0:
                bars.append('avg %6.2f ... %4d .. %s\n' %
                            (price, lvol, int(lvol / ldiv) * '|'))
        print(''.join(bars))
def createVolHistogramdload(sym, fnums, ldiv, roundfactor,
                            datefilter='2015-12-10'):
    """Print a text volume-at-price histogram from a downloaded 1-min file.

    Reads ``DataDown + date + '.' + sym + '.1min.both.csv'`` (``DataDown``
    and ``date`` are module-level names) and prints the histogram three
    times, passing one third, two thirds and the full row count to
    ``rpu_rp.head_array_to_array`` (presumably a prefix of that many rows --
    confirm against rpu_rp).

    Parameters:
        sym: symbol used to build the file name.
        fnums: unused; kept for interface compatibility.
        ldiv: volume units represented by one ``|`` in a bar.
        roundfactor: number of decimals prices are rounded to before
            being grouped into levels.
        datefilter: only rows whose text contains this string are used.
            Defaults to the previously hard-coded ``'2015-12-10'``.

    Column 5 is read as the price (an optional ``value=`` prefix is
    stripped) and column 6 as the size. Every level with positive summed
    size is printed.
    """
    filein = DataDown + date + '.' + sym + '.1min.both.csv'
    # Parse the file once instead of once per pass.
    # NOTE(review): assumes head_array_to_array does not modify its input.
    rows = rpu_rp.CsvToLines(filein)
    totlen = len(rows)
    thirdlen = int(totlen / 3)
    for tailvalue in (thirdlen, thirdlen * 2, totlen):
        print('%s %s' % (totlen, thirdlen))
        sleep(2)
        prices = []
        volume_at = {}
        for line in rpu_rp.head_array_to_array(rows, tailvalue):
            # Columns 5 and 6 are read below, so require them to exist
            # (the old ``len(line) > 2`` guard allowed an IndexError).
            if len(line) <= 6 or datefilter not in str(line):
                continue
            tprice = round(float(line[5].replace('value=', '')),
                           roundfactor)
            tsize = float(line[6])
            prices.append(tprice)
            if tprice > 0.0:
                volume_at[tprice] = volume_at.get(tprice, 0.0) + tsize

        print("\n" * 50)
        bars = []
        for price in rpu_rp.uniqArray(prices):
            lvol = volume_at.get(price, 0.0)
            if lvol > 0:
                bars.append('avg %6.2f ... %4d .. %s\n' %
                            (price, lvol, int(lvol / ldiv) * '|'))
        print(''.join(bars))
Example #7
0
def createVolHistogram(sym, fnums, tailcount=444):
    """Print price levels with heavy traded volume from a tick file.

    Reads ``DataDown + today + '.' + sym + '.RTtickData.csv'`` (``DataDown``
    and ``today`` are module-level names) and passes the rows with
    ``tailcount`` to ``rpu_rp.tail_array_to_array`` (presumably the last
    ``tailcount`` rows -- confirm against rpu_rp).

    Only rows whose text contains ``tickString`` and ``tickType=48`` are
    used. Column 2 is expected to look like
    ``value=1984.50;1;1441796865858;207069;1982.49963539;true``; the first
    two ``;``-separated fields are read as trade price and trade size.
    Each price whose summed size exceeds 6000 is printed as
    ``price volume``.

    Parameters:
        sym: symbol used to build the file name.
        fnums: unused; kept for interface compatibility.
        tailcount: row count handed to ``tail_array_to_array``; defaults to
            the previously hard-coded 444.
    """
    filein = DataDown + today + '.' + sym + '.RTtickData.csv'
    prices = []
    volume_at = {}
    rows = rpu_rp.CsvToLines(filein)
    for line in rpu_rp.tail_array_to_array(rows, tailcount):
        text = str(line)
        if 'tickString' not in text or 'tickType=48' not in text:
            continue
        fields = line[2].split(';')
        if len(fields) <= 2:
            # Short payload: skip it. Previously the preceding trade (or the
            # initial 0.0 / '' placeholders) was appended again, which
            # double-counted volume or failed when adding '' to a float.
            continue
        tprice = float(fields[0].replace('value=', ''))
        tsize = float(fields[1])
        prices.append(tprice)
        volume_at[tprice] = volume_at.get(tprice, 0.0) + tsize

    for price in rpu_rp.uniqArray(prices):
        lvol = volume_at.get(price, 0.0)
        if lvol > 6000:
            print('%s %s' % (price, lvol))
def createVolHistogram(sym, fnums, tailcount=4440000):
    """Print price levels with heavy traded volume from a tick file.

    Reads ``DataDown + today + '.' + sym + '.RTtickData.csv'`` (``DataDown``
    and ``today`` are module-level names) and passes the rows with
    ``tailcount`` to ``rpu_rp.tail_array_to_array`` (presumably the last
    ``tailcount`` rows -- confirm against rpu_rp).

    Only rows whose text contains ``tickString`` and ``tickType=48`` are
    used. Column 2 is expected to look like
    ``value=1984.50;1;1441796865858;207069;1982.49963539;true``; the first
    two ``;``-separated fields are read as trade price and trade size.
    Each price whose summed size exceeds 6000 is printed as
    ``price volume``.

    Parameters:
        sym: symbol used to build the file name.
        fnums: unused; kept for interface compatibility.
        tailcount: row count handed to ``tail_array_to_array``; defaults to
            the previously hard-coded 4440000.
    """
    filein = DataDown + today + '.' + sym + '.RTtickData.csv'
    prices = []
    volume_at = {}
    rows = rpu_rp.CsvToLines(filein)
    for line in rpu_rp.tail_array_to_array(rows, tailcount):
        text = str(line)
        if 'tickString' not in text or 'tickType=48' not in text:
            continue
        fields = line[2].split(';')
        if len(fields) <= 2:
            # Short payload: skip it. Previously the preceding trade (or the
            # initial 0.0 / '' placeholders) was appended again, which
            # double-counted volume or failed when adding '' to a float.
            continue
        tprice = float(fields[0].replace('value=', ''))
        tsize = float(fields[1])
        prices.append(tprice)
        volume_at[tprice] = volume_at.get(tprice, 0.0) + tsize

    for price in rpu_rp.uniqArray(prices):
        lvol = volume_at.get(price, 0.0)
        if lvol > 6000:
            print('%s %s' % (price, lvol))
Example #9
0
def tallystocktrades(arrayin):
    """Print a running per-symbol tally for a list of trade records.

    Each record is indexed as: ``[1]`` symbol, ``[2]`` price as a string,
    ``[3]`` action (``'sell'``, ``'long'``, ``'coverhalf'`` or
    ``'coverfull'``) and ``[4]`` the key the records are ordered by.

    For every unique symbol (order decided by ``rpu_rp.uniqArray``) the
    symbol is printed, then its records are visited in ascending ``[4]``
    order. A record equal to the previously processed one is skipped. Each
    processed record prints ``record total netpos``.

    Returns None. An empty ``arrayin`` prints nothing, provided
    ``rpu_rp.uniqArray`` returns an empty sequence for empty input
    (previously this raised because ``symarray`` was never assigned).
    """
    # Build the symbol list once, not once per input row.
    symarray = rpu_rp.uniqArray([record[1] for record in arrayin])
    # The ordering is the same for every symbol, so sort once.
    ordered = sorted(arrayin, key=lambda record: record[4])

    for sym in symarray:
        status = 'flat'
        value2 = 0
        netpos = 0
        total = 0
        print(sym)
        prevt = []
        for t in ordered:
            if t[1] != sym or t == prevt:
                continue
            action = t[3]
            # At most one branch applies; an unrecognised action leaves
            # value2 (and therefore netpos) at its previous value.
            if status == 'flat' and action == 'sell':
                value2 = 2
                status = 'short'
            elif status == 'flat' and action == 'long':
                value2 = -2
                status = 'long'
            elif action == 'coverhalf' and status == 'short':
                value2 = -1
            elif action == 'coverhalf' and status == 'long':
                value2 = 1
            elif action == 'coverfull' and status in ('short', 'long'):
                value2 = netpos * -1
                status = 'flat'
            netpos = value2
            # Price strings of 8+ characters are treated as unusable.
            price = float(t[2]) if len(t[2]) < 8 else 0.0
            total += price * netpos
            print('%s %s %s' % (t, total, netpos))
            prevt = t
Example #10
0
def parse_merc_input(filein):
    """Extract trade alerts from a saved HTML page and print a running tally.

    Steps:
      1. Read ``filein`` line by line (lines numbered 3000000 and above are
         ignored). A line containing ``<div class="post-byline">`` updates
         the current post date; a line containing ``triggered``,
         ``covered``, ``executed`` or ``hit our risk point`` is stored as
         ``stripped line + '...' + current post date``.
      2. Collect the 3rd and 2nd whitespace-separated tokens of every stored
         line as candidate symbols and de-duplicate them with
         ``rpu_rp.uniqArray``.
      3. For each candidate of length 3 or 4 that does not contain ``AMTD``,
         print a header and build one record
         ``[linenum, ' ' + SYMBOL, price, side, date]`` per stored line that
         contains ``' ' + SYMBOL``.
      4. Walk the records in descending ``linenum`` order and print each
         record with its running total.

    Returns None.

    NOTE(review): ``price`` starts as ``'xx'`` and is carried over from the
    previous record when a line has no ``@``; ``float(price)`` in step 4
    raises ValueError if that text is not numeric.
    """
    max_lines = 3000000
    trades = []
    newdate = 'need this need | | | '
    # ``with`` guarantees the handle is closed even if parsing raises.
    with open(filein, 'r') as f:
        for count, line in enumerate(f, 1):
            if count >= max_lines:
                break
            # e.g. <div class="post-byline">9:02 am - January 13, 2014 <meta
            if '<div class="post-byline">' in line:
                newdate = line.replace('<', '>').split('>')[2]
            if ('triggered' in line or 'covered' in line
                    or 'executed' in line or 'hit our risk point' in line):
                trades.append(line.strip() + '...' + newdate)

    stocklist = []
    for l in trades:
        words = l.split()
        stocklist.append(words[2])
        stocklist.append(words[1])
    ustklist = rpu_rp.uniqArray(stocklist)

    price = 'xx'
    linenum = 0
    tradelist = []
    for s in ustklist:
        searchp = ' ' + s.upper()
        if 'AMTD' in s or not 2 < len(s) < 5:
            continue
        print(' ====== ' + searchp + ' ======')
        for line2 in trades:
            if searchp not in line2:
                continue
            # Later checks deliberately override earlier ones.
            tradeside = 'long'
            if 'short' in line2 or 'sell' in line2 or 'sold' in line2:
                tradeside = 'sell'
            if '5-min rule' in line2:
                tradeside = 'notrade'
            if 'covered' in line2:
                tradeside = 'coverfull'
            if 'covered half' in line2:
                tradeside = 'coverhalf'
            if '@' in line2:
                # Text between the first '@' and the next '@' or '</li>'.
                price = line2.replace('</li>', '@').split('@')[1]
            # Every stored line contains '...', so index 1 always exists.
            tradedate = line2.replace('...', '|').split('|')[1]
            tradelist.append([linenum, searchp, price, tradeside, tradedate])
            linenum += 1

    prevstocksym = 'bla'
    status = 'flat'
    value2 = 0
    netpos = 0
    total = 0
    for t in sorted(tradelist, key=lambda trade: trade[0], reverse=True):
        stocksym = t[1]
        if prevstocksym != stocksym:
            # New symbol: restart the tally.
            status = 'flat'
            value2 = 0
            netpos = 0
            total = 0
        action = t[3]
        if status == 'flat' and action == 'sell':
            value2 = 2
            status = 'short'
        elif status == 'flat' and action == 'long':
            value2 = -2
            status = 'long'
        elif action == 'coverhalf' and status == 'short':
            value2 = -1
        elif action == 'coverhalf' and status == 'long':
            value2 = 1
        elif action == 'coverfull' and status in ('short', 'long'):
            value2 = netpos * -1
        netpos = value2
        total += float(t[2]) * netpos
        print('%s %s' % (t, total))
        prevstocksym = stocksym
Example #11
0
def parse_merc_input(filein):
    """Extract trade alerts from a saved HTML page into trade records.

    Steps:
      1. Read the lines of ``filein`` via ``rpu_rp.TxtToLines`` (lines
         numbered 300000 and above are ignored). A line containing
         ``<div class="post-byline">`` updates the current post date; a line
         whose upper-cased text contains any trigger word is stored once as
         ``UPPER-CASED stripped line + '...' + UPPER-CASED post date``.
      2. Collect the 3rd and 2nd whitespace-separated tokens of every stored
         line as candidate symbols (printing a few diagnostic lines on the
         way) and de-duplicate them with ``rpu_rp.uniqArray``.
      3. For each candidate of length 3 or 4 that does not contain ``AMTD``,
         build a record ``['1', symbol, price, side, YYYYMMDD]`` for every
         stored line whose 2nd token (dash markers removed) equals it.
      4. Sort the records by symbol, descending, write them to
         ``blatrades.csv`` with ``rpu_rp.WriteArrayToCsvfile`` and return
         the sorted list.

    The date part is expected to look like ``9:02 AM - JANUARY 13, 2014``.
    An unknown month name is reported on stdout and encoded as ``BLAAA``.

    NOTE(review): a stored line seen before any post-byline line carries the
    placeholder date and raises IndexError in step 3, as before.
    """
    # Month name (upper case) -> zero-padded month number.
    month_names = ['January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November',
                   'December']
    monthdict = dict((name.upper(), '%02d' % number)
                     for number, name in enumerate(month_names, 1))

    triggerwords = ('TRIGGERED', 'COVERED', 'EXECUTED', 'HIT OUR RISK POINT',
                    'BOUGHT', 'SOLD')
    max_lines = 300000
    trades = []
    newdate = 'need this need | | | '
    for count, line in enumerate(rpu_rp.TxtToLines(filein), 1):
        if count >= max_lines:
            break
        # e.g. <div class="post-byline">9:02 am - January 13, 2014 <meta
        if '<div class="post-byline">' in line:
            newdate = line.replace('<', '>').split('>')[2]
        upper_line = str(line).upper()
        # Store a matching line once; it used to be appended once per
        # matching trigger word, duplicating the trade.
        if any(word in upper_line for word in triggerwords):
            trades.append(line.upper().strip() + '...' + newdate.upper())

    # Candidate symbols, with diagnostics for multi-symbol lines.
    stocklist = []
    for l in trades:
        sp3 = l.replace('–', '|').replace('&#8211;', '|').split('|')
        if len(sp3) == 3 and "," in sp3[0]:
            print('%s %s' % (len(sp3), sp3))
        if ';' in l and '@' in l and '&#8211;;;;;' in l:
            print(l)
        words = l.split()
        stocklist.append(words[2])
        stocklist.append(words[1])
    ustklist = rpu_rp.uniqArray(stocklist)

    price = 'xx'
    tradelist = []
    for s in ustklist:
        symbolfromlist = s.upper()
        print('%s %s' % (s, symbolfromlist))
        if 'AMTD' in s or not 2 < len(s) < 5:
            continue
        # Stored lines are already upper case.
        for line2 in trades:
            symbol = line2.replace('€“', '').replace('&#8211;', '').split()[1]
            if symbolfromlist != symbol:
                continue

            # Later checks deliberately override earlier ones.
            tradeside = 'long'
            if 'SHORT' in line2 or 'SELL' in line2 or 'SOLD' in line2:
                tradeside = 'sell'
            if '5-MIN RULE' in line2:
                tradeside = 'notrade'
            if 'COVERED' in line2:
                tradeside = 'coverfull'
            if 'COVERED HALF' in line2:
                tradeside = 'coverhalf'
            if '@' in line2:
                # Text between the first '@' and the next '@' or '</LI>'.
                price = line2.replace('</LI>', '@').split('@')[1]

            newlinedate = line2.replace('...', '|').split('|')
            datewords = newlinedate[1].split()
            txtdatemth = datewords[3]
            try:
                unixdatemth = monthdict[txtdatemth]
            except KeyError:
                print(newlinedate)
                print(line2)
                unixdatemth = 'BLAAA'
            unixdateyr = datewords[5]
            # Drop the trailing comma and pad to two digits; this no longer
            # depends on the comma being present.
            realday = datewords[4].replace(',', '').zfill(2)

            tradelist.append(['1', symbol, price, tradeside,
                              unixdateyr + unixdatemth + realday])

    sortedtrades = sorted(tradelist, key=lambda trade: trade[1], reverse=True)
    rpu_rp.WriteArrayToCsvfile('blatrades.csv', sortedtrades)
    return sortedtrades