def main_iterator():
    """Correlate two cumulative series chunk by chunk and plot every chunk.

    Reads the module globals ``dataLarge`` (file path), ``parts`` (number of
    chunks), ``index1`` / ``index2`` (column positions of the two series) and
    ``aggr_records`` (sampling stride).

    For each of the ``parts`` equally sized chunks of the file:
      * the two columns are loaded and replaced by their cumulative sums;
      * the cumulative value is sampled at the last row of every
        ``aggr_records``-row window (the final window may be shorter);
      * Spearman's rho between the two sampled sequences is printed;
      * both sampled sequences are plotted and saved under ``images/``.

    Rows beyond ``parts * (records // parts)`` (the division remainder) are
    never read.  ``range`` raises ``ValueError`` if ``aggr_records`` is 0.
    """
    records_in_file = num_records(dataLarge)
    # Floor division keeps the chunk size an integer even under true
    # division; read_csv's nrows/skiprows are row counts.
    parts_size = records_in_file // parts

    for part_index in range(1, parts + 1):
        skip_rows = (part_index - 1) * parts_size
        df = pd.read_csv(dataLarge, sep=' ', engine='python', header=None,
                         index_col=0, usecols=[0, index1, index2],
                         skiprows=skip_rows, nrows=parts_size)
        # NOTE(review): this replaces only the literal string 'NaN'; values
        # already parsed as float NaN are untouched -- confirm whether
        # fillna(0.0) was intended.
        df = df.replace(to_replace='NaN', value=0.0).cumsum()
        total_rows = df.shape[0]

        rindex1 = []
        rindex2 = []
        for window_start in range(0, total_rows, aggr_records):
            # Last row of the window, clamped to the end of the chunk.
            last_row = min(window_start + aggr_records, total_rows) - 1
            rindex1.append(df[[index1]].iloc[last_row])
            rindex2.append(df[[index2]].iloc[last_row])

        # Spearman rank correlation between the two sampled sequences.
        rho, pvalue = st.spearmanr(rindex1, rindex2)
        print("For series %d and %d, part %d spearman rho: %f"
              % (index1, index2, part_index, rho))

        # Plot the sampled cumulative series for this chunk.
        image_name = 'images/TS%r-TS%r-Part-%r.png' % (
            index1, index2, part_index)
        fig = plt.figure()
        fig.add_subplot(111)
        plt.xlabel('Time')
        plt.ylabel('Incrimental Difference Summation')
        plt.title('TS%r Vs TS%r [ Part%r] rho(corr coef):%r'
                  % (index1, index2, part_index, rho))
        plt.plot(rindex1)
        plt.plot(rindex2)
        eprint("Fig: %s" % (image_name))
        plt.savefig(image_name)
        plt.close()
def calculate_stats(ts, index):
    """Compare ts.mean()/ts.std() with the values from cal_mean_std().

    Args:
        ts: object exposing ``mean()`` and ``std()`` (presumably a pandas
            Series -- confirm against the caller).
        index: integer identifier of the series, used only in the output.

    The two pairs of values are compared through their ``str()`` renderings,
    so "equal" means equal to the precision ``str()`` prints.  One summary
    line is printed while holding ``print_lock``; a second line goes to
    ``eprint`` when either comparison fails.
    """
    lib_stddev = ts.std()
    lib_mean = ts.mean()
    cmean, cstddev = cal_mean_std(ts)

    mean_test = 'PASS' if str(cmean) == str(lib_mean) else 'FAIL'
    stddev_test = 'PASS' if str(cstddev) == str(lib_stddev) else 'FAIL'

    summary = (index, lib_mean, cmean, lib_stddev, cstddev,
               mean_test, stddev_test)
    with print_lock:
        print("ts: %d, mean: %f, cmean: %f, stddev: %f, cstddev: %f, mean_test %s, stddev_test: %s" % summary)

    if 'FAIL' in (mean_test, stddev_test):
        eprint("For index : %d, mean_test: %s, stddev_test %s" %(index, mean_test, stddev_test))
def calculate_stats(ts, index):
    """Cross-check cal_mean_std() against the series' own mean()/std().

    Args:
        ts: object providing ``mean()`` and ``std()`` (assumed to be a pandas
            Series -- TODO confirm).
        index: integer label of the series, echoed in the report lines.

    Each statistic is marked 'PASS' when the ``str()`` form of the computed
    value equals the ``str()`` form of the library value, else 'FAIL'.  The
    report line is printed under ``print_lock``; any failure is additionally
    sent to ``eprint``.
    """
    verdict = {True: 'PASS', False: 'FAIL'}

    reference_std = ts.std()
    reference_mean = ts.mean()
    cmean, cstddev = cal_mean_std(ts)

    mean_test = verdict[str(cmean) == str(reference_mean)]
    stddev_test = verdict[str(cstddev) == str(reference_std)]

    report = "ts: %d, mean: %f, cmean: %f, stddev: %f, cstddev: %f, mean_test %s, stddev_test: %s" % (
        index, reference_mean, cmean, reference_std, cstddev,
        mean_test, stddev_test)
    with print_lock:
        print(report)

    any_failed = not (mean_test == 'PASS' and stddev_test == 'PASS')
    if any_failed:
        eprint("For index : %d, mean_test: %s, stddev_test %s" %
               (index, mean_test, stddev_test))
def main():
    """Parse the command line, check the data file and run main_iterator().

    Options:
        -f, --file   path of the data file
        -g, --graph  set the module global ``plot_graphs`` to True
        -h, --help   print the usage text and exit with status 0

    Side effects: binds the module globals ``print_lock``, ``dataLarge`` and
    ``plot_graphs``; creates an ``images`` directory in the current working
    directory when it is missing.

    Exits with status 2 on an unrecognised option or when the data file
    does not exist.
    """
    global print_lock
    global dataLarge
    global plot_graphs

    # to synchronize the output
    print_lock = Lock()
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hf:g",
                                ["help", "file=", "graph"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    dataLarge = ''
    plot_graphs = False

    for o, a in opts:
        if o in ("-g", "--graph"):
            plot_graphs = True
        elif o in ("-f", "--file"):
            dataLarge = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        else:
            # Raised explicitly so the guard survives "python -O".
            raise AssertionError("unhandled option")

    # see if file exists and then proceed
    if not exists(dataLarge):
        if dataLarge == '':
            eprint(
                "Please provide valid data file as -f argument on command line"
            )
        else:
            eprint("Data file does not exists aborting : %s" % (dataLarge))
            # abspath() stays correct when an absolute path was supplied.
            eprint("abs path : %s" % (os.path.abspath(dataLarge)))
        usage()
        # A missing input is a failure; report it through the exit status.
        sys.exit(2)

    eprint("Processing data file : %s " % (dataLarge))

    if not os.path.exists('images'):
        os.makedirs('images')

    main_iterator()
def main():
    """Parse the command line, reset the accumulators and run main_iterator().

    Options:
        -f, --file   path of the data file
        -r, --rows   integer stored in the module global ``num_rows``
        -m, --mt     accepted and ignored (see note in the option loop)
        -h, --help   print the usage text and exit with status 0

    Side effects: binds the module globals ``print_lock``, ``sum_lock``,
    ``num_rows``, ``dataLarge`` and the three 26-element float lists
    ``total_sum``, ``total_square_sum`` and ``total_samples``; creates an
    ``images`` directory in the current working directory when missing.

    Exits with status 2 on an unrecognised option, a non-integer ``-r``
    value, or when the data file does not exist.
    """
    global print_lock
    global sum_lock
    global num_rows
    global dataLarge
    global total_sum
    global total_square_sum
    global total_samples

    # to synchronize the output
    print_lock = Lock()
    # to synchronize the summations
    sum_lock = Lock()

    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hf:r:m",
                                ["help", "file=", "rows=", "mt"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    num_rows = 0
    dataLarge = 'None'

    total_sum = [0.0] * 26
    total_square_sum = [0.0] * 26
    total_samples = [0.0] * 26

    for o, a in opts:
        if o in ("-f", "--file"):
            dataLarge = a
        elif o in ("-r", "--rows"):
            try:
                num_rows = int(a)
            except ValueError:
                # Report bad input like any other usage error instead of
                # letting the traceback escape.
                eprint("Invalid row count for %s : %s" % (o, a))
                usage()
                sys.exit(2)
        elif o in ("-m", "--mt"):
            # NOTE(review): the option spec accepts -m/--mt but the loop had
            # no branch for it, so passing it hit the "unhandled option"
            # assertion.  Nothing in this function consumes such a flag, so
            # it is accepted and ignored -- confirm the intended behaviour.
            pass
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        else:
            # Raised explicitly so the guard survives "python -O".
            raise AssertionError("unhandled option")

    # see if file exists and then proceed
    if not exists(dataLarge):
        eprint("Data file does not exists aborting : %s" % (dataLarge))
        # abspath() stays correct when an absolute path was supplied.
        eprint("abs path : %s" % (os.path.abspath(dataLarge)))
        usage()
        # A missing input is a failure; report it through the exit status.
        sys.exit(2)

    eprint("Processing data file : %s " % (dataLarge))

    if not os.path.exists('images'):
        os.makedirs('images')

    main_iterator()
def main():
    """Command-line entry point: validate arguments, then call main_iterator().

    Options:
        -f, --file   path of the data file
        -g, --graph  set the module global ``plot_graphs`` to True
        -h, --help   print the usage text and exit with status 0

    Side effects: binds the module globals ``print_lock``, ``dataLarge`` and
    ``plot_graphs``; creates an ``images`` directory in the current working
    directory when it is missing.

    Exits with status 2 on an unrecognised option or when the data file
    does not exist.
    """
    global print_lock
    global dataLarge
    global plot_graphs

    # to synchronize the output
    print_lock = Lock()
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hf:g", ["help", "file=", "graph"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    dataLarge = ''
    plot_graphs = False

    for o, a in opts:
        if o in ("-g", "--graph"):
            plot_graphs = True
        elif o in ("-f", "--file"):
            dataLarge = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        else:
            # Raised explicitly so the guard is not stripped by "python -O".
            raise AssertionError("unhandled option")

    # see if file exists and then proceed
    if not exists(dataLarge):
        if dataLarge == '':
            eprint("Please provide valid data file as -f argument on command line")
        else:
            eprint("Data file does not exists aborting : %s" %(dataLarge))
            # abspath() handles absolute and relative inputs alike.
            eprint("abs path : %s" %(os.path.abspath(dataLarge)))
        usage()
        # Signal the failure to the caller instead of exiting with status 0.
        sys.exit(2)

    eprint("Processing data file : %s " %(dataLarge))

    if not os.path.exists('images'):
        os.makedirs('images')

    main_iterator()
def main():
    """Parse the command line, check the trades file and run main_iterator().

    Options:
        -f, --file   path of the trades file
        -h, --help   print the usage text and exit with status 0

    Side effects: binds the module globals ``print_lock`` and ``tradesFile``.

    Exits with status 2 on an unrecognised option or when the trades file
    does not exist.
    """
    global print_lock
    global tradesFile

    # to synchronize the output
    print_lock = Lock()
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hf:", ["help", "file="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    tradesFile = ''

    for o, a in opts:
        if o in ("-f", "--file"):
            tradesFile = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        else:
            # Raised explicitly so the guard survives "python -O".
            raise AssertionError("unhandled option")

    # see if file exists and then proceed
    if not exists(tradesFile):
        if tradesFile == '':
            eprint(
                "Please provide valid trades file as -f argument on command line"
            )
        else:
            eprint("Data file does not exists aborting : %s" % (tradesFile))
            # abspath() stays correct when an absolute path was supplied.
            eprint("abs path : %s" % (os.path.abspath(tradesFile)))
        usage()
        # A missing input is a failure; report it through the exit status.
        sys.exit(2)

    eprint("Processing data file : %s " % (tradesFile))

    main_iterator()
def main():
    """Parse the command line, check the data file and run main_iterator().

    Options:
        -f, --file   path of the data file
        -m, --mt     set the module global ``multi_threaded`` to True
        -h, --help   print the usage text and exit with status 0

    Side effects: binds the module globals ``print_lock``, ``sum_lock``,
    ``dataLarge`` and ``multi_threaded``; creates an ``images`` directory in
    the current working directory when it is missing.

    Exits with status 2 on an unrecognised option or when the data file
    does not exist.
    """
    global print_lock
    global sum_lock
    global dataLarge
    global multi_threaded

    # to synchronize the output
    sum_lock = Lock()
    print_lock = Lock()
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hf:m",
                                ["help", "file=", "mt"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    multi_threaded = False
    dataLarge = 'None'

    for o, a in opts:
        if o in ("-m", "--mt"):
            multi_threaded = True
        elif o in ("-f", "--file"):
            dataLarge = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        else:
            # Raised explicitly so the guard survives "python -O".
            raise AssertionError("unhandled option")

    # see if file exists and then proceed
    if not exists(dataLarge):
        eprint("Data file does not exists aborting : %s" % (dataLarge))
        # abspath() stays correct when an absolute path was supplied.
        eprint("abs path : %s" % (os.path.abspath(dataLarge)))
        usage()
        # A missing input is a failure; report it through the exit status.
        sys.exit(2)

    eprint("Processing data file : %s " % (dataLarge))

    if not os.path.exists('images'):
        os.makedirs('images')

    main_iterator()
def usage():
    """Print the command-line help text, one eprint() call per line."""
    prog = argv[0]
    banner = " ****************************************************************************"

    eprint(banner)
    eprint(" %s usage : " % (prog,))
    # NOTE(review): the synopsis advertises [-m] but no -m line is listed
    # below -- confirm and either document or drop it.
    eprint(" %s -f <data file> [-m]" % (prog,))
    # The example used to print a literal "%s" because the format operand
    # was missing; substitute the program name like the lines above.
    eprint(" eg %s -f dataLarge" % (prog,))
    eprint(
        " -f --file  : data file containing time series with 0th coloumn as index"
    )
    eprint(" -h --help  : help")
    eprint(banner)
def usage():
    """Print the command-line help text, one eprint() call per line."""
    prog = argv[0]
    banner = " ****************************************************************************"

    eprint(banner)
    eprint(" %s usage : " % (prog,))
    # NOTE(review): the synopsis advertises [-g] but no -g line is listed
    # below -- confirm and either document or drop it.
    eprint(" %s -f <data file> [-g]" % (prog,))
    # The example used to print a literal "%s" because the format operand
    # was missing; substitute the program name like the lines above.
    eprint(" eg %s -f trades.txt" % (prog,))
    eprint(
        " -f --file  : data file containing quantity and price of trades, with header"
    )
    eprint(" -h --help  : help")
    eprint(banner)
def usage():
    """Print the command-line help text, one eprint() call per line."""
    prog = argv[0]
    banner = " ****************************************************************************"
    help_lines = (
        banner,
        " %s usage : " % (prog,),
        " %s -f <data file> [-g]" % (prog,),
        # The example used to print a literal "%s" because the format
        # operand was missing; it now shows the program name.
        " eg %s -f dataLarge" % (prog,),
        " -f --file  : data file containing time series with 0th coloumn as index",
        " -g --graph : plot the graphs, Needs addition time ",
        " -h --help  : help",
        banner,
    )
    for line in help_lines:
        eprint(line)
def usage():
    """Print the command-line help text, one eprint() call per line."""
    program = argv[0]
    rule = " ****************************************************************************"

    eprint(rule)
    eprint(" %s usage : " % (program,))
    eprint(" %s -f <data file> [-g]" % (program,))
    # Previously the "%s" here was never substituted and was printed
    # verbatim; format it with the program name like the other lines.
    eprint(" eg %s -f dataLarge" % (program,))
    eprint(
        " -f --file  : data file containing time series with 0th coloumn as index"
    )
    eprint(" -g --graph : plot the graphs, Needs addition time ")
    eprint(" -h --help  : help")
    eprint(rule)
# Example #13 [0]
def usage():
    """Print the command-line help text, one eprint() call per line."""
    prog = argv[0]
    banner = " ****************************************************************************"
    help_lines = (
        banner,
        " %s usage : " % (prog,),
        " For correlation between series 2 and 3. Do analysis in 4 equal parts",
        # NOTE(review): the synopsis shows [-m] but no -m line is listed
        # below -- confirm and either document or drop it.
        " %s -f <data file> [-m] -s 2,3 -p 4" % (prog,),
        # The example used to print a literal "%s" because the format
        # operand was missing; it now shows the program name.
        " eg %s -f dataLarge" % (prog,),
        " -f --file  : data file containing time series with 0th coloumn as index",
        " -a --aggr  : num of records to aggr and represent as single record, default is 100000",
        " -s --series : , saperated two series indices to compare ",
        " -p --parts : No of equal parts for which to calculate the correlation",
        " -h --help  : help",
        banner,
    )
    for line in help_lines:
        eprint(line)
# Example #14 [0]
def main():
    """Parse the command line, validate every argument, run main_iterator().

    Options:
        -f, --file    path of the data file
        -a, --aggr    integer stored in ``aggr_records`` (default 100000)
        -p, --parts   integer stored in ``parts`` (default 1)
        -s, --series  two comma-separated integers stored in ``index1`` and
                      ``index2``
        -h, --help    print the usage text and exit with status 0

    Side effects: binds the module globals ``print_lock``, ``sum_lock``,
    ``dataLarge``, ``index1``, ``index2``, ``aggr_records`` and ``parts``;
    creates an ``images`` directory in the current working directory when
    it is missing.

    Exits with status 2 on an unrecognised option, a malformed numeric
    value, a missing data file, a series index of 0, or an ``-a`` / ``-p``
    value below 1.
    """
    global print_lock
    global sum_lock
    global dataLarge
    global index1
    global index2
    global aggr_records
    global parts

    # to synchronize the output
    print_lock = Lock()
    # to synchronize the summations
    sum_lock = Lock()

    try:
        opts, _ = getopt.getopt(sys.argv[1:], "ha:f:s:p:",
                                ["help", "aggr=", "file=", "series=", "parts="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    dataLarge = 'None'
    index1 = 0
    index2 = 0
    aggr_records = 100000
    parts = 1

    try:
        for o, a in opts:
            if o in ("-a", "--aggr"):
                aggr_records = int(a)
            elif o in ("-f", "--file"):
                dataLarge = a
            elif o in ("-p", "--parts"):
                parts = int(a)
            elif o in ("-s", "--series"):
                # Exactly two integers are required; a wrong count or a
                # non-numeric token raises ValueError, handled below.
                index1, index2 = [int(token) for token in a.split(',')]
            elif o in ("-h", "--help"):
                usage()
                sys.exit()
            else:
                # Raised explicitly so the guard survives "python -O".
                raise AssertionError("unhandled option")
    except ValueError:
        # Report bad input like any other usage error instead of letting
        # the traceback escape.
        eprint("Invalid value for option %s : %s" % (o, a))
        usage()
        sys.exit(2)

    # see if file exists and then proceed
    if not exists(dataLarge):
        eprint("Data file does not exists aborting : %s" %(dataLarge))
        # abspath() stays correct when an absolute path was supplied.
        eprint("abs path : %s" %(os.path.abspath(dataLarge)))
        usage()
        sys.exit(2)

    eprint("Processing data file : %s " %(dataLarge))

    if not os.path.exists('images'):
        os.makedirs('images')

    if index1 == 0 or index2 == 0:
        eprint("Please provide correct indices of time series to comapre with option -s")
        usage()
        sys.exit(2)

    # main_iterator() divides by ``parts`` and advances by ``aggr_records``;
    # values below 1 would divide by zero or never make progress.
    if aggr_records < 1 or parts < 1:
        eprint("Options -a and -p need values of 1 or more")
        usage()
        sys.exit(2)

    main_iterator()
def usage():
    """Print the command-line help text, one eprint() call per line."""
    program = argv[0]
    rule = " ****************************************************************************"
    help_lines = (
        rule,
        " %s usage : " % (program,),
        # NOTE(review): the synopsis shows [-m] but no -m line is listed
        # below -- confirm and either document or drop it.
        " %s -f <data file> [-m]" % (program,),
        # The example used to print a literal "%s" because the format
        # operand was missing; it now shows the program name.
        " eg %s -f dataLarge" % (program,),
        " -f --file  : data file containing time series with 0th coloumn as index",
        " -h --help  : help",
        rule,
    )
    for line in help_lines:
        eprint(line)