Exemple #1
0
def merge():
    df_open = ipo_lab.load_data("./ipo_open.csv")
    sdf_open = df_open.sort_values(by=['symbol'])
    with open("./ipo5.csv") as f:
        line = f.readline().strip()
        ls = line.split(',')
        ls.insert(1, "date")
        print ','.join(ls)
        line = f.readline().strip()
        while line:
            ls = line.split(',')
            symbol = ls[0]
            d = df_open.loc[df_open["symbol"] == symbol].iloc[0, 0]
            if d is None:
                print "--------- missing", symbol
                continue
            ls.insert(1, str(d))
            print ','.join(ls)
            line = f.readline().strip()
        print line

    exit()
    df = ipo_lab.load_data("./ipo.csv")
    sdf = df.sort_values(by=["symbol"])

    print "symbol,date,y,x1,x2,x3,x4,x5,underwriter,exchange"
    j = 0
    for i in range(len(sdf_open)):
        row_open = sdf_open.iloc[i]
        row = sdf.iloc[j]
        while row["symbol"] < row_open["symbol"]:
            #            print "missing in ipo_open.csv", row["symbol"], row_open["symbol"]
            j += 1
            row = sdf.iloc[j]

        if row["symbol"] > row_open["symbol"]:
            #            print "missing in ipo.csv", row["symbol"], row_open["symbol"]
            continue

        line = []
        line.append(row["symbol"])
        line.append(str(row_open["date"]))
        line.append("%.3f" % row["y"])
        line.append("%.3f" % row["x1"])
        line.append(str(row["x2"]))
        line.append("%.3f" % row["x3"])
        line.append("%.3f" % row["x4"])
        line.append(str(row_open["minutes"]))
        line.append(str(row["underwriter"]))
        line.append(str(row["exchange"]))
        print ','.join(line)
        j += 1
Exemple #2
0
def detect_overlap():
    df = ipo_lab.load_data("./ipo_open.csv")
    sdf = df.sort_values(by=['symbol'])
    prev_sym = ""
    for i, row in sdf.iterrows():
        if prev_sym == "":
            prev_sym = row["symbol"]
        else:
            if prev_sym == row["symbol"]:
                print row["symbol"], i
            prev_sym = row["symbol"]
Exemple #3
0
def plot_y_vs_luw():
    df = ipo_lab.load_data("./ipo.csv")
    id, num, p = ipo_lab.luw_dist(df, 1.02, 25)
    print id, num, p
    x = []
    sum = 0
    for n in num:
        x.append(sum + n)
        sum += n
    plt.step(x, p)
    plt.show()
Exemple #4
0
def plot_y_vs_x1_v101_104():
    """Step-plot ipo_lab.histo of column "x1" for the values 1.01 to 1.04.

    One panel of a 2x2 grid (shared y axis) is drawn per value, filled row by
    row, and each panel is titled with the value it was computed for.
    """
    data = ipo_lab.load_data("./ipo.csv")
    fig, axs = plt.subplots(2, 2, sharey=True, tight_layout=True)
    for slot, hundredths in enumerate(range(101, 105)):
        v = hundredths / 100.0
        xs, ys = ipo_lab.histo(data, "x1", 20, v, 1)
        # slot 0..3 -> (row, column) in the 2x2 grid.
        grid_row, grid_col = divmod(slot, 2)
        panel = axs[grid_row][grid_col]
        panel.step(xs, ys)
        panel.set_title("vavle = %.2f" % v)
    plt.show()
Exemple #5
0
def print_y_vs_exchange():
    df = ipo_lab.load_data("./ipo.csv")
    ex_res = ipo_lab.exchange_dist(df, 1.02)
    sum = 0
    p = 0
    exchange = ['NASDAQ', 'NYSE', 'AMEX']
    for i in range(len(ex_res)):
        r = ex_res[i]
        print exchange[i], r[1] * 1.0 / r[0], r[0]
        sum += r[0]
        p += r[1]
    print "TOTAL", p * 1.0 / sum, sum
Exemple #6
0
def plot_y_vs_x1_4_v102():
    df = ipo_lab.load_data("./out_bad.csv")
    fig, axs = plt.subplots(2, 2, sharey=True, tight_layout=True)
    cols = ['x1', 'x2', 'x3', 'x4']
    for i in range(4):
        x, y = ipo_lab.histo(df, cols[i], 20, 1.01, 1)
        print 'distribution of', cols[i]
        for l in range(len(x)):
            print x[l], y[l]
        axs[i / 2][i % 2].step(x, y)
        axs[i / 2][i % 2].set_title('y/' + cols[i])
    plt.show()
Exemple #7
0
def plot_x1_4_dist():
    """Bar-plot ipo_lab.distribution for columns x1..x4 of ./ipo.csv.

    distribution is called with 100 as its last argument for every column.
    Only the leading part of each result is drawn: the first
    100 - 50, 100 - 80, 100 - 30 and 100 - 50 entries for x1, x2, x3 and x4
    respectively.  Panels fill a 2x2 grid (shared y axis) row by row.
    """
    data = ipo_lab.load_data("./ipo.csv")
    fig, axs = plt.subplots(2, 2, sharey=True, tight_layout=True)
    columns = ['x1', 'x2', 'x3', 'x4']
    trimmed = [50, 80, 30, 50]
    for slot, (column, cut) in enumerate(zip(columns, trimmed)):
        xs, ys, step = ipo_lab.distribution(data, column, 100)
        shown = 100 - cut
        # slot 0..3 -> (row, column) in the 2x2 grid.
        grid_row, grid_col = divmod(slot, 2)
        panel = axs[grid_row][grid_col]
        panel.bar(xs[:shown], ys[:shown], width=step)
        panel.set_title('distribution of ' + column)

    plt.show()
Exemple #8
0
def print_min_dist():
    dfm = ipo_lab.load_data("./ipo3.csv")
    bin = [0] * 11
    for i, row in dfm.iterrows():
        if 0 <= row["t10"] <= 11:
            bin[row["t10"] - 1] += 1
        if 10 <= row["t10"] <= 11:
            if row["p30"] < row["y"]:
                print row["symbol"], row["y"], row["t10"], row["p30"], row[
                    "t30"]
        else:
            #print "unexpected minutes", row["symbol"], row["t10"]
            continue
Exemple #9
0
def plot_y_dist():
    df = ipo_lab.load_data("./ipo5.csv")
    x, y, step = ipo_lab.distribution(df, 'p1m', 100)
    print len(x)
    plt.bar(x[:40], y[:40], width=step)
    plt.show()
Exemple #10
0
    exchange = ['NASDAQ', 'NYSE', 'AMEX']
    for i in range(len(ex_res)):
        r = ex_res[i]
        print exchange[i], r[1] * 1.0 / r[0], r[0]
        sum += r[0]
        p += r[1]
    print "TOTAL", p * 1.0 / sum, sum


#def dist_t10():
#    df = ipo_lab.load_data("./ipo5.csv")
#    for i, row in df.iterrows():

#merge()
#exit()
#plot_y_dist()
#plot_x1_4_dist()
#plot_y_vs_x1_4_v102()
#plot_y_vs_x1_v101_104()
#plot_y_vs_luw()
#print_y_vs_exchange()
#exit()

#detect_overlap()
#print_min_dist()
# Driver: run ipo_lab.simple_strat_2 on ./ipo5.csv for 1.02, 1.03, ..., 1.14.
df = ipo_lab.load_data("./ipo5.csv")
#ipo_lab.simple_filter(df, 1.02)
for i in range(102, 115):
    level = i / 100.0
    # A fresh year list is built for every call, as before.
    years = range(2010, 2019)
    ipo_lab.simple_strat_2(df, "pc10", 1.0001, level, years)
#svm(1000, 1.02)