def merge():
    """Print ./ipo5.csv to stdout with a "date" field inserted at position 1.

    The header line of ./ipo5.csv is echoed with the literal "date" inserted
    as its second field.  For each following line the first field is taken
    as the symbol; the first row of ./ipo_open.csv whose "symbol" column
    equals it is looked up and the value of that row's first column is
    inserted as the second field (presumably the open date -- confirm against
    the ipo_open.csv layout).  Reading stops at the first blank line or at
    end of file, after which the process terminates via exit().

    Symbols without a usable match are reported with a "--------- missing"
    line and skipped.
    """
    df_open = ipo_lab.load_data("./ipo_open.csv")
    sdf_open = df_open.sort_values(by=['symbol'])

    with open("./ipo5.csv") as f:
        header = f.readline().strip().split(',')
        header.insert(1, "date")
        print(','.join(header))

        line = f.readline().strip()
        while line:
            fields = line.split(',')
            symbol = fields[0]
            # Fetch the next line up front so that no path through the loop
            # body (in particular the `continue` below) can re-process the
            # same line forever.
            line = f.readline().strip()

            matches = df_open.loc[df_open["symbol"] == symbol]
            # An empty selection would make .iloc[0, 0] raise IndexError, so
            # check for "no match" before touching the first cell.
            d = matches.iloc[0, 0] if len(matches) else None
            if d is None:
                print("%s %s" % ("--------- missing", symbol))
                continue

            fields.insert(1, str(d))
            print(','.join(fields))
        # `line` is empty here; this emits the trailing blank line the
        # original produced.
        print(line)
    exit()

    # NOTE(review): everything below is unreachable because of the exit()
    # above.  It is kept verbatim in behavior: a sorted two-pointer join of
    # ./ipo.csv and ./ipo_open.csv on "symbol".  It does not guard `j`
    # against running past the end of `sdf` -- fix that before re-enabling.
    df = ipo_lab.load_data("./ipo.csv")
    sdf = df.sort_values(by=["symbol"])
    print("symbol,date,y,x1,x2,x3,x4,x5,underwriter,exchange")
    j = 0
    for i in range(len(sdf_open)):
        row_open = sdf_open.iloc[i]
        row = sdf.iloc[j]
        while row["symbol"] < row_open["symbol"]:
            # print "missing in ipo_open.csv", row["symbol"], row_open["symbol"]
            j += 1
            row = sdf.iloc[j]
        if row["symbol"] > row_open["symbol"]:
            # print "missing in ipo.csv", row["symbol"], row_open["symbol"]
            continue
        line = [
            row["symbol"],
            str(row_open["date"]),
            "%.3f" % row["y"],
            "%.3f" % row["x1"],
            str(row["x2"]),
            "%.3f" % row["x3"],
            "%.3f" % row["x4"],
            str(row_open["minutes"]),
            str(row["underwriter"]),
            str(row["exchange"]),
        ]
        print(','.join(line))
        j += 1
def detect_overlap():
    """Print every row of ./ipo_open.csv that repeats the previous symbol.

    The data is sorted by "symbol"; whenever a row carries the same symbol
    as the row before it, the symbol and the row's index label are printed.
    """
    frame = ipo_lab.load_data("./ipo_open.csv")
    ordered = frame.sort_values(by=['symbol'])

    last_seen = ""
    for label, record in ordered.iterrows():
        current = record["symbol"]
        # An empty `last_seen` means "nothing to compare against yet".
        if last_seen != "" and last_seen == current:
            print("%s %s" % (current, label))
        last_seen = current
def plot_y_vs_luw():
    """Step-plot the third result of ipo_lab.luw_dist against cumulative counts.

    Loads ./ipo.csv, calls ipo_lab.luw_dist(df, 1.02, 25), prints the three
    values it returns, and plots the third one against the running total of
    the second one.
    """
    frame = ipo_lab.load_data("./ipo.csv")
    ids, counts, probs = ipo_lab.luw_dist(frame, 1.02, 25)
    print("%s %s %s" % (ids, counts, probs))

    # x positions are the running totals of `counts`.
    positions = []
    running = 0
    for count in counts:
        running += count
        positions.append(running)

    plt.step(positions, probs)
    plt.show()
def plot_y_vs_x1_v101_104():
    """Draw a 2x2 grid of step plots of ipo_lab.histo on column "x1".

    One panel is drawn for each value 1.01, 1.02, 1.03 and 1.04, passed as
    the fourth argument of ipo_lab.histo(df, "x1", 20, v, 1).  Panels are
    filled row by row and share the y axis.
    """
    df = ipo_lab.load_data("./ipo.csv")
    fig, axs = plt.subplots(2, 2, sharey=True, tight_layout=True)
    for i in range(101, 105):
        v = i / 100.0
        x, y = ipo_lab.histo(df, "x1", 20, v, 1)
        # Title text is kept verbatim from the original ("vavle" sic).
        title = "vavle = %.2f" % v
        # divmod yields integer (row, col) on every Python version; plain
        # `/` would produce a float index under true division.
        r, c = divmod(i - 101, 2)
        ax = axs[r][c]
        ax.step(x, y)
        ax.set_title(title)
    plt.show()
def print_y_vs_exchange():
    """Print a per-exchange ratio table followed by a TOTAL line.

    Each entry returned by ipo_lab.exchange_dist(df, 1.02) is read as a pair:
    the line printed for it shows the exchange label, entry[1] / entry[0]
    and entry[0].  The final line shows the same ratio over the summed
    entries.  Entries are matched to NASDAQ, NYSE, AMEX by position.
    """
    frame = ipo_lab.load_data("./ipo.csv")
    per_exchange = ipo_lab.exchange_dist(frame, 1.02)
    labels = ['NASDAQ', 'NYSE', 'AMEX']

    denom_total = 0
    numer_total = 0
    for pos in range(len(per_exchange)):
        entry = per_exchange[pos]
        ratio = entry[1] * 1.0 / entry[0]
        print("%s %s %s" % (labels[pos], ratio, entry[0]))
        denom_total += entry[0]
        numer_total += entry[1]

    print("%s %s %s" % ("TOTAL", numer_total * 1.0 / denom_total, denom_total))
def plot_y_vs_x1_4_v102():
    """Print and step-plot ipo_lab.histo for columns x1..x4 in a 2x2 grid.

    For each column the (x, y) pairs returned by
    ipo_lab.histo(df, col, 20, 1.01, 1) are printed one per line under a
    "distribution of <col>" heading and drawn in their own panel, filled row
    by row with a shared y axis.

    NOTE(review): the function name says "v102" but the value passed to
    histo is 1.01, and the input file is ./out_bad.csv, unlike the sibling
    plots which read ./ipo.csv -- confirm both are intentional.
    """
    df = ipo_lab.load_data("./out_bad.csv")
    fig, axs = plt.subplots(2, 2, sharey=True, tight_layout=True)
    cols = ['x1', 'x2', 'x3', 'x4']
    for i, col in enumerate(cols):
        x, y = ipo_lab.histo(df, col, 20, 1.01, 1)
        print("%s %s" % ('distribution of', col))
        for x_val, y_val in zip(x, y):
            print("%s %s" % (x_val, y_val))
        # divmod yields integer (row, col) on every Python version; plain
        # `/` would produce a float index under true division.
        r, c = divmod(i, 2)
        ax = axs[r][c]
        ax.step(x, y)
        ax.set_title('y/' + col)
    plt.show()
def plot_x1_4_dist():
    """Bar-plot ipo_lab.distribution for columns x1..x4 in a 2x2 grid.

    For each column, ipo_lab.distribution(df, col, 100) returns (x, y, step);
    only the first ``100 - ends[i]`` entries are drawn, with ``step`` used as
    the bar width.  Panels are filled row by row and share the y axis.
    """
    df = ipo_lab.load_data("./ipo.csv")
    fig, axs = plt.subplots(2, 2, sharey=True, tight_layout=True)
    cols = ['x1', 'x2', 'x3', 'x4']
    # Number of trailing entries dropped from each column's plot.
    ends = [50, 80, 30, 50]
    # ylims = [1200, 1200, 300, 300]
    for i, (col, cut) in enumerate(zip(cols, ends)):
        x, y, step = ipo_lab.distribution(df, col, 100)
        keep = 100 - cut
        # divmod yields integer (row, col) on every Python version; plain
        # `/` would produce a float index under true division.
        r, c = divmod(i, 2)
        ax = axs[r][c]
        ax.bar(x[:keep], y[:keep], width=step)
        ax.set_title('distribution of ' + col)
        # ax.set_ylim(0, ylims[i])
    plt.show()
def print_min_dist():
    """Print rows of ./ipo3.csv with t10 in [10, 11] whose p30 is below y.

    Every row whose "t10" lies in [0, 11] is tallied per t10 value; rows
    outside that range are ignored.  For rows with t10 of 10 or 11 and
    p30 < y, the symbol, y, t10, p30 and t30 values are printed.

    NOTE(review): the tally is built but never printed or returned, exactly
    as in the original; decide whether it should be reported.
    """
    dfm = ipo_lab.load_data("./ipo3.csv")
    # One slot per accepted t10 value (0..11).  Indexing by t10 itself avoids
    # the original `t10 - 1` offset, which sent t10 == 0 to index -1 and so
    # into the same slot as t10 == 11.
    counts = [0] * 12
    for _, row in dfm.iterrows():
        t10 = row["t10"]
        if not 0 <= t10 <= 11:
            # print "unexpected minutes", row["symbol"], row["t10"]
            continue
        # int() keeps this working when the column was loaded as floats,
        # which are not valid list indices.
        counts[int(t10)] += 1
        if 10 <= t10 <= 11 and row["p30"] < row["y"]:
            print("%s %s %s %s %s" % (row["symbol"], row["y"], t10,
                                      row["p30"], row["t30"]))
def plot_y_dist():
    """Bar-plot the first 40 entries of ipo_lab.distribution for "p1m".

    Loads ./ipo5.csv, calls ipo_lab.distribution(df, 'p1m', 100), prints the
    length of the first returned sequence, and draws the leading 40 (x, y)
    entries using the returned step as the bar width.
    """
    frame = ipo_lab.load_data("./ipo5.csv")
    xs, ys, bar_width = ipo_lab.distribution(frame, 'p1m', 100)
    print(len(xs))

    shown = 40
    plt.bar(xs[:shown], ys[:shown], width=bar_width)
    plt.show()
# NOTE(review): the statements below were found outside any visible function.
# They repeat the loop of print_y_vs_exchange() but rely on `ex_res` and `p`,
# which are never assigned at this scope, and apply `+=` to the builtin
# `sum`, so they cannot run as module-level code.  They are preserved here as
# a comment so the driver at the bottom of the file can execute.
# exchange = ['NASDAQ', 'NYSE', 'AMEX']
# for i in range(len(ex_res)):
#     r = ex_res[i]
#     print exchange[i], r[1] * 1.0 / r[0], r[0]
#     sum += r[0]
#     p += r[1]
# print "TOTAL", p * 1.0 / sum, sum

#def dist_t10():
#    df = ipo_lab.load_data("./ipo5.csv")
#    for i, row in df.iterrows():

# Manual toggles: uncomment a call to run that analysis.
#merge()
#exit()
#plot_y_dist()
#plot_x1_4_dist()
#plot_y_vs_x1_4_v102()
#plot_y_vs_x1_v101_104()
#plot_y_vs_luw()
#print_y_vs_exchange()
#exit()
#detect_overlap()
#print_min_dist()

df = ipo_lab.load_data("./ipo5.csv")
#ipo_lab.simple_filter(df, 1.02)
# Sweep the fourth argument of simple_strat_2 from 1.02 to 1.14 in steps of
# 0.01; range(2010, 2019) is passed through unchanged (presumably the years
# to evaluate -- confirm against ipo_lab.simple_strat_2).
for i in range(102, 115):
    ipo_lab.simple_strat_2(df, "pc10", 1.0001, i / 100.0, range(2010, 2019))
#svm(1000, 1.02)