def work2(df, f):
    """Apply the phase-2 and phase-3 "base1" selections for "sp500" to df.

    Loads the "base1" features of the "sp500" set, splits them by date
    into three phases, applies the second and then the third phase to
    ``df`` (label "label5", suffixes "_p2" and "_p3"), and hands the
    result to ``feat_select.ana2``.

    Args:
        df: Frame passed to ``feat_select.apply``.
        f: Forwarded unchanged to ``feat_select.ana2`` (presumably an
            output file handle -- confirm against ``ana2``).
    """
    setname = "sp500"
    taname = "base1"
    feats = feat_select.load_feat(taname, setname)
    # The first phase is not needed here; only phases two and three are applied.
    _, second_phase, third_phase = feat_select.split_dates(feats)
    applied = feat_select.apply(df, second_phase, "label5", "_p2")
    applied = feat_select.apply(applied, third_phase, "label5", "_p3")
    feat_select.ana2(applied, f, setname)
def work2(df, f):
    """Run the phase-2/phase-3 apply-and-analyse pass for the "sp500" set.

    The "base1" features of "sp500" are loaded and split by date into
    three phases. ``df`` is run through ``feat_select.apply`` with the
    second phase (suffix "_p2") and the output through the third phase
    (suffix "_p3"), both against "label5". The final frame goes to
    ``feat_select.ana2`` together with ``f`` and the set name.

    Args:
        df: Input frame for the first ``feat_select.apply`` call.
        f: Passed straight through to ``feat_select.ana2``.
    """
    setname = "sp500"
    taname = "base1"
    loaded = feat_select.load_feat(taname, setname)
    phases = feat_select.split_dates(loaded)
    # Unpack all three so a wrong-sized result still fails here.
    (_unused, p2, p3) = phases
    after_p2 = feat_select.apply(df, p2, "label5", "_p2")
    after_p3 = feat_select.apply(after_p2, p3, "label5", "_p3")
    feat_select.ana2(after_p3, f, setname)
# Example #3
# 0
def phase1_dump(taname, setname):
    """Pickle the flattened phase-1 metas for one feature table and set.

    Loads the features for ``taname``/``setname``, splits them by date,
    computes the metas of the first phase, flattens them and writes the
    result to ``<root>/data/feat_select/phase1_dump/<setname>_<taname>.pkl``.

    Args:
        taname: Feature-table name given to ``feat_select.load_feat``.
        setname: Set name given to ``feat_select.load_feat``.

    Raises:
        OSError: If the output directory cannot be created for a reason
            other than it already existing as a directory.
    """
    import errno  # local import: only needed for the directory guard below

    dfTa = feat_select.load_feat(taname, setname)
    # Only the first phase is dumped; the other two are deliberately ignored.
    phase1, _phase2, _phase3 = feat_select.split_dates(dfTa)
    dfmetas = feat_select.flat_metas(feat_select.get_metas(phase1))
    outdir = os.path.join(root, "data", "feat_select", "phase1_dump")
    # Create-then-tolerate instead of check-then-create: another process may
    # create the directory between an exists() test and makedirs().
    try:
        os.makedirs(outdir)
    except OSError as err:
        if err.errno != errno.EEXIST or not os.path.isdir(outdir):
            raise
    dfmetas.to_pickle(os.path.join(outdir, "%s_%s.pkl" % (setname, taname)))
# Example #4
# 0
def phase1_dump(taname, setname):
    """Dump the flattened first-phase metas of a feature table to a pickle.

    The features for ``taname``/``setname`` are loaded and split by date;
    the metas of the first phase are flattened and written to
    ``<root>/data/feat_select/phase1_dump/<setname>_<taname>.pkl``.

    Args:
        taname: Feature-table name given to ``feat_select.load_feat``.
        setname: Set name given to ``feat_select.load_feat``.

    Raises:
        OSError: If the output directory cannot be created for a reason
            other than it already existing as a directory.
    """
    import errno  # local import: only needed for the directory guard below

    dfTa = feat_select.load_feat(taname, setname)
    # Only the first phase is dumped; the other two are deliberately ignored.
    phase1, _phase2, _phase3 = feat_select.split_dates(dfTa)
    dfmetas = feat_select.flat_metas(feat_select.get_metas(phase1))
    outdir = os.path.join(root, "data", "feat_select", "phase1_dump")
    # Attempt the creation and tolerate "already exists": an exists() test
    # followed by makedirs() can race with a concurrent run.
    try:
        os.makedirs(outdir)
    except OSError as err:
        if err.errno != errno.EEXIST or not os.path.isdir(outdir):
            raise
    dfmetas.to_pickle(os.path.join(outdir, "%s_%s.pkl" % (setname, taname)))
def work(df, f):
    """Run the phase-2/phase-3 apply-and-analyse pass over ten sub-sets.

    The sub-sets are named "sp500R<frm>T<to>" for ten consecutive
    50-wide windows starting at 0 (R0T50 up to R450T500). For each one
    the "base1" features are loaded and split by date, ``df`` is applied
    against the second phase (suffix "_p2") and then the third phase
    (suffix "_p3") with label "label5", and the result is passed to
    ``feat_select.ana2``.

    Args:
        df: Frame passed to ``feat_select.apply`` for every window.
        f: Forwarded unchanged to ``feat_select.ana2``.
    """
    taname = "base1"
    for frm in range(0, 500, 50):
        setname = "sp500R%dT%d" % (frm, frm + 50)
        feats = feat_select.load_feat(taname, setname)
        # The first phase is unused in this pass.
        _, mid_phase, last_phase = feat_select.split_dates(feats)
        applied = feat_select.apply(df, mid_phase, "label5", "_p2")
        applied = feat_select.apply(applied, last_phase, "label5", "_p3")
        feat_select.ana2(applied, f, setname)
def work(df, f):
    """Apply phases two and three of "base1" for ten "sp500R..T.." windows.

    For window index 0..9 the set name is "sp500R<frm>T<to>" with
    ``frm = 50 * index`` and ``to = frm + 50``. Each window's features
    are loaded and split by date; ``df`` goes through
    ``feat_select.apply`` with the second phase ("_p2") and the output
    through the third phase ("_p3"), both against "label5", before being
    handed to ``feat_select.ana2``.

    Args:
        df: Input frame for the first ``feat_select.apply`` call.
        f: Passed straight through to ``feat_select.ana2``.
    """
    for idx in range(10):
        lower = 50 * idx
        upper = lower + 50
        setname = "sp500R%dT%d" % (lower, upper)
        taname = "base1"
        loaded = feat_select.load_feat(taname, setname)
        # Unpack all three so a wrong-sized result still fails here.
        (_unused, p2, p3) = feat_select.split_dates(loaded)
        after_p2 = feat_select.apply(df, p2, "label5", "_p2")
        after_p3 = feat_select.apply(after_p2, p3, "label5", "_p3")
        feat_select.ana2(after_p3, f, setname)
    # NOTE(review): these lines look like the tail of a different function
    # whose header is not visible here -- abs_direct_p_set, abs_direct_n_set,
    # dataroot and args are read but never defined in the visible code.
    # Confirm where this fragment belongs before relying on it.

    # Snapshot of the negative set before it is narrowed below (not used
    # again in the visible lines).
    orig_direct_n_set = abs_direct_n_set.copy()
    # Python 2 print statements: report the starting sizes of both sets.
    print len(abs_direct_p_set)
    print len(abs_direct_n_set)
    # Write a separator line of eight "=" characters to f.
    print >> f, "=" * 8
    # Ten consecutive 50-wide windows: R0T50, R50T100, ..., R450T500.
    for i in range(10):
        frm = 50 * i
        to = frm + 50
        setname = "sp500R%dT%d" % (frm, to)
        taname = "base1"
        # Per-window pickle path, keyed by set name, table name and args.depth.
        filename = os.path.join(
            dataroot, "phase1_dump", "sp500_base1_apply_phase1_%s_%s_%d.pkl" %
            (setname, taname, args.depth))
        # Compute and store the result only when the pickle is missing; the
        # first element returned by split_dates is applied with suffix "_p1".
        if not os.path.exists(filename):
            df2 = feat_select.apply(
                df,
                feat_select.split_dates(feat_select.load_feat(taname,
                                                              setname))[0],
                "label5", "_p1")

            df2.to_pickle(filename)
        # Always read back from disk, even when the file was just written.
        df2 = pd.read_pickle(filename)
        feat_select.ana_apply(df2, "_p1", setname, f)
        # Unique `name` values whose direct_p1 is 1 / -1 in this window.
        cur_p_set = set(df2[df2.direct_p1 == 1].name.unique())
        cur_n_set = set(df2[df2.direct_p1 == -1].name.unique())
        # Keep only names that stay in the same set across every window so far.
        abs_direct_p_set = abs_direct_p_set.intersection(cur_p_set)
        abs_direct_n_set = abs_direct_n_set.intersection(cur_n_set)
        print list(abs_direct_n_set)
    # istable is 1 when the row's name survived in either set, else 0.
    df.loc[:,"istable"] = df.apply(lambda row: 1 if row["name"] in abs_direct_p_set else \
             (1 if row["name"] in abs_direct_n_set else 0), axis = 1)
    # Zero out `direct` on rows where istable is 0; other rows keep their value.
    df.loc[:, "direct"] = df.apply(lambda row: 0
                                   if row["istable"] == 0 else row["direct"],
                                   axis=1)