# Example no. 1
0
def xandy(alldata, traindatestart, traindateend, testdateend):
    """Build the feature table for posts in the prediction window.

    The training window [traindatestart, traindateend] is summarised by
    ``getusernumberfeature``; the prediction window
    [traindateend, testdateend] supplies per-post text and calendar features.
    The two are combined with an inner merge, matching the index of the
    training summary against the ``uid`` column of the per-post features.

    Args:
        alldata: Data handed to ``onetimeintveraldata``. The slices returned
            for it must expose the columns ``time``, ``content``, ``uid``,
            ``mid``, ``forward_count``, ``comment_count`` and ``like_count``.
        traindatestart: Start of the training window.
        traindateend: End of the training window; also used as the start of
            the prediction window.
        testdateend: End of the prediction window.

    Returns:
        A ``(train, don_not_have_before)`` tuple. ``train`` is the merged
        feature table with missing values replaced by 0.
        ``don_not_have_before`` holds the prediction-window rows whose
        ``mid`` did not survive the merge.
    """
    # Data to be tested: the prediction window starts where training ends.
    teststartdate = traindateend
    traindata = onetimeintveraldata(alldata, traindatestart, traindateend)
    predictdata = onetimeintveraldata(alldata, teststartdate, testdateend)

    groupresult = getusernumberfeature(traindata)

    times = pd.DatetimeIndex(predictdata["time"])
    dayofweek = pd.DataFrame(
        times.dayofweek, index=predictdata.index, columns=["week"])

    # NOTE(review): the result is not part of the feature table (it is
    # commented out of the concat in the original). The call is kept in case
    # countdiffface has side effects on predictdata -- confirm and remove.
    countdiffface(predictdata)

    content = predictdata["content"]

    # Series.str.count treats its argument as a regular expression, so the
    # regex metacharacters "?", "[" and "(" must be escaped. The order of the
    # patterns fixes the column order of the resulting table.
    patterns = (
        r"\?",
        "【",
        "http",
        "@",
        "#",
        "《",
        "分享",
        "~",
        r"\[",
        "发起了一个话题",
        "转载博",
        r"\(",
        "!",
    )
    pieces = [content.str.count(pattern) for pattern in patterns]
    pieces.append(content.str.len())
    pieces.append(dayofweek)
    pieces.append(predictdata[
        ["uid", "mid", "forward_count", "comment_count", "like_count"]])
    strfeature = pd.concat(pieces, axis=1)

    # The inner merge keeps only posts whose uid is present in the index of
    # groupresult; posts that do not match are returned separately below.
    train = pd.merge(groupresult, strfeature, left_index=True,
                     right_on="uid", how="inner")

    don_not_have_before = predictdata[~predictdata["mid"].isin(train["mid"])]

    # fillna returns a new frame; rebind so the filled values are returned.
    train = train.fillna(0)
    return train, don_not_have_before
# Example no. 2
0
def predictresult(models, traindatestart, traindateend, testdateend):
    """Predict the three count columns for the test window and print a score.

    Reads the data with ``readdata``, builds features with ``xandy``, obtains
    predictions from ``testp`` and writes them into a deep copy of the
    test-window data. The predicted and actual count columns are then printed
    along with the result of ``scores``. Nothing is returned.

    Args:
        models: Passed through to ``testp`` unchanged.
        traindatestart: Start of the training window.
        traindateend: End of the training window / start of the test window.
        testdateend: End of the test window.
    """
    count_columns = ["forward_count", "comment_count", "like_count"]

    raw = readdata()
    # Only the second element returned by usercluseter is used here.
    _unused, trainable = usercluseter(raw)

    features, don_not_have_before = xandy(
        trainable, traindatestart, traindateend, testdateend)

    predictions = testp(models, features)

    actual = onetimeintveraldata(raw, traindateend, testdateend)

    # Outer-join the three prediction tables on their indexes, then replace
    # the gaps left by the outer join with 0.
    merged = predictions[0]
    for position in (1, 2):
        merged = pd.merge(merged, predictions[position],
                          left_index=True, right_index=True, how="outer")
    predicted = merged.fillna(0)

    guess = actual.copy(deep=True)
    print("%s %s" % (id(guess), id(actual)))

    # Rows of the test window whose mid appears in the prediction index.
    has_prediction = guess["mid"].isin(predicted.index)

    # NOTE(review): values[3:6] selects ROWS 3..5 of the prediction matrix,
    # not columns, and ignores mid alignment -- this looks unintended and will
    # only assign cleanly when shapes happen to broadcast. Kept as-is because
    # the column layout of the merged predictions is not visible here.
    guess.loc[has_prediction, count_columns] = predicted.values[3:6]

    # Posts without any prediction are scored as all zeros.
    guess.loc[~has_prediction, count_columns] = 0

    # Assumes positions 3..5 of the frame are the three count columns
    # -- TODO confirm against the schema produced by readdata.
    f = guess.values[:, 3:6]
    t = actual.values[:, 3:6]
    print("%s %s" % (f, t))
    print(scores(f, t))