def dump_train():
    """Extract per-pixel features from the training images and dump them,
    together with the target column, to a CSV under tmp/train_dump."""
    _, _, _, train_gray_data, test_gray_data, _, labels = i_p.load_data()

    train_df = f.make_data_df(train_gray_data, labels)
    test_df = f.make_test_df(test_gray_data)

    train_df = train_df.reset_index()
    test_df = test_df.reset_index()

    train_df.columns = ["pngname", "input", "label"]
    test_df.columns = ["pngname", "input"]

    # Build the per-pixel feature matrix; FeatureUnion names its outputs
    # "<transformer>__<feature>", so keep only the feature part.
    fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
    feature_name_list = [s.split("__")[1] for s in fu.get_feature_names()]
    feature_name_list.append("target")
    train_X = fu.fit_transform(train_df)
    # One target value per pixel: flatten each label image and concatenate.
    train_y = np.concatenate(train_df["label"].apply(lambda x: x.flatten()))
    train_X, train_y = cl.downsampling_data(train_X, train_y, 0.2)
    train_dump = pd.DataFrame(np.c_[train_X, train_y], columns=feature_name_list)
    dump_dir = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), "..", "tmp", "train_dump")
    if not os.path.isdir(dump_dir):
        os.makedirs(dump_dir)
    train_dump.to_csv(os.path.join(dump_dir, "train_dump.csv"), index=False)
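
# Sketch (not part of the original pipeline): reload the matrix written by
# dump_train(). The path and the trailing "target" column mirror the CSV
# layout produced above; load_train_dump itself is a hypothetical helper.
def load_train_dump(path=None):
    if path is None:
        path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "..", "tmp", "train_dump", "train_dump.csv")
    dump = pd.read_csv(path)
    train_y = dump["target"].values
    train_X = dump.drop("target", axis=1).values
    return train_X, train_y
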
def prediction(clf_name):
    """Fit the classifier registered under clf_name, pickle the fitted
    instance, and write a submission CSV with one row per test pixel."""
    clf = clf_dict[clf_name]["clf"]
    print "****************classifier****************"
    print clf

    _, _, _, train_gray_data, test_gray_data, _, labels = i_p.load_data()
    train_keys = train_gray_data.keys()
    test_keys = test_gray_data.keys()

    train_df = f.make_data_df(train_gray_data, labels)
    test_df = f.make_test_df(test_gray_data)

    train_df = train_df.reset_index()
    test_df = test_df.reset_index()

    train_df.columns = ["pngname", "input", "label"]
    test_df.columns = ["pngname", "input"]

    if clf_name == "SGDB":
        # Incremental learner: feed the training images one at a time.
        # Operation check (disabled): uncomment to train on a small subset.
        # train_df, train_keys, test_df, test_keys = pre.make_checkdata(mode="df")
        # train_df, train_keys, _, _ = pre.make_checkdata(mode="df")
        for i in xrange(len(train_keys)):
            train_X, train_y = classify.set_traindata(train_df, train_keys[i])
            # NB: an sklearn classifier would need classes= on the first
            # partial_fit call; a regressor does not.
            clf.partial_fit(train_X, train_y)

    else:
        # Batch learner: build the full feature matrix, downsample to 20%,
        # and fit in one pass.
        # Operation check (disabled):
        # train_df, train_keys, _, _ = pre.make_checkdata(mode="df")
        fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
        train_X = fu.fit_transform(train_df)
        train_y = np.concatenate(train_df["label"].apply(lambda x: x.flatten()))
        train_X, train_y = classify.downsampling_data(train_X, train_y, 0.2)
        clf.fit(train_X, train_y)
    # Persist the fitted instance; pickle files must be opened in binary mode.
    clf_dir = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), "..", "tmp", "fit_instance")
    if not os.path.isdir(clf_dir):
        os.makedirs(clf_dir)
    now = datetime.datetime.now()
    savefile = os.path.join(
        clf_dir, clf_name + now.strftime("%Y_%m_%d_%H_%M_%S") + ".pickle")
    with open(savefile, "wb") as fi:
        pickle.dump(clf, fi)
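    # Reloading the pickled instance later (sketch):
    #     with open(savefile, "rb") as fi:
    #         clf = pickle.load(fi)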

    # Predict each test image, binarize the per-pixel output, and restore
    # the original image shape.
    predict_dfs = []
    for i in xrange(len(test_keys)):
        imgname = test_keys[i]
        test_img = test_df[test_df["pngname"] == imgname]["input"].values[0]
        shape = test_img.shape

        # convert_testdata expects a {name: image} mapping.
        X_test = convert_testdata({imgname: test_img})
        output = np.asarray(clf.predict(X_test))
        zo = np.vectorize(zero_one)
        output = zo(output).reshape(shape)

        # Submission ids are "<image>_<row>_<col>", 1-indexed.
        tmp = []
        for row in xrange(len(output)):
            for column in xrange(len(output[row])):
                id_ = imgname + "_" + str(row + 1) + "_" + str(column + 1)
                tmp.append([id_, output[row][column]])
        predict_dfs.append(pd.DataFrame(tmp))

    # Concatenate once after the loop instead of growing a DataFrame inside it.
    predict_df = pd.concat(predict_dfs)
    predict_df.columns = ["id", "value"]

    now = datetime.datetime.now()
    submission_path = os.path.join(
        SUBMISSION_DIR,
        "submission_" + now.strftime("%Y_%m_%d_%H_%M_%S") + ".csv")
    predict_df.to_csv(submission_path, header=True, index=False)
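

if __name__ == "__main__":
    # Minimal usage sketch. Assumption: "SGDB" is a key in clf_dict, as the
    # partial_fit branch above suggests; any registered name would do.
    dump_train()
    prediction("SGDB")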