# Assumed module-level imports, reconstructed from usage below. The
# project-local modules (i_p for input loading, f for feature building,
# classify and its alias cl, pre) plus clf_dict and SUBMISSION_DIR are
# defined or imported elsewhere in this project.
import os
import pickle
import datetime

import numpy as np
import pandas as pd
from sklearn import cross_validation, preprocessing
from sklearn.pipeline import FeatureUnion


def get_data_Kfold(mode):
    '''
    Get data for K-fold cross validation.
    mode="train" returns (data_df, keys, kf); mode="test" returns the
    raw test_gray_data dict.
    '''
    if mode == "train":
        _, _, _, train_gray_data, _, _, labels = i_p.load_data()
        data_df = f.make_data_df(train_gray_data, labels)
        data_df = data_df.reset_index()
        data_df.columns = ["pngname", "input", "label"]
        keys = np.asarray(train_gray_data.keys())
        kf = cross_validation.KFold(n=len(keys), n_folds=5)
        return data_df, keys, kf
    elif mode == "test":
        _, _, _, _, test_gray_data, _, _ = i_p.load_data()
        return test_gray_data
    else:
        print "mode error!"
        print "set \"train\" or \"test\""
        quit()
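
# Usage sketch (illustrative, not part of the original pipeline): how the
# (data_df, keys, kf) triple from get_data_Kfold("train") might be
# consumed. The old cross_validation.KFold object yields
# (train_index, valid_index) pairs when iterated.
def _kfold_usage_example():
    data_df, keys, kf = get_data_Kfold("train")
    for train_idx, valid_idx in kf:
        # Map fold indices back to png names, then slice the DataFrame.
        train_fold = data_df[data_df["pngname"].isin(keys[train_idx])]
        valid_fold = data_df[data_df["pngname"].isin(keys[valid_idx])]
        print len(train_fold), len(valid_fold)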
def convert_traindata(train_gray_data, labels):
    '''
    Convert raw training images and labels into a standardized feature
    matrix X_train and a flattened per-pixel label vector y_train.
    '''
    data_df = f.make_data_df(train_gray_data, labels)
    fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
    std = preprocessing.StandardScaler()
    X_train = fu.fit_transform(data_df)
    y_train = np.concatenate(data_df["label"].apply(lambda x: x.flatten()))
    X_train = std.fit_transform(X_train)
    return X_train, y_train
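
# prediction() below calls convert_testdata(), which is defined elsewhere
# in the original module. A plausible reconstruction, mirroring
# convert_traindata() above, is sketched here under a distinct name so it
# does not shadow the real helper (assumptions: test features go through
# the same FeatureUnion rules, with a scaler fitted per call).
def _convert_testdata_sketch(test_gray_data):
    data_df = f.make_test_df(test_gray_data)
    fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
    std = preprocessing.StandardScaler()
    X_test = fu.fit_transform(data_df)
    return std.fit_transform(X_test)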
def get_data():
    '''
    get X, y data
    :rtype: tuple
    '''
    _, _, _, train_gray_data, _, _, labels = i_p.load_data()
    data_df = f.make_data_df(train_gray_data, labels)
    fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
    X = fu.fit_transform(data_df)
    y = np.concatenate(data_df["label"].apply(lambda x: x.flatten()))
    return (X, y)
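
# Minimal sketch of feeding get_data() output to a scikit-learn
# estimator. SGDClassifier is only an example here, not necessarily the
# project's chosen model (those live in clf_dict).
def _get_data_usage_example():
    from sklearn.linear_model import SGDClassifier
    X, y = get_data()
    clf = SGDClassifier()
    clf.fit(X, y)
    return clf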
def make_checkdata(mode="df"): fu = FeatureUnion(transformer_list=f.feature_transformer_rule) Std = preprocessing.StandardScaler() _, _, _, train_gray_data, test_gray_data, _, labels = i_p.load_data() train_keys = train_gray_data.keys()[:2] train_inputs = {} train_labels = {} for i in xrange(len(train_keys)): input_ = train_gray_data[train_keys[i]] label = labels[train_keys[i]] train_inputs.update({train_keys[i]:input_}) train_labels.update({train_keys[i]:label}) test_keys = test_gray_data.keys()[:2] test_inputs = {} for i in xrange(len(test_keys)): input_ = test_gray_data[test_keys[i]] test_inputs.update({test_keys[i]:input_}) train_df = f.make_data_df(train_inputs, train_labels) test_df = f.make_test_df(test_inputs) if mode == "df": train_df = train_df.reset_index() test_df = test_df.reset_index() train_df.columns = ["pngname", "input", "label"] test_df.columns = ["pngname", "input"] return train_df, train_keys, test_df, test_keys elif mode == "feature": X_train = fu.fit_transform(train_df) X_train = Std.fit_transform(X_train) y_train = np.concatenate(train_df["label"].apply(lambda x: x.flatten())) X_test = fu.fit_transform(test_df) X_test = Std.fit_transform(X_test) return X_train, y_train, X_test
def dump_train():
    '''
    Extract features from the training data, downsample them, and dump
    the result to CSV for later inspection.
    '''
    _, _, _, train_gray_data, _, _, labels = i_p.load_data()
    train_df = f.make_data_df(train_gray_data, labels)
    train_df = train_df.reset_index()
    train_df.columns = ["pngname", "input", "label"]

    fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
    feature_name_list = [s.split("__")[1] for s in fu.get_feature_names()]
    feature_name_list.append("target")

    train_X = fu.fit_transform(train_df)
    train_y = np.concatenate(train_df["label"].apply(lambda x: x.flatten()))
    train_X, train_y = cl.downsampling_data(train_X, train_y, 0.2)

    train_dump = pd.DataFrame(np.c_[train_X, train_y],
                              columns=feature_name_list)
    dump_path = os.path.abspath(os.path.dirname(__file__)) +\
        "/../tmp/train_dump"
    train_dump.to_csv(dump_path + "/train_dump.csv", index=False)
def prediction(clf_name):
    '''
    Fit the classifier registered under clf_name in clf_dict, pickle the
    fitted instance, and write per-pixel predictions for every test
    image to a timestamped submission CSV.
    '''
    print "****************classifier****************"
    print clf_dict[clf_name]["clf"]
    clf = clf_dict[clf_name]["clf"]

    _, _, _, train_gray_data, test_gray_data, _, labels = i_p.load_data()
    train_keys = train_gray_data.keys()
    test_keys = test_gray_data.keys()

    train_df = f.make_data_df(train_gray_data, labels)
    test_df = f.make_test_df(test_gray_data)
    train_df = train_df.reset_index()
    test_df = test_df.reset_index()
    train_df.columns = ["pngname", "input", "label"]
    test_df.columns = ["pngname", "input"]

    if clf_name == "SGDB":
        # Online learning: feed one image at a time via partial_fit().
        # For a quick operation check, swap in the two-image subset:
        # train_df, train_keys, _, _ = pre.make_checkdata(mode="df")
        for key in train_keys:
            train_X, train_y = classify.set_traindata(train_df, key)
            clf.partial_fit(train_X, train_y)
    else:
        # Batch learning on downsampled features.
        fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
        train_X = fu.fit_transform(train_df)
        train_y = np.concatenate(train_df["label"].apply(lambda x: x.flatten()))
        train_X, train_y = classify.downsampling_data(train_X, train_y, 0.2)
        clf.fit(train_X, train_y)

    # Pickle the fitted classifier; pickle requires binary mode.
    clf_dir = os.path.abspath(os.path.dirname(__file__)) +\
        "/../tmp/fit_instance/"
    now = datetime.datetime.now()
    savefile = clf_dir + clf_name + "_" +\
        now.strftime("%Y_%m_%d_%H_%M_%S") + ".pickle"
    with open(savefile, "wb") as fi:
        pickle.dump(clf, fi)

    # Predict each test image and collect (id, value) rows.
    # convert_testdata() and zero_one() are defined elsewhere in this
    # module; zero_one() is applied elementwise to binarize the output.
    predict_dfs = []
    for imgname in test_keys:
        test_img = test_df[test_df["pngname"] == imgname]["input"].as_matrix()[0]
        shape = test_img.shape
        X_test = convert_testdata({imgname: test_img})
        output = clf.predict(X_test)
        output = np.vectorize(zero_one)(np.asarray(output)).reshape(shape)
        tmp = []
        for row in xrange(len(output)):
            for column in xrange(len(output[row])):
                id_ = imgname + "_" + str(row + 1) + "_" + str(column + 1)
                tmp.append([id_, output[row][column]])
        predict_dfs.append(pd.DataFrame(tmp))
    predict_df = pd.concat(predict_dfs)
    predict_df.columns = ["id", "value"]

    now = datetime.datetime.now()
    submission_path = SUBMISSION_DIR + "/submission_" +\
        now.strftime("%Y_%m_%d_%H_%M_%S") + ".csv"
    predict_df.to_csv(submission_path, header=True, index=False)
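
# Illustrative entry point (an assumption; the original section defines
# none). "SGDB" is the partial_fit branch key referenced in prediction()
# above; the available keys live in clf_dict, defined elsewhere.
if __name__ == '__main__':
    dump_train()
    prediction("SGDB")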