def train_model(classifier, train_csv, test_csv, model_file):
    """Fit *classifier* on a CSV train/test pair and persist the result.

    Both CSVs are expected in the digit-recognizer layout: column 0 is the
    label, the remaining columns are pixel features.

    Parameters
    ----------
    classifier : estimator with a scikit-learn style ``fit``/``predict`` API.
    train_csv : path to the training CSV.
    test_csv : path to the held-out evaluation CSV (same layout).
    model_file : path where the fitted model is written.

    Raises
    ------
    ValueError
        If either CSV does not load as a 2-D array.
    """
    train_set = digit_io.read_from_csv(train_csv)
    test_set = digit_io.read_from_csv(test_csv)
    # Validate with explicit exceptions: `assert` is stripped under -O,
    # so it must not guard runtime input.
    if train_set.ndim != 2:
        raise ValueError("training data must be a 2-D array")
    if test_set.ndim != 2:
        raise ValueError("test data must be a 2-D array")
    # Column 0 is the target label; everything after it is the feature matrix.
    data_train, target_train = train_set[0::, 1::], train_set[0::, 0]
    # Renamed from the misleading `test_train` — this is the TEST feature
    # matrix, passed to learn_model in the same positional slot as before.
    data_test, target_test = test_set[0::, 1::], test_set[0::, 0]
    learn_model(classifier, data_train, data_test, target_train,
                target_test, model_file)
def split_entity_dataset():
    """Read ../data/train.csv, split it, and write the two halves.

    The resulting partitions land in ../data/training/ as train.csv and
    test.csv.
    """
    source_path = "../data/train.csv"
    full_set = digit_io.read_from_csv(source_path)
    training_part, testing_part = data_split(full_set)
    digit_io.write_to_csv(training_part, '../data/training/train.csv')
    digit_io.write_to_csv(testing_part, '../data/training/test.csv')
def predict(model_file, test_file, filename):
    """Predict labels for *test_file* using a previously saved model.

    Parameters
    ----------
    model_file : path to the persisted classifier (loaded via load_model).
    test_file : path to a CSV of feature rows to classify.
    filename : destination path for the predictions, or None to print them
        to stdout instead.
    """
    classifier = load_model(model_file)
    test = digit_io.read_from_csv(test_file)
    predicted = classifier.predict(test)
    # Identity check against the None singleton (PEP 8), not `!= None`,
    # which would invoke __ne__ and can misbehave on array-like objects.
    if filename is not None:
        digit_io.write_predicted_result(predicted, filename)
    else:
        # print(x) with a single argument behaves identically under
        # Python 2's print statement and Python 3's print function.
        print(predicted)
def extract_small_dataset():
    """Carve small train/test samples out of ../data/train.csv.

    The full set is split once, then the held-out half is split again to
    produce two small CSVs under ../data/training/.
    """
    source_csv = "../data/train.csv"
    whole_set = digit_io.read_from_csv(source_csv)
    _larger_half, holdout = data_split(whole_set)
    mini_train, mini_test = data_split(holdout)
    digit_io.write_to_csv(mini_train, "../data/training/small_train.csv")
    digit_io.write_to_csv(mini_test, "../data/training/small_test.csv")
# FIXME(review): this is an exact duplicate of the `predict` defined earlier
# in this file; at import time this second definition silently shadows the
# first. One of the two should be deleted.
def predict(model_file, test_file, filename):
    """Predict labels for *test_file* using a previously saved model.

    Parameters
    ----------
    model_file : path to the persisted classifier (loaded via load_model).
    test_file : path to a CSV of feature rows to classify.
    filename : destination path for the predictions, or None to print them
        to stdout instead.
    """
    classifier = load_model(model_file)
    test = digit_io.read_from_csv(test_file)
    predicted = classifier.predict(test)
    # Identity check against the None singleton (PEP 8), not `!= None`.
    if filename is not None:
        digit_io.write_predicted_result(predicted, filename)
    else:
        # Single-argument print() is equivalent in Python 2 and 3.
        print(predicted)