def main(): targetFile = "../Dados/main-features2.csv" trainFile = '../boa.csv' trainFeatures = first_dataset(trainFile) trainDataSet = load_dataset(trainFile, trainFeatures).drop('id', 1) trainFeatures.pop(0) trainFeatures.pop() targetFeatures = first_dataset(targetFile) targetDataSet = load_dataset(targetFile, targetFeatures) print(trainFeatures) print(targetFeatures) createNeuralNetworkClassifier(trainDataSet, trainFeatures, targetDataSet)
def generateOutput():
    """Load training and target CSVs and delegate to create_target to emit predictions."""
    data_file = '../boa.csv'
    target_file = '../Dados/main-features2.csv'
    name_of_features = first_dataset(data_file)
    target_features = first_dataset(target_file)
    dataSet = load_dataset(data_file, name_of_features)
    targetDataSet = load_dataset(target_file, target_features)
    # drop(columns=...) — the positional axis argument was removed in pandas 2.0.
    dataSet = dataSet.drop(columns='id')
    name_of_features.pop(0)  # remove first feature name — presumably 'id'; TODO confirm
    name_of_features.pop()   # remove last feature name — presumably the target column; TODO confirm
    create_target(dataSet, name_of_features, targetDataSet, target_features)
def main(): targetFile = "../Dados/main-features2.csv" trainFile = '../boa.csv' trainFeatures = first_dataset(trainFile) trainDataSet = load_dataset(trainFile, trainFeatures).drop('id', 1) trainFeatures.pop(0) trainFeatures.pop() targetFeatures = first_dataset(targetFile) targetDataSet = load_dataset(targetFile, targetFeatures) print(trainFeatures) print(targetFeatures) decisionTree = createTreeClassifier(trainDataSet, trainFeatures, targetDataSet) visualize_tree(decisionTree, trainFeatures)
def train(): file_path = "../Dados/master-features.csv" data_file = '../boa.csv' name_of_features = first_dataset(data_file) dataSet = load_dataset(data_file, name_of_features) dataSet = dataSet.drop('id', 1) name_of_features.pop(0) name_of_features.pop() print(name_of_features)
def gridSearch(targetFile, targetFeatures):
    """Grid-search MLPClassifier hyperparameters and optionally write predictions.

    Args:
        targetFile: despite the name, this is a pandas DataFrame of target rows
            (it is indexed with column lists and ``['id_target']``), or None to
            skip prediction output — TODO confirm with callers.
        targetFeatures: list of feature column names to select from targetFile.

    Side effects: prints search diagnostics; writes ``output2.csv`` when
    targetFile is provided.
    """
    data_file = '../boa.csv'
    name_of_features = first_dataset(data_file)
    dataSet = load_dataset(data_file, name_of_features)
    # drop(columns=...) — the positional axis argument was removed in pandas 2.0.
    dataSet = dataSet.drop(columns='id')
    name_of_features.pop(0)  # remove first feature name — presumably 'id'; TODO confirm
    name_of_features.pop()   # remove last feature name — presumably the target column; TODO confirm
    print(name_of_features)
    print(len(name_of_features), len(targetFeatures))

    # Exhaustive search over MLP hyperparameters with 10-fold cross-validation.
    model = MLPClassifier()
    parameters = {
        'hidden_layer_sizes': [(100, 10), (100, 20), (100, 30), (100, 1), (100, 5)],
        'learning_rate': ['constant', 'invscaling'],
        'learning_rate_init': [0.05, 0.01, 0.1],
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
    }
    rsearch = model_selection.GridSearchCV(
        estimator=model, param_grid=parameters, n_jobs=-1, cv=10)
    rsearch.fit(dataSet[name_of_features], dataSet['target'])
    print(rsearch)
    # Summarize the best result of the grid search.
    print(rsearch.best_score_)
    print(rsearch.best_estimator_.alpha)

    # BUG FIX: the original used ``if targetFile:`` — truth-testing a DataFrame
    # raises "ValueError: The truth value of a DataFrame is ambiguous".
    if targetFile is not None:
        prediction = rsearch.predict(targetFile[targetFeatures])
        cols = ['Predicted']
        cenas = targetFile['id_target']
        features = pd.DataFrame(prediction, columns=cols)
        file_name = "output2.csv"
        features.set_index(cenas, inplace=True)
        features.to_csv(file_name, sep=',', encoding='utf-8')