def runLinearRegressionExample(filename):
    # Times each preprocessing step and the model fit for a simple linear regression.
    start_time = time.time()
    X, y = pre.loadDataset(filename)
    elapsed_time = time.time() - start_time
    print("Load Dataset: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    X = pre.fillMissingData(X, 1, X.shape[1])
    elapsed_time = time.time() - start_time
    print("Fill Missing Data: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    X = pre.computeCategorization(X, 0)
    elapsed_time = time.time() - start_time
    print("Compute Categorization: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(X, y, 0.8)
    elapsed_time = time.time() - start_time
    print("Split Train Test sets: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    computeLinearRegressionModel(XTrain, yTrain, XTest, yTest)
    elapsed_time = time.time() - start_time
    print("Compute Linear Regression: %.2f" % elapsed_time, "seconds.")

def runMultipleLinearRegressionExample(filename):
    # Same pipeline as the simple example, plus automatic backward elimination.
    start_time = time.time()
    X, y = pre.loadDataset(filename)
    elapsed_time = time.time() - start_time
    print("Load Dataset: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    X = pre.fillMissingData(X, 0, 2)
    elapsed_time = time.time() - start_time
    print("Fill Missing Data: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    X = pre.computeCategorization(X, 3)
    elapsed_time = time.time() - start_time
    print("Compute Categorization: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(X, y, 0.8)
    elapsed_time = time.time() - start_time
    print("Split Train Test sets: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    XTrain, XTest = computeAutomaticBackwardElimination(XTrain, yTrain, XTest, 0.05)
    elapsed_time = time.time() - start_time
    print("Compute Automatic Backward Elimination: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    computeMultipleLinearRegressionModel(XTrain, yTrain, XTest, yTest)
    elapsed_time = time.time() - start_time
    print("Compute Multiple Linear Regression: %.2f" % elapsed_time, "seconds.")

def preprocessDataCrossValidation(args, use_scaling):
    # Preprocess the full dataset (no train/test split) for cross-validation.
    X, y, csv = pre.loadDataset(args.dataset, args.delimiter)

    if args.fill_missing_data_columns is not None:
        columns = [int(x) for x in args.fill_missing_data_columns.split(',')]
        offset = 0
        for n in columns:
            X = pre.fillMissingData(X, n + offset)
            offset += n

    if args.one_hot_encoding_columns is not None:
        columns = [int(x) for x in args.one_hot_encoding_columns.split(',')]
        offset = 0
        for n in columns:
            # one-hot encoding adds columns, so shift the remaining indices
            X, o = pre.computeCategorization(X, n + offset)
            offset += o - 1

    if use_scaling:
        X = pre.computeScaling(X)
        # computeScaling may return a (data, scaler) pair; keep only the data
        if len(X) == 2:
            X = X[0]

    return X, y

def preprocessData(args, use_scaling):
    # Preprocess the dataset and return train/test splits.
    X, y, csv = pre.loadDataset(args.dataset, args.delimiter)

    if args.fill_missing_data_columns is not None:
        columns = [int(x) for x in args.fill_missing_data_columns.split(',')]
        offset = 0
        for n in columns:
            X = pre.fillMissingData(X, n + offset)
            offset += n

    if args.one_hot_encoding_columns is not None:
        columns = [int(x) for x in args.one_hot_encoding_columns.split(',')]
        offset = 0
        for n in columns:
            # one-hot encoding adds columns, so shift the remaining indices
            X, o = pre.computeCategorization(X, n + offset)
            offset += o - 1

    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(X, y, args.test_size)

    if use_scaling:
        XTrain = pre.computeScaling(XTrain)
        XTest = pre.computeScaling(XTest)
        # computeScaling may return a (data, scaler) pair; keep only the data
        if len(XTrain) == 2:
            XTrain = XTrain[0]
        if len(XTest) == 2:
            XTest = XTest[0]

    return XTrain, XTest, yTrain, yTest

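# --- Usage sketch (illustrative, not part of the original code) ---
# A minimal example of driving preprocessData above. The dataset path,
# delimiter, and column lists below are hypothetical; only the attribute
# names (dataset, delimiter, fill_missing_data_columns,
# one_hot_encoding_columns, test_size) come from the function itself.
def _preprocessDataUsageSketch():
    from argparse import Namespace

    args = Namespace(
        dataset="data.csv",                # hypothetical CSV file
        delimiter=",",
        fill_missing_data_columns="1",     # hypothetical column indices
        one_hot_encoding_columns="0",      # hypothetical column indices
        test_size=0.2,
    )
    XTrain, XTest, yTrain, yTest = preprocessData(args, use_scaling=True)
    return XTrain, XTest, yTrain, yTest
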
def runDecisionTreeRegressionExample(filename):
    start_time = time.time()
    X, y, csv = pre.loadDataset(filename)
    elapsed_time = time.time() - start_time
    print("Load Dataset: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    computeDecisionTreeRegressionModel(X, y)
    elapsed_time = time.time() - start_time
    print("Compute Decision Tree Regression: %.2f" % elapsed_time, "seconds.")

def runRandomForestRegressionExample(filename):
    start_time = time.time()
    X, y, csv = pre.loadDataset(filename)
    elapsed_time = time.time() - start_time
    print("Load Dataset: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    computeRandomForestRegressionModel(X, y, 100)
    elapsed_time = time.time() - start_time
    print("Compute Random Forest Regression: %.2f" % elapsed_time, "seconds.")

def runPolynomialLinearRegressionExample(filename):
    start_time = time.time()
    X, y, csv = pre.loadDataset(filename)
    elapsed_time = time.time() - start_time
    print("Load Dataset: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    computePolynomialLinearRegressionModel(X, y, 2)
    elapsed_time = time.time() - start_time
    print("Compute Polynomial Linear Regression: %.2f" % elapsed_time, "seconds.")

def runDecisionTreeRegressionExample(filename):
    start_time = time.time()
    X, y, csv = pre.loadDataset(filename)
    elapsed_time = time.time() - start_time
    #print("Load Dataset: %.2f" % elapsed_time, "seconds.")

    start_time = time.time()
    regressor = computeDecisionTreeRegressionModel(X, y)
    elapsed_time = time.time() - start_time
    print("Compute Decision Tree Regression: %.2f" % elapsed_time, "seconds.")

    # Note: R^2 is computed on the same data the tree was fitted on.
    from sklearn.metrics import r2_score
    return r2_score(y, regressor.predict(X))

def preprocessData(filename):
    X, y, csv = pre.loadDataset(filename, ",")
    X = pre.fillMissingData(X, 2, 3)
    # sex
    X = pre.computeCategorization(X)
    # embark
    X = pre.computeCategorization(X)
    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(X, y, 0.15)
    XTrain = pre.computeScaling(XTrain)
    XTest = pre.computeScaling(XTest)
    return XTrain, XTest, yTrain, yTest

def computeLogisticRegressionExample(filename):
    X, y, csv = pre.loadDataset(filename, ",")
    X = pre.fillMissingData(X, 2, 3)
    # sex
    X = pre.computeCategorization(X)
    # embark
    X = pre.computeCategorization(X)
    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(X, y, 0.15)
    XTrain = pre.computeScaling(XTrain)
    XTest = pre.computeScaling(XTest)

    classifier = computeLogisticRegressionModel(XTrain, yTrain, XTest)
    yPred = predictModel(classifier, XTest)
    return evaluateModel(classifier, yPred, yTest)

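# --- Usage sketch (illustrative, not part of the original code) ---
# computeLogisticRegressionExample above expects a CSV whose columns match the
# sex/embark preprocessing it performs (a Titanic-style dataset). The filename
# below is hypothetical, and the returned value is whatever evaluateModel
# produces (e.g. an accuracy score or confusion matrix).
def _logisticRegressionUsageSketch():
    result = computeLogisticRegressionExample("titanic.csv")  # hypothetical path
    print("Logistic regression evaluation:", result)
    return result
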
def trainModel(n_iters=100000, teacher_forcing_ratio=0., print_every=1000,
               plot_every=100, learning_rate=0.01, max_length=MAX_LENGTH):
    training_pairs, vocab_size, word2ix, ix2word = loadDataset()
    encoder, decoder = loadModel(vocab_size)

    print("Training the model ... ")
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # reset every print_every
    plot_loss_total = 0   # reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_variable = training_pair['input']
        target_variable = training_pair['target']
        input_variable = Variable(torch.LongTensor(input_variable).view(-1, 1))
        target_variable = Variable(torch.LongTensor(target_variable).view(-1, 1))
        if USE_CUDA:
            input_variable = input_variable.cuda()
            target_variable = target_variable.cuda()
        # print(input_variable)  # debug output, disabled to avoid flooding the console

        loss = trainIter(input_variable, target_variable, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion,
                         max_length=max_length,
                         teacher_forcing_ratio=teacher_forcing_ratio)
        print_loss_total += loss
        plot_loss_total += loss

        # Keeping track of average loss and printing results on screen
        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (utils.timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        # Keeping track of average loss and plotting in figure
        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            if min(plot_losses) == plot_loss_avg:
                # we save this version of the model (best average loss so far)
                torch.save(encoder.state_dict(), "encoder.ckpt")
                torch.save(decoder.state_dict(), "decoder.ckpt")
            plot_loss_total = 0

    utils.showPlot(plot_losses)

        # break

        # Plot and persist the running loss curve for this epoch.
        plt.plot(np.array(loss_plot) / print_every, 'r')
        plt.xlabel('number of examples')
        plt.ylabel('Loss')
        plt.savefig('loss.png')
        plt.clf()
        np.savetxt('loss.csv', np.array(loss_plot) / print_every)

        mean_epoch_loss /= len(dataset)
        print("Epoch %d ----- Loss : %.4f" % (epoch, mean_epoch_loss))

    if save_model:
        torch.save(model.state_dict(), 'trained_model.pt')


if __name__ == "__main__":
    from preprocessing import loadDataset
    import numpy as np

    pairs, vocab_size, word2id, id2word = loadDataset()
    np.random.shuffle(pairs)

    model = Seq2Seq(input_size=vocab_size, embedding_size=256, hidden_size=256,
                    num_layers=5, teacher_forcing_prob=0.5).to(device)
    train(model, pairs)
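
# --- Inference sketch (illustrative, not part of the original code) ---
# One way the checkpoint saved by train() above might be reloaded later. The
# constructor arguments simply mirror the __main__ block; loadDataset is
# reused only to recover vocab_size.
def _loadTrainedSeq2Seq():
    from preprocessing import loadDataset

    _, vocab_size, _, _ = loadDataset()
    model = Seq2Seq(input_size=vocab_size, embedding_size=256, hidden_size=256,
                    num_layers=5, teacher_forcing_prob=0.5).to(device)
    model.load_state_dict(torch.load('trained_model.pt', map_location=device))
    model.eval()  # switch off dropout etc. for inference
    return model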