print("\n") path = 'model.pth' #torch.save(model.state_dict(), path) elif args.mode == "topNsimilar": path = 'model.pth' state_dict = torch.load(path) embedding = state_dict['input_embedding.weight'].to('cpu') Words = getSimilarWords(embedding, args.word, args.N_words, idx2word, word2idx) print(", ".join(words)) elif args.mode == "Analogy": path = 'model.pth' state_dict = torch.load(path) embedding = state_dict['input_embedding.weight'].to('cpu') words = getAnalogy(embedding, args.word1, args.word2, args.word3, 5, idx2word, word2idx) print(words[1]) elif args.mode == "GetScore": path = 'model.pth' state_dict = torch.load(path) embedding = state_dict['input_embedding.weight'].to('cpu') print(getScore(embedding, args.word1, args.word2, idx2word, word2idx)) elif args.mode == "plot": path = 'model.pth' state_dict = torch.load(path) embedding = state_dict['input_embedding.weight'].to('cpu') plotData(embedding,idx2word)
# Fragment 1 (tail of a function whose header is not visible here): sets x[index] = 1 for every index in word_indices, then returns x.
# Fragment 2 (script): loads 'X' and 'y' from Data/ex6data1.mat, keeps column 0 of y, plots the data, trains an SVM with C = 1 and visualizes its linear boundary, then evaluates gaussianKernel on two 3-element vectors with sigma = 2.
# NOTE(review): the banner reads "Testing Gaussian Kernel" but the SVM here is trained with utils.linearKernel; only the final gaussianKernel call exercises the Gaussian kernel, and `sim` is not printed in this view.
# NOTE(review): the trailing 1e-3 and 20 passed to utils.svmTrain are presumably a tolerance and an iteration cap - confirm against utils.svmTrain.
for index in word_indices: x[index] = 1 # =========================================================== return x # -------------------------- Testing Gaussian Kernel -------------------------------------- # Load from ex6data1 # You will have X, y as keys in the dict data data = loadmat(os.path.join('Data', 'ex6data1.mat')) X, y = data['X'], data['y'][:, 0] # Plot training data utils.plotData(X, y) #pyplot.show() # You should try to change the C value below and see how the decision # boundary varies (e.g., try C = 1000) C = 1 model = utils.svmTrain(X, y, C, utils.linearKernel, 1e-3, 20) utils.visualizeBoundaryLinear(X, y, model) #pyplot.show() x1 = np.array([1, 2, 1]) x2 = np.array([0, 4, -1]) sigma = 2 sim = gaussianKernel(x1, x2, sigma)
# Test split: the samples whose positions are listed in testSample.
testInputs = [inputs[i] for i in testSample]
testOutputs = [numericalOutputs[i] for i in testSample]

# Normalise both splits together; zScoreNormalization returns the new
# (train, test) pair.
trainInputs, testInputs = zScoreNormalization(trainInputs, testInputs)

# Pull the first four feature columns out of the training samples for plotting.
trainFeature1 = [sample[0] for sample in trainInputs]
trainFeature2 = [sample[1] for sample in trainInputs]
trainFeature3 = [sample[2] for sample in trainInputs]
trainFeature4 = [sample[3] for sample in trainInputs]
plotData(trainFeature1, trainFeature2, trainFeature3, trainFeature4,
         "Train data")

# tool
classifier = linear_model.LogisticRegression(max_iter=1000)
classifier.fit(trainInputs, trainOutputs)
print("-----------tool-----------")
print("Real: " + str(testOutputs))
# Predict once and reuse the result for both the listing and the accuracy,
# instead of running the classifier twice over the same inputs.
toolPredictions = list(classifier.predict(testInputs))
print("Comp: " + str(toolPredictions))
print("Accuracy: " + str(accuracy(testOutputs, toolPredictions)))

# manual
# One binary classifier per iris class.
# NOTE(review): presumably combined one-vs-rest further down - confirm.
classifierIrisSetosa = MyLogisticRegression()
classifierIrisVersicolor = MyLogisticRegression()
classifierIrisVirginica = MyLogisticRegression()
def main():
    """Run the whole pipeline: load, clean, tune, plot, predict, write.

    Reads ``train.csv`` and ``test.csv`` through ``utils.ReadFile``, maps the
    string features to floats with ``utils.mapToF``, runs ``utils.AgeModel2``
    on both sets, chooses random-forest parameters with ``TestRandForest``,
    draws the diagnostic plots and writes the test-set predictions with
    ``utils.OutputFile``.

    Every ``print`` call receives exactly one string, so the text written to
    stdout is the same under Python 2 and Python 3.
    """
    # Read training set
    print('')
    print('Reading training set...')
    train = utils.ReadFile('train.csv', 1)
    print('Finished reading...\n')

    # Preliminary statistics. Row 0 is skipped everywhere below (train[1:]),
    # so it is not counted as a person; column 1 holds the '0'/'1' labels.
    print('Preliminary Statistics:')
    print('{} people. {} features.'.format(
        np.shape(train)[0] - 1, np.shape(train)[1] - 2))
    print('{} survivors. {} deceased.\n'.format(
        (train[1:, 1] == '1').sum(), (train[1:, 1] == '0').sum()))

    # Map string features to floats
    print('Mapping Features to Floats.\n')
    dictN = {}  # modified in call (useful for name feature)
    dictC = {}  # modified in call (useful for cabin feature)
    dat, dictN, dictC = utils.mapToF(train[1:, :], 0, dictN, dictC)

    # Class labels
    lab = np.array([int(h) for h in train[1:, 1]])

    # Generate better model for missing Age feature.
    # NOTE(review): column 2 is presumably Age with -1.0 marking a missing
    # value - confirm against utils.mapToF.
    mask = dat[:, 2] != -1.0
    dat2 = np.zeros((mask.sum(), 9), dtype=np.float64)
    tar2 = np.zeros(mask.sum(), dtype=np.float64)
    dat, dat2, tar2 = utils.AgeModel2(dat, dat2, tar2, 1)

    # Preliminary Plots
    print('Generating preliminary scatter plots of data.\n')
    utils.PrelimPlots(dat, lab)
    utils.AgePlots(dat)

    # ML algorithms
    print('Choosing best parameters for Random Forest algorithm:')
    optim = TestRandForest(dat, lab)

    # Plotting Learning Curve
    print('')
    print('Plotting the learning curve\n')
    plotLearningCurve(dat, lab, optim)

    # Where is algorithm failing?
    print('Where is algorithm failing:\n')
    whereFailing(dat, lab, optim)

    # Read in test set
    print('Reading in Test Set\n')
    test = utils.ReadFile('test.csv', 0)

    # Map to floats, reusing the dictionaries built from the training set
    testF, dictN, dictC = utils.mapToF(test[1:, :], 1, dictN, dictC)

    # Make better prediction for missing Age features
    testF, dat2, tar2 = utils.AgeModel2(testF, dat2, tar2, 0)

    # Generate scatter plot for test set
    utils.plotData(testF, 0)

    # Make prediction. min_samples_split must be at least 2: scikit-learn
    # >= 0.18 raises ValueError for 1, and a single-sample node cannot be
    # split anyway. 2 is also the library default.
    print('Making Prediction\n')
    clf = RandomForestClassifier(n_estimators=optim[0],
                                 max_features=optim[1],
                                 min_samples_split=2)
    clf = clf.fit(dat, lab)
    pred = clf.predict(testF)

    # Now output prediction
    print('Outputting Prediction\n')
    utils.OutputFile(pred, train[0, :2], test[1, 0], 0)
    print('Done')
# Load the data set: two score columns followed by the label column.
score_columns = ['test1', 'test2']
data = pd.read_csv('./data/ex2data1.txt', sep=',',
                   names=score_columns + ['exam'])
X = data[score_columns].values
y = data['exam'].values
print(data.head())

# ==================== Part 1: Plotting ====================
# We start the exercise by first plotting the data to understand the
# problem we are working with.
print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.)')

plotData(X, y)
legend_labels = ['Admitted', 'Not admitted']
plt.legend(legend_labels,
           loc='upper right',
           shadow=True,
           fontsize='x-large',
           numpoints=1)
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')
# plt.show()

# Block until the user confirms so the figure can be inspected.
input("Program paused. Press Enter to continue...")

# ============ Part 2: Compute Cost and Gradient ============
# In this part of the exercise, you will implement the cost and gradient
# for logistic regression. You need to complete the code in
# costFunction.m
def main():
    """Run the whole pipeline: load, clean, tune, plot, predict, write.

    Reads ``train.csv`` and ``test.csv`` through ``utils.ReadFile``, maps the
    string features to floats with ``utils.mapToF``, runs ``utils.AgeModel2``
    on both sets, chooses random-forest parameters with ``TestRandForest``,
    draws the diagnostic plots and writes the test-set predictions with
    ``utils.OutputFile``.

    Every ``print`` call receives exactly one string, so the text written to
    stdout is the same under Python 2 and Python 3.
    """
    # Read training set
    print('')
    print('Reading training set...')
    train = utils.ReadFile('train.csv', 1)
    print('Finished reading...\n')

    # Preliminary statistics. Row 0 is skipped everywhere below (train[1:]),
    # so it is not counted as a person; column 1 holds the '0'/'1' labels.
    print('Preliminary Statistics:')
    print('{} people. {} features.'.format(
        np.shape(train)[0] - 1, np.shape(train)[1] - 2))
    print('{} survivors. {} deceased.\n'.format(
        (train[1:, 1] == '1').sum(), (train[1:, 1] == '0').sum()))

    # Map string features to floats
    print('Mapping Features to Floats.\n')
    dictN = {}  # modified in call (useful for name feature)
    dictC = {}  # modified in call (useful for cabin feature)
    dat, dictN, dictC = utils.mapToF(train[1:, :], 0, dictN, dictC)

    # Class labels
    lab = np.array([int(h) for h in train[1:, 1]])

    # Generate better model for missing Age feature.
    # NOTE(review): column 2 is presumably Age with -1.0 marking a missing
    # value - confirm against utils.mapToF.
    mask = dat[:, 2] != -1.0
    dat2 = np.zeros((mask.sum(), 9), dtype=np.float64)
    tar2 = np.zeros(mask.sum(), dtype=np.float64)
    dat, dat2, tar2 = utils.AgeModel2(dat, dat2, tar2, 1)

    # Preliminary Plots
    print('Generating preliminary scatter plots of data.\n')
    utils.PrelimPlots(dat, lab)
    utils.AgePlots(dat)

    # ML algorithms
    print('Choosing best parameters for Random Forest algorithm:')
    optim = TestRandForest(dat, lab)

    # Plotting Learning Curve
    print('')
    print('Plotting the learning curve\n')
    plotLearningCurve(dat, lab, optim)

    # Where is algorithm failing?
    print('Where is algorithm failing:\n')
    whereFailing(dat, lab, optim)

    # Read in test set
    print('Reading in Test Set\n')
    test = utils.ReadFile('test.csv', 0)

    # Map to floats, reusing the dictionaries built from the training set
    testF, dictN, dictC = utils.mapToF(test[1:, :], 1, dictN, dictC)

    # Make better prediction for missing Age features
    testF, dat2, tar2 = utils.AgeModel2(testF, dat2, tar2, 0)

    # Generate scatter plot for test set
    utils.plotData(testF, 0)

    # Make prediction. min_samples_split must be at least 2: scikit-learn
    # >= 0.18 raises ValueError for 1, and a single-sample node cannot be
    # split anyway. 2 is also the library default.
    print('Making Prediction\n')
    clf = RandomForestClassifier(n_estimators=optim[0],
                                 max_features=optim[1],
                                 min_samples_split=2)
    clf = clf.fit(dat, lab)
    pred = clf.predict(testF)

    # Now output prediction
    print('Outputting Prediction\n')
    utils.OutputFile(pred, train[0, :2], test[1, 0], 0)
    print('Done')
# Fit the second regressor and collect its parameters as [intercept, w1, w2].
regressor2.fit(trainInputs, trainOutputs)
fittedCoefficients = regressor2.coef_
wPrim = [regressor2.intercept_, fittedCoefficients[0], fittedCoefficients[1]]

print("-----manual-----")
print("The learnt model: f(X,w) = {!s} + {!s} * X1 + {!s} * X2".format(
    wPrim[0], wPrim[1], wPrim[2]))

# Report the test error twice: once through mean_squared_error and once
# through the local meanSquareError, both on regressor2's predictions.
print("-----performance-----")
toolError = str(mean_squared_error(testOutputs,
                                   regressor2.predict(testInputs)))
print("Prediction error (tool): ", toolError)
manualError = str(meanSquareError(testOutputs,
                                  regressor2.predict(testInputs)))
print("Prediction error (manual): ", manualError)

# One histogram per variable.
for series, label in ((gdpData, 'GDP'),
                      (freedomData, 'Freedom'),
                      (outputs, 'Happiness score')):
    plotDataHistogram(series, label)

# NOTE(review): both model plots below receive `w`, not the `wPrim` fitted
# above - confirm that is the intended model.
# for train and test data
plotData(gdpData, freedomData, outputs, w, "Train & test data")

# for train data
plotData(trainGdp, trainFreedom, trainOutputs, w,
         "Train data and the learnt model")

# for test data
computedTestOutputs = regressor2.predict(testInputs)
plotData2(testGdp, testFreedom, testOutputs, computedTestOutputs,
          "Computed(green) vs real(red) test data")