Example 1
	      print("\n")

	path = 'model.pth'
	#torch.save(model.state_dict(), path)

elif args.mode == "topNsimilar":
	path = 'model.pth'
	state_dict = torch.load(path)
	embedding = state_dict['input_embedding.weight'].to('cpu')
	words = getSimilarWords(embedding, args.word, args.N_words, idx2word, word2idx)
	print(", ".join(words))

elif args.mode == "Analogy":
	path = 'model.pth'
	state_dict = torch.load(path)
	embedding = state_dict['input_embedding.weight'].to('cpu')
	words = getAnalogy(embedding, args.word1, args.word2, args.word3, 5, idx2word, word2idx)
	print(words[1])

elif args.mode == "GetScore":
	path = 'model.pth'
	state_dict = torch.load(path)
	embedding = state_dict['input_embedding.weight'].to('cpu')
	print(getScore(embedding, args.word1, args.word2, idx2word, word2idx))
		
elif args.mode == "plot":
	path = 'model.pth'
	state_dict = torch.load(path)
	embedding = state_dict['input_embedding.weight'].to('cpu')
	plotData(embedding, idx2word)
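
The helpers getSimilarWords, getAnalogy, and getScore are defined elsewhere in this project and are not shown here. As a rough guide, a minimal sketch of getSimilarWords, assuming it ranks the vocabulary by cosine similarity to the query word's embedding row:

import torch
import torch.nn.functional as F

def getSimilarWords(embedding, word, n, idx2word, word2idx):
    # Hypothetical sketch: cosine similarity of every vocabulary row
    # against the query word's vector.
    query = embedding[word2idx[word]].unsqueeze(0)
    sims = F.cosine_similarity(embedding, query, dim=1)
    # Take n + 1 candidates so the query word itself can be dropped.
    top = torch.topk(sims, n + 1).indices.tolist()
    return [idx2word[i] for i in top if i != word2idx[word]][:n]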
Example 2
    for index in word_indices:
        x[index] = 1

    # ===========================================================

    return x

# -------------------------- Testing Gaussian Kernel --------------------------------------

# Load from ex6data1
# You will have X, y as keys in the dict data
data = loadmat(os.path.join('Data', 'ex6data1.mat'))
X, y = data['X'], data['y'][:, 0]

# Plot training data
utils.plotData(X, y)
#pyplot.show()

# You should try to change the C value below and see how the decision
# boundary varies (e.g., try C = 1000)
C = 1

model = utils.svmTrain(X, y, C, utils.linearKernel, 1e-3, 20)
utils.visualizeBoundaryLinear(X, y, model)
#pyplot.show()

x1 = np.array([1, 2, 1])
x2 = np.array([0, 4, -1])
sigma = 2

sim = gaussianKernel(x1, x2, sigma)
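
gaussianKernel is the function this exercise asks you to implement; a minimal sketch of the standard RBF form, under which the call above evaluates to exp(-9/8) ≈ 0.325:

def gaussianKernel(x1, x2, sigma):
    # Radial basis function (RBF) kernel between two vectors:
    # exp(-||x1 - x2||^2 / (2 * sigma^2)).
    return np.exp(-np.sum((x1 - x2) ** 2) / (2 * sigma ** 2))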
Example 3
    testInputs = [inputs[i] for i in testSample]
    testOutputs = [numericalOutputs[i] for i in testSample]

    # print(trainInputs)
    # print(testInputs)
    trainInputs, testInputs = \
        zScoreNormalization(trainInputs, testInputs)
    # print(trainInputs)
    # print(testInputs)

    trainFeature1 = [sample[0] for sample in trainInputs]
    trainFeature2 = [sample[1] for sample in trainInputs]
    trainFeature3 = [sample[2] for sample in trainInputs]
    trainFeature4 = [sample[3] for sample in trainInputs]

    plotData(trainFeature1, trainFeature2, trainFeature3, trainFeature4,
             "Train data")

    # tool
    classifier = linear_model.LogisticRegression(max_iter=1000)
    classifier.fit(trainInputs, trainOutputs)
    print("-----------tool-----------")
    print("Real: " + str(testOutputs))
    print("Comp: " + str(list(classifier.predict(testInputs))))
    print("Accuracy: " +
          str(accuracy(testOutputs, list(classifier.predict(testInputs)))))

    # manual

    classifierIrisSetosa = MyLogisticRegression()
    classifierIrisVersicolor = MyLogisticRegression()
    classifierIrisVirginica = MyLogisticRegression()
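
zScoreNormalization and accuracy are project helpers not shown in this snippet. A minimal sketch of zScoreNormalization, assuming it standardises each feature with the mean and standard deviation of the training split only, so no test information leaks into the transform:

import numpy as np

def zScoreNormalization(trainInputs, testInputs):
    # Hypothetical sketch: fit the scaler on the training split,
    # then apply the same transform to both splits.
    train = np.asarray(trainInputs, dtype=float)
    test = np.asarray(testInputs, dtype=float)
    mean, std = train.mean(axis=0), train.std(axis=0)
    return ((train - mean) / std).tolist(), ((test - mean) / std).tolist()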
Example 4
def main():

    # Read training set
    print()
    print('Reading training set...')
    train = utils.ReadFile('train.csv', 1)
    print('Finished reading...\n')

    # Preliminary Statistics
    print('Preliminary Statistics:')
    print(np.shape(train)[0] - 1, 'people.', np.shape(train)[1] - 2, 'features.')
    print((train[1:, 1] == '1').sum(), 'survivors.',
          (train[1:, 1] == '0').sum(), 'deceased.\n')
    
    #Testing
    id = 10
    mask = train[1:,id] == ''
    #print list(set(tmp))
    #print train[1:,id]
    #print mask.sum()
        
    # Map string features to floats
    print('Mapping Features to Floats.\n')
    dictN = {} # modified in call (useful for name feature) 
    dictC = {} # modified in call (useful for cabin feature)
    dat, dictN, dictC = utils.mapToF(train[1:,:], 0, dictN, dictC)

    # Class labels
    lab = np.array([int(h) for h in train[1:,1]])

    # Generate better model for missing Age feature
    #means = np.zeros(len(dictN), dtype = np.float64)
    #dat, means = utils.AgeModel(dat, dictN, means, 1)
    mask = dat[:,2] != -1.0
    dat2 = np.zeros((mask.sum(),9), dtype = np.float64)
    tar2 = np.zeros(mask.sum(), dtype = np.float64)
    dat, dat2, tar2 = utils.AgeModel2(dat, dat2, tar2, 1)

    # testing
    #mask = dat[:,2] == -1.0
    #print mask.sum()
  
    # Preliminary Plots
    print('Generating preliminary scatter plots of data.\n')
    utils.PrelimPlots(dat, lab)
    utils.AgePlots(dat)

    #dat = utils.MeanNorm(dat)

    # ML algorithms
    print "Choosing best parameters for Random Forest algorithm:"
    optim = TestRandForest(dat, lab)

    # Plotting Learning Curve
    print()
    print("Plotting the learning curve\n")
    plotLearningCurve(dat, lab, optim)

    # Where is algorithm failing?
    print "Where is algorithm failing:\n"
    whereFailing(dat, lab, optim)

    # Read in test set
    print "Reading in Test Set\n"
    test = utils.ReadFile('test.csv', 0)

    # Map to floats
    testF, dictN, dictC = utils.mapToF(test[1:,:], 1, dictN, dictC)

    # Make better prediction for missing Age Features
    #testF, means = utils.AgeModel(testF, dictN, means, 0)
    testF, dat2, tar2 = utils.AgeModel2(testF, dat2, tar2, 0)

    # Generate scatter plot for test set
    utils.plotData(testF, 0) 

    #testF = utils.MeanNorm(testF)

    # Make prediction
    print "Making Prediction\n"
    clf = RandomForestClassifier(n_estimators=optim[0],
                                 max_features=optim[1],
                                 min_samples_split=2)  # must be >= 2 in current scikit-learn
    clf = clf.fit(dat, lab)
    pred = clf.predict(testF)

    # Now output prediction
    print "Outputting Prediction\n"
    utils.OutputFile(pred, train[0,:2], test[1,0], 0)

    print "Done"
Example 5
data = pd.read_csv('./data/ex2data1.txt',
                   sep=',',
                   names=['test1', 'test2', 'exam'])
X = data.iloc[:, :2].values
y = data.iloc[:, 2].values
print(data.head())

#  ==================== Part 1: Plotting ====================
#  We start the exercise by first plotting the data to understand
#  the problem we are working with.
#
print('Plotting data with + indicating (y = 1) examples '
      'and o indicating (y = 0) examples.')

plotData(X, y)
plt.legend(['Admitted', 'Not admitted'],
           loc='upper right',
           shadow=True,
           fontsize='x-large',
           numpoints=1)

plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')
# plt.show()
input("Program paused. Press Enter to continue...")

# ============ Part 2: Compute Cost and Gradient ============
#  In this part of the exercise, you will implement the cost and gradient
#  for logistic regression. You need to complete the code in
#  costFunction.m
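
plotData here is the plotting helper the exercise builds on; a minimal sketch, assuming positive examples are drawn as black '+' markers and negatives as yellow circles:

import matplotlib.pyplot as plt

def plotData(X, y):
    # Hypothetical sketch: split the examples by label and plot each
    # class with its own marker.
    pos, neg = y == 1, y == 0
    plt.plot(X[pos, 0], X[pos, 1], 'k+', linewidth=2, markersize=7)
    plt.plot(X[neg, 0], X[neg, 1], 'ko', markerfacecolor='y', markersize=7)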
Example 6
    regressor2.fit(trainInputs, trainOutputs)
    wPrim = [regressor2.intercept_, regressor2.coef_[0], regressor2.coef_[1]]
    print("-----manual-----")
    print("The learnt model: f(X,w) = " + str(wPrim[0]) + " + " +
          str(wPrim[1]) + " * X1 + " + str(wPrim[2]) + " * X2")

    # print("Real: " + str(testOutputs))
    # print("Computed: " + str(list(regressor2.predict(testInputs))))

    print("-----performance-----")
    print("Prediction error (tool): ",
          str(mean_squared_error(testOutputs, regressor2.predict(testInputs))))
    print("Prediction error (manual): ",
          str(meanSquareError(testOutputs, regressor2.predict(testInputs))))

    plotDataHistogram(gdpData, 'GDP')
    plotDataHistogram(freedomData, 'Freedom')
    plotDataHistogram(outputs, 'Happiness score')

    # for train and test data
    plotData(gdpData, freedomData, outputs, w, "Train & test data")

    # for train data
    plotData(trainGdp, trainFreedom, trainOutputs, w,
             "Train data and the learnt model")

    # for test data
    computedTestOutputs = regressor2.predict(testInputs)
    plotData2(testGdp, testFreedom, testOutputs, computedTestOutputs,
              "Computed(green) vs real(red) test data")