Example #1
def main():
    startTime = datetime.datetime.now()
    print("Start time: {}".format(startTime))

    # -- Classifier? --
    print("60-473 assignment 02")
    print("0. Quit")
    print("1. Linear")
    print("2. Polynomial")
    print("3. RBF")
    print("4. Calculate ROC curve")
    kernelDecision = input("What kind of SVM kernel do you want to try? ")

    # Parsing decision
    if kernelDecision == "0":
        quit(0)
    elif kernelDecision == "4":
        svm.calculateROC()
        # Printing time analytics
        endTime = datetime.datetime.now()
        diffTime = endTime - startTime
        print("Start time: {}".format(startTime))
        print("End time: {}".format(endTime))
        print("Total elapsed time: {}".format(diffTime))
        return
    elif kernelDecision not in ["1", "2", "3"]:
        print("Not a valid input. Exiting...")
        quit(0)

    # Map the decision to a kernel name
    if kernelDecision == "1":
        kernel = "linear"
    elif kernelDecision == "2":
        kernel = "poly"
    else:
        kernel = "rbf"

    # -- Cross validation? --
    print("\nUse 10-fold cross validation?")
    print("0. Exit")
    print("1. Yes")
    print("2. No")
    cvDecision = input("What do you want to do? ")

    # Parsing cvDecision
    if cvDecision == "0":
        quit(0)
    elif cvDecision not in ["1", "2"]:
        print("Not a valid input. Exiting...")
        quit(0)
    cross_validation = cvDecision == "1"

    svm.classify(kernel=kernel, cross_validation=cross_validation)

    # Printing time analytics
    endTime = datetime.datetime.now()
    diffTime = endTime - startTime
    print("Start time: {}".format(startTime))
    print("End time: {}".format(endTime))
    print("Total elapsed time: {}".format(diffTime))
Example #2
def mainTest(X_train, X_test, y_train, y_test, k):
    print("--Test 1--")

    M = 3

    # PCA Work
    print("\nTraining data:")
    comp_1 = pca.pca(X_train, M)
    X_train_t = pca.transform(X_train, comp_1)

    print("\nTesting data:")
    comp_2 = pca.pca(X_test, M)
    X_test_t = pca.transform(X_test, comp_2)

    # Print base results.
    print("\nBefore PCA - Dim ", len(X_train[0]))

    classifier = svm.train(X_train, y_train, k, C=None)
    info = svm.classify(classifier, X_test, return_sums=True)

    printResults(info[1], y_test, info[0])

    # Print transformed results.
    print("After PCA - Dim ", M)
    X_train = X_train_t
    X_test = X_test_t

    classifier = svm.train(X_train, y_train, k, C=None)
    info = svm.classify(classifier, X_test, return_sums=True)

    printResults(info[1], y_test, info[0])
Example #3
def plot_surfaceSVM(x_1, x_2, w, w0, ax=None, threshold=0.0, contourf=False):
    """Plots the decision surface of ``est`` on features ``x1`` and ``x2``. """
    xx1, xx2 = np.meshgrid(np.linspace(x_1.min(), x_1.max(), 500),
                           np.linspace(x_2.min(), x_2.max(), 500))
    # plot the hyperplane by evaluating the parameters on the grid
    X_pred = np.c_[xx1.ravel(),
                   xx2.ravel()]  # convert 2d grid into seq of points

    # pred = est.predict(X_pred)
    pred = np.empty([X_pred.shape[0], 1])
    for i in range(0, X_pred.shape[0]):
        pred[i] = svm.classify(X_pred[i], w, w0)

    Z = pred.reshape((500, 500))  # reshape seq to grid
    if ax is None:
        ax = plt.gca()
    # plot line via contour plot

    if contourf:
        ax.contourf(xx1,
                    xx2,
                    Z,
                    levels=np.linspace(0, 1.0, 10),
                    cmap=plt.cm.RdBu,
                    alpha=0.6)
    ax.contour(xx1, xx2, Z, levels=[threshold], colors='black')
    ax.set_xlim((x_1.min(), x_1.max()))
    ax.set_ylim((x_2.min(), x_2.max()))
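A minimal usage sketch for plot_surfaceSVM, assuming svm.classify(x, w, w0) scores a single point as sign(w.x + w0); the data, weight vector, and bias below are made up for illustration.

import numpy as np
import matplotlib.pyplot as plt

# Hypothetical 2-D data and linear-SVM parameters (illustration only).
rng = np.random.RandomState(0)
X = rng.randn(100, 2)
w = np.array([1.0, -1.0])  # assumed weight vector
w0 = 0.25                  # assumed bias

plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, contourf=True)
plt.scatter(X[:, 0], X[:, 1], c=np.sign(X @ w + w0), cmap=plt.cm.RdBu)
plt.show()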
Example #4
def predict_with_model(embeddings, l=0):
    embeddings4layers = []
    for embedding in embeddings:
        embeddings4layers.append(np.mean(embedding[:, -768 * 4:], axis=0))
    results = []
    for y in range(0, 5):
        model_file_name = 'svm_model_y' + str(y) + '.joblib'
        if os.path.isfile(model_file_name):
            classifier = load(model_file_name)
        else:
            classifier, _, _, _ = svm.classify(
                "essays_mairesse_sb_tokenized_200_max_rev_vector.p",
                y,
                -1,
                0,
                add_mairesse=False)
            dump(classifier, model_file_name)
        predicts = classifier.predict(embeddings4layers)
        mean = np.mean(predicts)
        if mean != 0.5:
            results.append(np.round(mean))
        else:
            results.append(
                classifier.predict([np.mean(embeddings4layers, axis=0)])[0])

    return results
Example #5
def eval_svm(x, y, kernel, n):
    if len(x) < n:  # return None when x has fewer samples than folds
        print("The number of samples is too small for the number of folds.")
        return
    sep_x = np.array(np.split(x, n))
    sep_y = np.array(np.split(y, n))
    correct_num = 0
    for i in range(n):
        ind = np.ones(n, dtype=bool)
        ind[i] = False
        train_data = sep_x[ind].reshape(-1, 2)
        train_ans = sep_y[ind].flatten()
        eval_data = sep_x[i]
        eval_ans = sep_y[i]

        alpha = svm.get_alpha(train_data, train_ans, kernel)
        if alpha is None:
            print("aborted!")
            return
        w, theta = svm.get_param(train_data, train_ans, alpha, kernel)
        predict_ans = svm.classify(train_data, train_ans, alpha, theta, kernel,
                                   eval_data)
        correct_num += len(np.where(eval_ans == predict_ans)[0])

    return correct_num / len(x)
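The fold selection above relies on a boolean mask: setting ind[i] = False picks every fold except the i-th as training data. A self-contained sketch of that indexing trick, with toy data and independent of the svm module:

import numpy as np

x = np.arange(12).reshape(6, 2)      # six 2-D samples
sep_x = np.array(np.split(x, 3))     # three folds, shape (3, 2, 2)

i = 1                                # hold out fold 1
ind = np.ones(3, dtype=bool)
ind[i] = False
train = sep_x[ind].reshape(-1, 2)    # folds 0 and 2, stacked back to (4, 2)
test = sep_x[i]                      # fold 1
print(train.shape, test.shape)       # (4, 2) (2, 2)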
Example #6
def get_performance(task,image_hash,image_fs,model_config,convolve_func):
    stats = ['test_accuracy','ap','auc','mean_ap','mean_auc','train_accuracy']    
    classifier_kwargs = task.get('classifier_kwargs',{})

    split_results = []  
    splits = generate_splits(task,image_hash) 
    filterbank = filter_generation.get_filterbank(model_config)
    for (ind,split) in enumerate(splits):
        print('split', ind)
        train_data = split['train_data']
        test_data = split['test_data']
        
        train_filenames = [t['filename'] for t in train_data]
        test_filenames = [t['filename'] for t in test_data]
        assert set(train_filenames).intersection(test_filenames) == set([])
        
        train_features = sp.row_stack([extract_features(im, image_fs, filterbank, model_config, convolve_func) for im in train_data])
        test_features = sp.row_stack([extract_features(im, image_fs, filterbank, model_config, convolve_func) for im in test_data])
        train_labels = split['train_labels']
        test_labels = split['test_labels']

        res = svm.classify(train_features,train_labels,test_features,test_labels,**classifier_kwargs)

        split_results.append(res)

    model_results = SON([])
    for stat in stats:
        if stat in split_results[0] and split_results[0][stat] is not None:
            model_results[stat] = sp.array([split_result[stat] for split_result in split_results]).mean()

    return model_results, filterbank
Example #7
    def img_callback(self, img):
        # convert ros image message into an open cv image
        try:
            image = self.bridge.compressed_imgmsg_to_cv2(img, "bgr8")
        except CvBridgeError as e:
            print(e)
            return

        # compress (cv2.resize returns the resized image, so reassign it)
        image = cv2.resize(image, (256, 256))
        original = image.copy()

        # get pixels from CNN -- list of obstacles, which are lists of pixels that make up each obstacle
        obstacles_lst = predict_relevant(image)

        # turn list of pixels into a list of feature vectors for each pixel in each obstacle
        X = []
        for obstacle_pixels in obstacles_lst:
            X.append([[image[j][i] for i, j in obstacle_pixels]
                      ])  # double check whether its j,i or i,j

        # SVM classifications list - classifies each obstacle in the list
        classifications = classify(image, self.clf, X)

        # format the output, change the pixel values of obstacles to red or green based on classification
        for i in range(
                len(obstacles_lst)
        ):  # index of obstacles, which should align with classifications
            for p in obstacles_lst[i]:
                if classifications[i] == 'Rock':
                    cv2.rectangle(image, tuple((p[1], p[0])),
                                  tuple((p[1], p[0])), (0, 0, 255),
                                  1)  # red pixel
                else:
                    cv2.rectangle(image, tuple((p[1], p[0])),
                                  tuple((p[1], p[0])), (0, 255, 0),
                                  1)  # green pixel

        # Format the output to see original and classified image next to each other
        vis = np.concatenate((original, image), axis=1)

        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(vis, "bgr8"))
        except CvBridgeError as e:
            print(e)
Example #8
def whole_model(**kwargs):
    read(kwargs['link'], kwargs['input_dim'])
    _, _, _, _, auto_runtime, auto_err = \
        autoencoder(kwargs['epoch'], kwargs['batch'], kwargs['latent'],
                    kwargs['encoder_o'], kwargs['encoder_i'], kwargs['decoder_i'],
                    kwargs['decoder_o'], kwargs['train_percent'], kwargs['lam'],
                    kwargs['norm_order'], kwargs['loss_plot'])
    _, svm_runtime, svm_err = classify(kwargs['gamma'], kwargs['c'],
                                       kwargs['train_percent'])
    return auto_runtime, auto_err, svm_runtime, svm_err
Example #9
def main():
    print 'program start:', datetime.datetime.now()
    #Define our connection string
    conn_string = "host='52.74.79.13' dbname='sammy' user='******' password='******'"

    # print the connection string we will use to connect
    print "Connecting to database\n	->%s" % (conn_string)

    # get a connection, if a connect cannot be made an exception will be
    # raised here
    conn = psycopg2.connect(conn_string)

    # conn.cursor will return a cursor object, you can use this cursor to
    # perform queries
    cursor = conn.cursor()
    print "Connected!\n"

    cursor.execute(
        "select vi.vid, tf.*, vi.duration, tl.grade from train_features tf inner join \
	video_info vi on tf.video_id = vi.video_id \
	inner join train_label tl on tl.user_id = tf.user_id \
	 order by user_id, event_time;")

    # where tf.user_id in ('ff930d24cbdeb11e6dde8ceb0da5ac64', 'eee1df0fff33a37873990992bed20e82') \
    records = cursor.fetchall()
    print('fetch train data done, ', datetime.datetime.now())
    svm_trainset = createFeatures(records, True)

    cursor.execute(
        "select vi.vid, tf.*, vi.duration from test_features tf inner join \
	video_info vi on tf.video_id = vi.video_id \
	 order by user_id, event_time;")
    # where tf.user_id in ('a74fe6d4812fa93a1afa1a6a334ebdda', '4ab9d6eadf7510198f468d10fc29f689', '55654c092cd47b64ec9860f6a9cf3b40') \
    records = cursor.fetchall()
    print('fetch test data done, ', datetime.datetime.now())
    svm_testset = createFeatures(records, False)

    svm.train(svm_trainset['featureList'], svm_trainset['labelList'])
    svm.classify(svm_testset['featureList'], svm_testset['userList'])

    print('program finish', datetime.datetime.now())
Example #10
def main():
    if len(sys.argv) < 2:
        print "Must provide data file name. Exiting\n"
        return
    dataFile = sys.argv[1]

    #read file, shuffle, split into 2/3 train, 1/3 test
    data = readFile2(dataFile)
    np.random.seed(0)
    shuffled = shuffleMatrix(data)
    train, test = splitData(shuffled, 0.66)

    #standardize data -- training
    std, mus, sigmas = standardizeDataExceptLast(train)

    #standardize data -- testing. Standardize
    #with the values obtained in training set
    std_test = standardizeTestSF(test, mus, sigmas)

    tp = tn = fp = fn = 0.
    print "Total test rows: ", len(test)
    #pass features and class label separately to the
    #classifier training function
    SVM_classifier = svm.trainClassifier(train[:, :-1], train[:, -1])

    for idx, t in enumerate(test):
        c = svm.classify(SVM_classifier, [ t[:-1] ])
        if idx % 100 == 0:
            print "# test rows classified:", idx, "-- " + str(float(idx)/float(len(test)) * 100) + "% done."
        #count stats. Compare last entry in this row to the SVM classification
        if t[-1] == 1:
            if c == 1:
                tp +=1
            elif c == 0:
                fn +=1
        elif t[-1] == 0:
            if c == 1:
                fp +=1
            elif c == 0:
                tn += 1

    precision = div(tp, tp+fp)
    recall = div(tp, tp+fn)
    f_measure = fmeasure(precision, recall)
    accuracy = div(tp+tn, tp+tn+fp+fn)

    print "TP: ", tp, "TN: ", tn
    print "FP: ",  fp, "FN: ", fn
    print "Precision =", precision
    print "Recall =", recall
    print "f-measure =", f_measure
    print "Accuracy =", accuracy
Example #11
def main():
    if len(sys.argv) < 2:
        print "Must provide data file name. Exiting\n"
        return
    dataFile = sys.argv[1]

    #read file, shuffle, split into 2/3 train, 1/3 test
    data = readFile3(dataFile)

    #get a list of all classes from dataset for 1vs1 classification
    classes = np.unique(data[:, -1])
    k = len(classes)
    np.random.seed(0)
    shuffled = shuffleMatrix(data)
    train, test = splitData(shuffled, 0.66)

    #standardize data -- training
    std, mus, sigmas = standardizeDataExceptLast(train)

    #standardize data -- testing. Standardize
    #with the values obtained in training set
    std_test = standardizeTestSF(test, mus, sigmas)

    tp = tn = fp = fn = 0.

    #train k(k-1)/2 classifiers
    numClassifiers = k * (k - 1) / 2

    classifiers = list()

    pairs = combinations(classes, 2)
    for pair in pairs:
        #extract data where class is pair[0] or pair[1]
        # d = train[np.where( train[:,-1] == pair[0] or train[:,-1] == pair[1])]
        class1 = train[np.where(train[:, -1] == pair[0])]
        class2 = train[np.where(train[:, -1] == pair[1])]
        d = np.vstack([class1, class2])
        classifier = svm.trainClassifier(d[:, :-1], d[:, -1])
        classifiers.append(classifier)

    correctClassifications = 0.
    for idx, t in enumerate(test):
        scores = Counter()
        for classifier in classifiers:
            c = svm.classify(classifier, [t[:-1]])[0]
            scores[c] += 1
        best = scores.most_common()[0]
        if best[0] == t[-1]:
            correctClassifications += 1
    accuracy = div(correctClassifications, len(test))
    print "Accuracy = ", accuracy
Example #12
def add_max_200_svm_probability(path, l=0):
    # y, cv, and test_df appear to be module-level globals in the original source
    classifier, X_test, y_test, _ = svm.classify(path, y, cv, l)
    x = classifier.predict_proba(X_test)
    test_df["svm_predict0"] = x[:, 0]
    test_df["svm_predict1"] = x[:, 1]
    determined = test_df.groupby("#AUTHID").mean()
    determined["predicted_label"] = determined.apply(
        lambda row: label_assigner(row["svm_predict0"], row["svm_predict1"]),
        axis=1)
    determined["is_svm_true"] = determined.apply(lambda row: truth_determiner(
        row["y" + str(y)], row["predicted_label"]),
                                                 axis=1)
    acc1 = len(determined[determined["is_svm_true"] == 1])
    acc = acc1 / len(determined)
    return acc
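label_assigner and truth_determiner are helpers not shown on this page; a hypothetical sketch consistent with how they are used above (picking the class with the larger mean probability, and scoring a prediction 1 or 0 against the ground truth):

def label_assigner(p0, p1):
    # Hypothetical: choose class 0 if its mean probability is larger, else class 1.
    return 0 if p0 > p1 else 1

def truth_determiner(true_label, predicted_label):
    # Hypothetical: 1 for a correct prediction, 0 otherwise.
    return 1 if true_label == predicted_label else 0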
Example #13
def main():
	parser = argparse.ArgumentParser(description='Run SVM and Perceptron algorithms on the Adult Data Set.')
	parser.add_argument('Training_filename', help='Training file')
	parser.add_argument('Test_filepath', help='Test file')
	args = parser.parse_args()
	dev = args.Training_filename
	test = args.Test_filepath

	print "Loading the data files...\n"
	X,Y = matrixbuild(args.Training_filename)
	DX,DY = matrixbuild(dev)
	TX, TY = matrixbuild(test)

	print "Training for the perceptron.\n"
	perc_weights = perceptron1.gradienttrain(X,Y,100)

	print "\n\nChecking accuracy on the test set.\n"
	perc_accuracy = perceptron1.classify(TX, TY, perc_weights)
	
	print ("The accuracy of the perceptron on the test set was %s%%\n" % perc_accuracy)
	

	#-------------Run the SVM algorithm------------#

	# find_c(dev_matrix,dev_classes, runs_each, learn)
	print "Finding best c from the dev set...\n"
	c, c_accuracy, c_list = svm.find_c(DX, DY, 20, 0.5)

	# train(data_matrix, real_classes, runs, learn, cost)

	print ("\n\nTraining for the SVM with C = %f\n" % c)
	acc, svm_weights, b = svm.train(X, Y, 100, 0.5, c, "train")
	
	# classify(test_matrix, test_class, weights, b)

	print "\n\nChecking accuracy on the test set.\n"
	svm_accuracy = svm.classify(TX,TY,svm_weights, b)

	print ("The accuracy of the SVM on the test set was %s%%" % svm_accuracy)

	plt.plot(c_list, c_accuracy)
	plt.xlabel("Cost value")
	plt.ylabel("Accuracy")
	plt.title("C vs. Accuracy")
	plt.show()
Example #14
def classify_image(images, mask_list, k_size, save, display):
    """
    Classify pixels of a single image
    """
    if len(images) > 1:
        raise ValueError('Only one image can be classified at once')
    logging.info('Calculating, normalizing feature vectors for image')
    image = images[0]  # First and only member
    vectors = calculate_features(image.image, image.fov_mask, mask_list,
                                 k_size)
    logging.info('Classifying image pixels')
    probabilities, prediction = svm.classify(vectors)
    svm.assess(image.truth, prediction)
    svm.plot_roc(image.truth, probabilities)

    if save:
        image_utils.save_image(prediction, 'prediction.png')
        logging.info('Saved classified image')
    if display:
        image_utils.display_image(prediction)
        logging.info('Displaying classified image')
Example #16
def test(thrsh, k, i, da):

    print(i, end=" ")

    # Train and predict values.
    classifier = svm.train(da[0], da[2], k, threshold=thrsh)
    info = svm.classify(classifier, da[1], return_sums=True)

    y_pred = info[0]
    sums = info[1]

    # Print percentage success
    percent = 1 - np.mean(y_pred != da[3].T)
    if (percent > .5):
        print(colored("{:.2f}\t".format(percent), 'green'), end=" ")
    elif (percent > .01):
        print(colored("{:.2f}\t".format(percent), 'blue'), end=" ")
    else:
        print(colored("{:.2f}\t".format(percent), 'red'), end=" ")
    if i % 4 == 0:
        print()

    return percent, y_pred, sums
Example #17
def add_max_200_svm(path, l=0):
    classifier, X_test, y_test, _ = svm.classify(path, y, cv, l)
    test_df["svm_predict"] = classifier.predict(X_test)
    test_df["vector"] = X_test
    test_df["is_svm_true"] = test_df.apply(
        lambda row: truth_determiner(row["y" + str(y)], row["svm_predict"]),
        axis=1)
    determined = test_df.groupby("#AUTHID").mean()
    determined["vector"] = test_df.groupby("#AUTHID")["vector"].apply(np.mean)
    dont_know = determined[determined["is_svm_true"] == 0.5].copy()
    test = dont_know["vector"].to_list()
    if not dont_know.empty:
        dont_know["svm_predict"] = classifier.predict(test)
        dont_know["is_svm_true"] = dont_know.apply(
            lambda row: truth_determiner(row["y" + str(y)], row["svm_predict"]
                                         ),
            axis=1)
        acc2 = len(dont_know[dont_know["is_svm_true"] == 1])
    else:
        acc2 = 0
    acc1 = len(determined[determined["is_svm_true"] > 0.5])

    acc = (acc2 + acc1) / len(determined)
    return acc1, acc2, acc
Example #18
# process CNN data
cnndata = loader.CNN_feature_loader([], tstset, dataFolder)
cnn_test = cnndata['vin_testing']
cnn_test_extracted = [cnn_test[vin] for vin in tstset]

cnn_recordTest = cnndata['record_testing']
cnn_rTdata = np.asarray(map(lambda x:x['data'],cnn_recordTest))
cnn_rt_length = len(cnn_rTdata)
cnn_rT_data = cnn_rTdata.reshape(cnn_rt_length,576)
cnn_rT_label = np.asarray(map(lambda x:x['label'],cnn_recordTest)).reshape(cnn_rt_length,2)
print "testing set length %i" %(len(tstset))
print "label ratio: %i : %i" %(len(tstset)-numofone,numofone)


print "=========testing phase========="
s = svm.classify(svm_tst_set_feature,modelFolder)
c = cnn.classify("trained/"+modelFolder+"/cnnmodel.ckpt",cnn_test_extracted)
#print "svm prediction: "
#print s
#print "cnn prediction"
#print c

compound = zip(s,c)
result = map(lambda x:1 if(x[0][0]+x[1][0]<1)else 0,compound)
s_res = map(lambda x:1 if(x[0]<0.5)else 0,s)
c_res = map(lambda x:1 if(x[0]<0.5)else 0,c)

TP=0
FP=0
FN=0
TN=0
Example #19
def evaluate(outfile,feature_certificate,cpath,task,ext_hash):

    conn = pm.Connection(document_class=bson.SON)
    db = conn[DB_NAME]
    
    perf_fs = gridfs.GridFS(db,'performance')
    perf_coll = db['performance.files']
    
    remove_existing(perf_coll,perf_fs,ext_hash)

    feature_certdict = cPickle.load(open(feature_certificate))
    feature_hash = feature_certdict['feature_hash']
    image_hash = feature_certdict['image_hash']
    model_hash = feature_certdict['model_hash']
    image_config_gen = feature_certdict['args']['images']
    model_col = db['models.files']
    feature_fs = gridfs.GridFS(db,'features')
    feature_col = db['features.files']
    
    stats = ['test_accuracy','ap','auc','mean_ap','mean_auc','train_accuracy']    
       
    if isinstance(task,list):
        task_list = task
    else:
        task_list = [task]
    
    model_configs = get_most_recent_files(model_col,{'__hash__':model_hash})
    
    for m in model_configs:
        print('Evaluating model',m) 
        for task in task_list:
            task['universe'] = task.get('universe',SON([]))
            task['universe']['model'] = m['config']['model']
            print('task', task)
            classifier_kwargs = task.get('classifier_kwargs',{})    
            split_results = []
            splits = generate_splits(task,feature_hash,'features') 
            for (ind,split) in enumerate(splits):
                print('split', ind)
                train_data = split['train_data']
                test_data = split['test_data']
                
                train_filenames = [t['filename'] for t in train_data]
                test_filenames = [t['filename'] for t in test_data]
                assert set(train_filenames).intersection(test_filenames) == set([])
                
                print('train feature extraction ...')
                train_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task) for f in train_data])
                print('test feature extraction ...')
                test_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task) for f in test_data])
                train_labels = split['train_labels']
                test_labels = split['test_labels']
    
                print('classifier ...')
                res = svm.classify(train_features,train_labels,test_features,test_labels,classifier_kwargs)
                print('Split test accuracy', res['test_accuracy'])
                split_results.append(res)
        
            model_results = SON([])
            for stat in stats:
                if stat in split_results[0] and split_results[0][stat] is not None:
                    model_results[stat] = sp.array([split_result[stat] for split_result in split_results]).mean()           
    
            out_record = SON([('model',m['config']['model']),
                              ('model_hash',model_hash), 
                              ('model_filename',m['filename']), 
                              ('images',son_escape(image_config_gen)),
                              ('image_hash',image_hash),
                              ('task',son_escape(task)),
                         ])
                                             
            filename = get_filename(out_record)
            out_record['filename'] = filename
            out_record['config_path'] = cpath
            out_record['__hash__'] = ext_hash
            out_record.update(model_results)
            print('dump out ...')
            out_data = cPickle.dumps(SON([('split_results',split_results),('splits',splits)]))
            
            perf_fs.put(out_data,**out_record)

    createCertificateDict(outfile,{'feature_file':feature_certificate})
Example #20
def main():
    if len(sys.argv) < 2:
        print "Must provide data file name. Exiting\n"
        return
    dataFile = sys.argv[1]

    #read file, shuffle, split into 2/3 train, 1/3 test
    data = readFile3(dataFile)

    #get a list of all classes from dataset for 1vs1 classification
    classes = np.unique(data[:, -1])
    k = len(classes)
    np.random.seed(0)
    shuffled = shuffleMatrix(data)
    train, test = splitData(shuffled, 0.66)

    #standardize data -- training
    std, mus, sigmas = standardizeDataExceptLast(train)

    #standardize data -- testing. Standardize
    #with the values obtained in training set
    std_test = standardizeTestSF(test, mus, sigmas)

    tp = tn = fp = fn = 0.

    #train k(k-1)/2 classifiers
    numClassifiers = k * (k - 1) / 2

    classifiers = list()

    pairs = combinations(classes, 2)
    for pair in pairs:
        #extract data where class is pair[0] or pair[1]
        # d = train[np.where( train[:,-1] == pair[0] or train[:,-1] == pair[1])]
        class1 = train[np.where(train[:, -1] == pair[0])]
        class2 = train[np.where(train[:, -1] == pair[1])]
        d = np.vstack([class1, class2])
        classifier = svm.trainClassifier(d[:, :-1], d[:, -1])
        classifiers.append(classifier)

    #count predictions for each class
    predictions = np.zeros([len(classes), len(classes)])
    classCounts = Counter()

    correctClassifications = 0.
    for idx, t in enumerate(test):
        if idx % 100 == 0:
            print "# test rows classified:", idx, "-- " + str(
                float(idx) / float(len(test)) * 100) + "% done."
        #count stats. Compare last entry in this row to the SVM classification
        scores = Counter()
        for classifier in classifiers:
            c = svm.classify(classifier, [t[:-1]])[0]
            scores[c] += 1
        best = scores.most_common()[0]
        predictedClass = best[0]
        trueClass = t[-1]
        if predictedClass == trueClass:
            correctClassifications += 1

        classCounts[int(trueClass) - 1] += 1
        predictions[int(predictedClass) - 1][int(trueClass) - 1] += 1
    accuracy = div(correctClassifications, len(test))
    cm = makeConfusionMatrix(predictions, len(test))
    print "Accuracy = ", accuracy
    print "Prediction counts: "
    print predictions
    print "Confusion matrix (entries are %): "
    print cm
Example #21
    trvin = vinlist[tr]
    tstvin = vinlist[tst]

    svmtrain = filter(lambda x: x['vin'] in trvin, svmdata)
    svmtest = filter(lambda x: x['vin'] in tstvin, svmdata)
    cnntrain = {}
    cnntest = {}
    for k in cnndata.keys():
        if (k in trvin):
            cnntrain[k] = cnndata[k]
        if (k in tstvin):
            cnntest[k] = cnndata[k]
    svm.train(svmtrain)
    cnn.train(cnntrain)

    svmclassify = svm.classify(svmtest)
    svmres = svmclassify['detail']
    svmacc = svmclassify['accuracy']
    cnnclassify = cnn.classify(cnntest)
    cnnres = cnnclassify['detail']
    cnnacc = cnnclassify['accuracy']
    print "standalone classifier accuracy: svm -- %f , cnn -- %f" % (svmacc,
                                                                     cnnacc)

    pred = {}
    for each in svmres:
        vin = each['vin']
        svm_proba = each['proba_predicted']
        cnn_proba = cnnres[vin]['predsum']
        stack_proba = (svm_proba + cnn_proba)[1]
        pred_label = 1 if (stack_proba > const['decision_boundary']) else 0
Example #22
import numpy as np
import svm
import kernel as k

# Test AND gate
clsfyr = svm.train([[1, 1], [1, -1], [-1, 1], [-1, -1]], [1, -1, -1, -1],
                   k.linear)
# should flag only indices 3 and 4 (the two [1, 1] inputs) as positive
print("classified: " + str(
    svm.classify(clsfyr, [[-1, -1], [1, -1], [-1, 1], [1, 1], [1, 1], [1, -1],
                          [-1, -1], [-1, 1]])))
print("\n\n\n\n\n")

X = np.array([[1.0, 0.0], [2.0, 0.0], [3.0, 0.0], [-1.0, 0.0], [-2.0, 0.0],
              [-3.0, 0.0]])
y = np.array([[1.0], [1.0], [1.0], [-1.0], [-1.0], [-1.0]])
clsfyr = svm.train(X, y, k.linear)
print("classified: " + str(svm.classify(clsfyr, X)))
Example #23
def extract_and_evaluate_core(split,m,convolve_func_name,task,cache_port):
    classifier_kwargs = task.get('classifier_kwargs',{})  
    train_data = split['train_data']
    test_data = split['test_data']
    train_labels = split['train_labels']
    test_labels = split['test_labels']                
    train_filenames = [t['filename'] for t in train_data]
    test_filenames = [t['filename'] for t in test_data]
    assert set(train_filenames).intersection(test_filenames) == set([])

    existing_train_features = [get_from_cache((tf,m,task.get('transform_average')),FEATURE_CACHE) for tf in train_filenames]
    existing_train_labels = [train_labels[i] for (i,x) in enumerate(existing_train_features) if x is not None]
    new_train_filenames = [train_filenames[i] for (i,x) in enumerate(existing_train_features) if x is None]
    new_train_labels = [train_labels[i] for (i,x) in enumerate(existing_train_features) if x is None]


    existing_test_features = [get_from_cache((tf,m,task.get('transform_average')),FEATURE_CACHE) for tf in test_filenames]
    existing_test_labels = [test_labels[i] for (i,x) in enumerate(existing_test_features) if x is not None]
    new_test_filenames =[test_filenames[i] for (i,x) in enumerate(existing_test_features) if x is None]
    new_test_labels = [test_labels[i] for (i,x) in enumerate(existing_test_features) if x is None]

    if convolve_func_name == 'numpy':
        num_batches = multiprocessing.cpu_count()
        if num_batches > 1:
            pool = multiprocessing.Pool()
    elif convolve_func_name == 'pyfft':

        num_batches = get_num_gpus()
        if num_batches > 1:
            pool = multiprocessing.Pool(processes = num_batches)
        else:
            pool = None
    else:
        raise ValueError('convolve func name not recognized')

    if num_batches > 1:
        batches = get_data_batches(new_train_filenames,num_batches)
        results = []
        for (bn,b) in enumerate(batches):
            results.append(pool.apply_async(extract_and_evaluate_inner_core,(b,m.to_dict(),convolve_func_name,bn,task.to_dict(),cache_port)))
        results = [r.get() for r in results]
        new_train_features = ListUnion(results)
        batches = get_data_batches(new_test_filenames,num_batches)
        results = []
        for (bn,b) in enumerate(batches):
            results.append(pool.apply_async(extract_and_evaluate_inner_core,(b,m.to_dict(),convolve_func_name,bn,task.to_dict(),cache_port)))
        results = [r.get() for r in results]
        new_test_features = ListUnion(results)
    else:
        print('train feature extraction ...')
        new_train_features = extract_and_evaluate_inner_core(new_train_filenames,m,convolve_func_name,0,task,cache_port)
        print('test feature extraction ...')
        new_test_features = extract_and_evaluate_inner_core(new_test_filenames,m,convolve_func_name,0,task,cache_port)

    #TODO get the order consistent with original ordering
    train_features = sp.row_stack(filter(lambda x : x is not None,existing_train_features) + new_train_features)
    test_features = sp.row_stack(filter(lambda x : x is not None, existing_test_features) + new_test_features)
    train_labels = existing_train_labels + new_train_labels
    test_labels = existing_test_labels + new_test_labels
    
    for (im,f) in zip(new_train_filenames,new_train_features):
        put_in_cache((im,m,task.get('transform_average')),f,FEATURE_CACHE)
    for (im, f) in zip(new_test_filenames, new_test_features):
        put_in_cache((im,m,task.get('transform_average')),f,FEATURE_CACHE)
                           
    print('classifier ...')
    res = svm.classify(train_features,train_labels,test_features,test_labels,classifier_kwargs)
    print('Split test accuracy', res['test_accuracy'])
    return res
Example #24
print('-------------- Start HOG --------------')
# Calculates HOG Descriptor for train and test images
hog_train = {}
hog_test = {}
for key in test_images.keys():
    print('--------------')
    print(key)
    for i in range(0, len(train_images[key])):
        train_act = calculate_hog(train_images[key][i])
        if key not in list(hog_train.keys()):
            hog_train[key] = [train_act]
        else:
            list_train = hog_train[key]
            list_train.append(train_act)
    print('Finished train')
    for j in range(0, len(test_images[key])):
        test_act = calculate_hog(test_images[key][j])
        if key not in list(hog_test.keys()):
            hog_test[key] = [test_act]
        else:
            list_test = hog_test[key]
            list_test.append(test_act)
    print('Finished test')

save_var(args.out_dir + '/HOG_train.npy', hog_train)
save_var(args.out_dir + '/HOG_test.npy', hog_test)

ACA = classify(hog_train, hog_test)
save_var(args.out_dir + '/ACA.npy', ACA)
Example #25
        # KNN
        #.........................
        knn_match = knn.knn(test_sample_x, test_sample_y, letters, n_train)
        matches.append(knn_match)

        #.........................
        # NC
        #.........................
        nc_match = nc.nearest_centroid(test_sample_x, test_sample_y, letters,
                                       n_train)
        matches.append(nc_match)

        #.........................
        # SVM
        #.........................
        svm_match = svm.classify(svm_classifier, test_drawing, letters)
        matches.append(svm_match)

        dtw_matches.append(dtw_match)
        knn_matches.append(knn_match)
        nc_matches.append(nc_match)
        svm_matches.append(svm_match)

        # Add match results for current letter sample
        current_letter_results.append(matches)
    '''
    ii. With the individual matches, now we calculate the overall accuracy of the classifiers for the current letter.
    '''
    dtw_accuracy = 0
    knn_accuracy = 0
    nc_accuracy = 0
Example #26
    def img_callback(self, img):
        # convert ros image message into an open cv image
        # if self.i:
        #     self.i = False
        # else:
        #    return
        try:
            image = self.bridge.compressed_imgmsg_to_cv2(img, "bgr8")
        except CvBridgeError as e:
            print(e)
            return

        image_full = image
        img = cv2.resize(image, (256,256))
        image = img.copy()
        print(img.dtype)
        print('img loaded')
        #rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb_img = convert_for_CNN(img)
        print(rgb_img.dtype)
        # get pixels from CNN -- list of obstacles, which are lists of pixels that make up each obstacle
        obstacles_lst,imagearray = predict_relevant(rgb_img)
        print('CNN done')

        # turn list of pixels into a list of feature vectors for each pixel in each obstacle
        X = []
        for obstacle_lst in obstacles_lst:
            X.append([[image[j][i]/255.0 for j,i in obstacle_lst]])


        # SVM classifications list - classifies each obstacle in the list
        colorflag = True
        if colorflag:
        #classifications = classify(image, self.clf, X)
            classifications = classify(image, self.clf, X)
        else:
            self.clf = pickle.load(open('texture_svm.pkl', 'rb'))
            classifications = ts.predict_img(self.clf, image_full, obstacles_lst)
            print(len(obstacles_lst))
            print(classifications)

        print('svm done')

        # format the output, change the pixel values of obstacles to red or green based on classification
        for i in range(len(obstacles_lst)):  # this should get index of obstacles, which should align with classifications
            pixel_lst = obstacles_lst[i]
            for p in pixel_lst:
                if classifications[i] == 'Rock':
                    cv2.rectangle(image,tuple((p[1],p[0])),tuple((p[1],p[0])),(0,0,255),1) #red pixel
                else:
                    cv2.rectangle(image,tuple((p[1],p[0])),tuple((p[1],p[0])),(0,255,0),1) #green pixel


        # Format the output to see original and classified image next to each other
        vis = np.concatenate((img, image), axis=1)

        #image_print(vis)

        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(vis, "bgr8"))
            self.i = True
        except CvBridgeError as e:
            print(e)
Example #27
lbpFinalTrainVect = clbp.getDescriptorValues(finalTrainVect,cfg.lbpConfig['neigh'],
                                             cfg.lbpConfig['radius'],
                                             cfg.lbpConfig['lbpType'])

# Train a svm
clf = svm.computeSvm(lbpFinalTrainVect,finalTagsVect)

# Vector that contains the LBP values of the test images
lbpFinalTestVect=[]
lbpFinalTestVect=clbp.getDescriptorValues(finalTestVect,cfg.lbpConfig['neigh'],
                                             cfg.lbpConfig['radius'],
                                             cfg.lbpConfig['lbpType'])
                                             
                                             
# Predict all the test image with the svm trained
predictedValues = svm.classify(clf,lbpFinalTestVect)
probaPredictedValues = svm.predictProba(clf,lbpFinalTestVect)
predictedValuesBinary = []

# Translate predicted values in string to binary
for i in range (0,len(predictedValues)):
    if (predictedValues[i] == "Worn piece"):
        predictedValuesBinary.append(0)
    else:
        predictedValuesBinary.append(1)

# Real values of predicted sets
realLabelsBinary = []
realLabelsString = []

for i in range (0,len(predictedValues)):
Example #29
# encoding=utf-8
# import pudb
# pu.db
import pandas as pd
import numpy as np
import svm

d = svm.create_data()
data = d.iloc[:, 2:]
label = d.iloc[:, 0]

norm = svm.normalize(data)
data_transformed = svm.pca(norm, 0.9)
"""
arguments: *;*;ratio;class weight;decision function shape;kernel
"""
grid, accuracy = svm.classify(data_transformed, label, 0.8, {
    0: 20,
    1: 20,
    2: 20,
    3: 30,
    4: 20,
    5: 20
}, 'ovo', 'rbf')

fall_accuracy = grid[3, 6]
Example #30
def main():
    
    path = 'C:\\Documents and Settings\\Administrator\\Desktop\\'
    imgOperations.creatingDirectories(path)
    
    """
    Parte de creación del clasificador
    """

    model = svmModel.CreatingSVMModel()    
    """
    Parte de obtención de inserts
    """
    images,imagesNames = imgOperations.readImages(
                setup.headToolImagesSettings['readPath']
                ,setup.headToolImagesSettings['extension'])
    circles = [ ]    
    predictedValuesBinary = []

    for i, image in enumerate (images):
        imageName = 'Image'+imagesNames[i]
        circles = circleD.findCircles(images[i],imageName,
                                          setup.insertImagesSettings[
                                          'minRadius'],
                                          setup.insertImagesSettings[
                                          'maxRadius'],
                                          False,circles)
        
        for j, circle in enumerate (circles):
            circle = circles[j]
            imageNameSave= str(j)+imageName
            print imageNameSave
            insert = cropImg.cut(circle[0],circle[1],image,
                                 imageNameSave,True,
                                 setup.insertImagesSettings[
                                 'sizeHorizontalInsert'],
                                 setup.insertImagesSettings[
                                 'sizeVerticalInsert'])
            imgOperations.saveImage(insert,setup.directoriesToSaveImgs[
                              'insertsPath']
                              ,imageNameSave)
            try:
                leftBorderPatch = clb.obtaningLeftBorderImage(insert,imageName,
                                                          inserts=False)
                imgOperations.saveImage(leftBorderPatch,setup.directoriesToSaveImgs[
                              'leftBorderPath'],imageNameSave)
    
                patches = cpatches.computingRegions(leftBorderPatch,
                                                  setup.regionSettings['cols'],
                                                  setup.regionSettings['rows'])
    
                for p in range (0,len(patches)):
                    patchesNameSave = imageNameSave + str(p)
                    patch = patches[p]
                    imgOperations.saveImage(patch,setup.directoriesToSaveImgs[
                                      'patchesPath'],patchesNameSave)
                                      
                    """
                    TO-DO crear nombre con el que los diferentes patches son guard
                    ados
                    """
                                     
                #Calculating lbp values
                try:
                    lbpTestPatches = clbp.getDescriptorValues(patches,
                                                   setup.LBPSettings['neigh'],
                                                   setup.LBPSettings['radius'],
                                                   setup.LBPSettings['lbpType'])
    
                    predictedValues = svm.classify(model,lbpTestPatches)
                except AttributeError:
                    print "not valid patches where to compute LBP values"
                    
    
    
                # Translate predicted values from strings to binary
                for p in range(0, len(predictedValues)):
                    if (predictedValues[p] == "Worn piece"):
                        predictedValuesBinary.append(0)
                    else:
                        predictedValuesBinary.append(1)
                        
                #print predictedValuesBinary
                predictedValues = []
                predictedValuesBinary = []
            except Exception:
                print "not a valid Image to crop"
Example #31
    image = img.copy()
    print('img loaded')

    #the image is converted for a suitable representation that can be passed to the CNN
    rgb_img = convert_for_CNN(image)
    # get pixels from CNN -- list of obstacles, which are lists of pixels that make up each obstacle
    obstacles_lst = predict_relevant(rgb_img)
    print('CNN done')

    # turn list of pixels into a list of feature vectors for each pixel in each obstacle
    X = []
    for obstacle_lst in obstacles_lst:
        X.append([[image[j][i] / 255.0 for j, i in obstacle_lst]])

    # SVM classifications list - classifies each obstacle in the list
    classifications = classify(image, clf, X)
    print('svm done')

    # format the output, change the pixel values of obstacles to red or green based on classification
    for i in range(
            len(obstacles_lst)
    ):  # this should get index of obstacles, which should align with classifications
        pixel_lst = obstacles_lst[i]
        for p in pixel_lst:
            if classifications[i] == 'Rock':
                cv2.rectangle(image, tuple((p[1], p[0])), tuple((p[1], p[0])),
                              (0, 0, 255), 1)  #red pixel
            else:
                cv2.rectangle(image, tuple((p[1], p[0])), tuple((p[1], p[0])),
                              (0, 255, 0), 1)  #green pixel
Example #32
            minx = dx
            miny = dy
            dtw_match = letter
finish = round(time.time() - start, 3)
print("\tDTW: {} (time: {} s)".format(dtw_match, finish))

#---------------------------------
# 2. Perform k-nearest neighbors classification
#---------------------------------
start = time.time()
knn_match = knn.knn(captured_x, captured_y, selected_list, n)
finish = round(time.time() - start, 3)
print("\tKNN: {} (time: {} s)".format(knn_match, finish))

#---------------------------------
# 3. Perform nearest centroid classification
#---------------------------------
start = time.time()
nc_match = nc.nearest_centroid(captured_x, captured_y, selected_list, n)
finish = round(time.time() - start, 3)
print("\tNC: {} (time: {} s)".format(nc_match, finish))

#---------------------------------
# 4. Support vector machine
#---------------------------------
start = time.time()
svm_match = svm.classify(svm_classifier, drawing, selected_list)
finish = round(time.time() - start, 3)
print("\tSVM: {} (time: {} s)".format(svm_match, finish))
print("  Demo finished.")