Ejemplo n.º 1
0
 def test(self, feature_data = None):
     #test on current scan:
     print getTime(), 'test on:', self.processor.scan_dataset.id    
         
     if feature_data == None:
         filename = self.processor.get_features_filename()
         dict = ut.load_pickle(filename)
     else:
         dict = feature_data
     
     baseline_labels = self.classify_baseline_code()
 
     return baseline_labels, self.test_results(dict, baseline_labels)  
Ejemplo n.º 2
0
    def test(self, feature_data=None):
        # test on current scan:
        print getTime(), "test on:", self.processor.scan_dataset.id

        if feature_data == None:
            filename = self.processor.get_features_filename()
            dict = ut.load_pickle(filename)
        else:
            dict = feature_data

        baseline_labels = self.classify_baseline_code()

        return baseline_labels, self.test_results(dict, baseline_labels)
Ejemplo n.º 3
0
    def train(self):
        #cv_boost_params = cv.CvBoostParams()

        #priors = cv.cvCreateMat(1,2,cv.CV_32FC1)
        #priors[0] = 10
        #priors[1] = 1
        
        #cv_boost_params.max_categories = 2
        #cv_boost_params.priors = priors #TODO: activate them
        self.cv_classifier = cv.CvDTree() #cv.CvBoost()
        train_datastructures = self.create_train_datastructures()
            
        (train_data, train_labels, type_mask) = train_datastructures
        print 'WARNING! use CvDTree (single decision trees) for now as load/save works!'#'boost'
        print getTime(), self.cv_classifier.train(train_data, cv.CV_ROW_SAMPLE, train_labels, None, None, type_mask ) 
       
        print getTime(), 'traning finished'
Ejemplo n.º 4
0
    def test(self, feature_data = None):
        #test on current scan:
        print getTime(), 'test on:', self.processor.scan_dataset.id    
            
        if feature_data == None:
            filename = self.processor.get_features_filename()
            print 'loading', filename
            dict = ut.load_pickle(filename)
        else:
            dict = feature_data
        
        #print getTime(), dict
        current_set_size = dict['set_size']
        feature_vector_length = len(self.processor.features.get_indexvector(self.features))
        print getTime(), feature_vector_length
        labels = np.array(np.zeros(len(self.processor.map_polys)))
        print 'test: length of labels vector:', len(labels)
        test = cv.cvCreateMat(1,feature_vector_length,cv.CV_32FC1)
        
        if current_set_size == 0:
            print getTime(), 'ERROR: test dataset is empty!'
            return labels, 1, 1, 1

        count = 0
        for index in dict['point_indices']:
            fv = (dict['features'][count])[self.processor.features.get_indexvector(self.features)]
            #print getTime(), fv, dict['features'][count]

            for fv_index, fv_value in enumerate(fv):
                test[fv_index] = fv_value
             
            #print 'class',self.cv_classifier
            label = self.cv_classifier.predict(test)
            #print label.value
            labels[index] = label.value
            #print 'tdone'
            if count % 4096 == 0:
                print getTime(), 'testing:', count, 'of', current_set_size, '(',(float(count)/float(current_set_size)*100.0),'%)'
                
            count += 1


        #save for later use for postprocessing:
        self.test_feature_dict = dict
        self.test_labels = labels
        #cv.cvReleaseMat(test)
        return labels, self.test_results(dict, labels)  
Ejemplo n.º 5
0
    def create_train_datastructures(self):
        """Assemble the OpenCV training matrices from all training scans.

        Walks the scans database from dataset 0 until get_next_dataset()
        returns False. For every dataset flagged is_training_set the feature
        pickle named by self.processor.get_features_filename(True) is loaded
        and its two classes are balanced: rows of the less frequent class
        (surface or clutter) are drawn at random, with np.random.randint, and
        appended until both classes have the same number of samples. The
        loaded dictionaries are modified in place ('set_size', 'features',
        'labels').

        Returns:
            Tuple (train_data, train_labels, type_mask) of cvMat objects:
            train_data is training_set_size x feature_vector_length and
            train_labels is training_set_size x 1 (both CV_32FC1); type_mask
            is 1 x (feature_vector_length + 1), CV_8UC1, set to
            CV_VAR_NUMERICAL everywhere except the last entry, which is
            CV_VAR_CATEGORICAL.

        Side effects:
            self.processor.scan_dataset is advanced through the database and
            finally reset to dataset 0.

        NOTE(review): the matrices are sized by the summed 'set_size', but
        rows are only written for samples labelled surface or clutter. If a
        dataset contains other labels the trailing rows are never written --
        confirm whether that can happen.
        """
        #loop through all marked datasets
        self.processor.scan_dataset = self.processor.scans_database.get_dataset(0)

        training_set_size = 0

        data = []
        #get size of training set in total
        # get_next_dataset() signals the end of the database with False.
        while False != self.processor.scan_dataset:
            if self.processor.scan_dataset.is_training_set:

                filename = self.processor.get_features_filename(True)
                print 'loading', filename
                dict = ut.load_pickle(filename)

                # make an equal size of points for each class: use object labels more often:
                # difference > 0: more surface than clutter samples; < 0: the opposite.
                difference = np.sum(dict['labels'] == processor.LABEL_SURFACE) - np.sum(dict['labels'] == processor.LABEL_CLUTTER)
                if difference > 0:
                    clutter_features = (dict['features'])[np.nonzero(dict['labels'] == processor.LABEL_CLUTTER)]
                    if len(clutter_features) > 0: #if there are none, do nothing
                        # Append 'difference' randomly chosen clutter rows (drawn with replacement).
                        dict['set_size'] += difference
                        dict['features'] = np.vstack((dict['features'], clutter_features[np.random.randint(0,len(clutter_features),size=difference)]))
                        dict['labels'] = np.hstack((dict['labels'], np.ones(difference) * processor.LABEL_CLUTTER))
                elif difference < 0:
                    surface_features = (dict['features'])[np.nonzero(dict['labels'] == processor.LABEL_SURFACE)]
                    if len(surface_features) > 0: #if there are none, do nothing
                        # Same as above with the roles swapped; the count is made positive first.
                        difference = -difference
                        dict['set_size'] += difference
                        dict['features'] = np.vstack((dict['features'], surface_features[np.random.randint(0,len(surface_features),size=difference)]))
                        dict['labels'] = np.hstack((dict['labels'], np.ones(difference) * processor.LABEL_SURFACE))

                training_set_size += dict['set_size']
                data.append(dict)
            #get next one
            self.processor.scan_dataset = self.processor.scans_database.get_next_dataset()

        #create training set:
        self.processor.scan_dataset = self.processor.scans_database.get_dataset(0)
        # Next row to be written in train_data / train_labels.
        current_training_set_index = 0

        feature_vector_length = len(self.processor.features.get_indexvector(self.features))
        print getTime(), feature_vector_length
        #create dataset matrices:
        print getTime(), '#training set size ', training_set_size

        #deactivate for now:
        max_traning_size = 1800000#2040000
        # NOTE(review): the size check below is commented out and replaced by
        # 'if True', so the sub-sampling 'else' branch is currently unreachable.
        #if training_set_size < max_traning_size:
        if True:
            train_data = cv.cvCreateMat(training_set_size,feature_vector_length,cv.CV_32FC1) #CvMat* cvCreateMat(int rows, int cols, int type)
            train_labels = cv.cvCreateMat(training_set_size,1,cv.CV_32FC1)

            for dict in data:
                for index in range(dict['set_size']):
                    #only train on surface and clutter
                    if dict['labels'][index] == processor.LABEL_SURFACE or dict['labels'][index]== processor.LABEL_CLUTTER:

                        # Keep only the feature columns selected for this classifier.
                        fv = (dict['features'][index])[self.processor.features.get_indexvector(self.features)]

                        # Copy the vector element by element into the cvMat row.
                        for fv_index, fv_value in enumerate(fv):
                            train_data[current_training_set_index][fv_index] = fv_value
                        train_labels[current_training_set_index] = dict['labels'][index]
                        current_training_set_index = current_training_set_index + 1

                        # Progress report every 16384 copied samples.
                        if current_training_set_index %  16384 == 0:
                            print getTime(), 'reading features:', current_training_set_index, 'of', training_set_size, '(',(float(current_training_set_index)/float(training_set_size)*100.0),'%)'

        else:
            print getTime(), 'more than',max_traning_size,'features, sample from them...'
            # Collect all surface/clutter samples, then keep a random subset
            # of max_traning_size of them.
            all_data = []
            all_labels = []
            for dict in data:
                for index in range(dict['set_size']):
                    if dict['labels'][index] == processor.LABEL_SURFACE or dict['labels'][index]== processor.LABEL_CLUTTER:
                        fv = (dict['features'][index])[self.processor.features.get_indexvector(self.features)]
                        all_data += [fv]
                        all_labels += [dict['labels'][index]]

                        current_training_set_index = current_training_set_index + 1
                        if current_training_set_index %  16384 == 0:
                            print getTime(), 'reading features:', current_training_set_index, 'of', training_set_size, '(',(float(current_training_set_index)/float(training_set_size)*100.0),'%)'

            # Drop the local reference to the per-dataset dictionaries before sampling.
            del data
            import random
            # Sample max_traning_size distinct positions (random.sample draws without replacement).
            indices = np.array(random.sample(xrange(len(all_labels)),max_traning_size))
            all_data = np.asarray(all_data)
            all_labels = np.asarray(all_labels)

            all_data = all_data[indices]
            all_labels = all_labels[indices]

            train_data = cv.cvCreateMat(max_traning_size,feature_vector_length,cv.CV_32FC1) #CvMat* cvCreateMat(int rows, int cols, int type)
            train_labels = cv.cvCreateMat(max_traning_size,1,cv.CV_32FC1)

            for index in range(max_traning_size):
                for fv_index, fv_value in enumerate(all_data[index]):
                    train_data[index][fv_index] = fv_value
                    # NOTE(review): the label is re-assigned once per feature
                    # value; once per sample would be enough.
                    train_labels[index] = all_labels[index]
                if index % 16384 == 0:
                    print getTime(), 'setting features:', (float(index)/float(max_traning_size))


        print getTime(), 'start training Classifier'

        # One entry per feature plus one for the response: all numerical,
        # only the last entry (the label) is categorical.
        type_mask = cv.cvCreateMat(1, feature_vector_length+1, cv.CV_8UC1)
        cv.cvSet( type_mask, cv.CV_VAR_NUMERICAL, 0)
        type_mask[feature_vector_length] = cv.CV_VAR_CATEGORICAL

        return (train_data, train_labels, type_mask)
Ejemplo n.º 6
0
 def load(self):
     self.cv_classifier = cv.CvDTree() #cv.CvBoost()
     print getTime(), 'loading Classifier',self.features
     self.cv_classifier.load(self.get_filename())
     
     
Ejemplo n.º 7
0
    Roadmap: we hope to improve types of inputs, and flexibility of outputs.

'''
import roslib; roslib.load_manifest('clutter_segmentation')
roslib.load_manifest('pr2_clutter_helper')
import rospy
from sensor_msgs.msg import PointCloud

### Optional, below:
#       roslib.load_manifest('display_stuff'); import save_labeled_cloud;
#       Dependency that will be integrated later. Removed for now.

import acquire_pr2_data; #from pr2_clutter_svm_helper

from hrl_lib.util import getTime
print getTime(), 'START'

import processor
import configuration

try: from placement import Placement
except: print 'Cannot find placement.py for import.  Ignoring'
from label_object import label_object
from scan_dataset import scan_dataset

import numpy as np
import opencv as cv
import opencv.highgui as hg
print getTime(), 'IMPORTS DONE'

    def prepare(self, features_k_nearest_neighbors, nonzero_indices = None, all_save_load = False, regenerate_neightborhood_indices = False):
        #print np.shape(self.processor.pts3d_bound), 'shape pts3d_bound'

        imgTmp = cv.cvCloneImage(self.processor.img)
        self.imNP = ut.cv2np(imgTmp,format='BGR')
        self.processor.map2d = np.asarray(self.processor.map[0][0:2]) #copied from laser to image mapping
        
        if features_k_nearest_neighbors == None or features_k_nearest_neighbors == False: #use range
            self.kdtree2d = kdtree.KDTree(self.processor.pts3d_bound.T)
            
            #print len(nonzero_indices)
            #print np.shape(np.asarray((self.processor.pts3d_bound.T)[nonzero_indices]))
            
            if nonzero_indices != None:
                print getTime(), 'query ball tree for ', len(nonzero_indices), 'points'
                kdtree_query = kdtree.KDTree((self.processor.pts3d_bound.T)[nonzero_indices])
            else:
                print getTime(), 'query ball tree'
                kdtree_query = kdtree.KDTree(self.processor.pts3d_bound.T)
            
            filename = self.processor.config.path+'/data/'+self.processor.scan_dataset.id+'_sphere_neighborhood_indices_'+str(self.processor.feature_radius)+'.pkl'
            if all_save_load == True and os.path.exists(filename) and regenerate_neightborhood_indices == False:
                #if its already there, load it:
                print getTime(), 'loading',filename
                self.kdtree_queried_indices = ut.load_pickle(filename)    
            else:
                self.kdtree_queried_indices = kdtree_query.query_ball_tree(self.kdtree2d, self.processor.feature_radius, 2.0, 0.2) #approximate
                print getTime(), 'queried kdtree: ',len(self.kdtree_queried_indices),'points, radius:',self.processor.feature_radius
                if all_save_load == True:
                    ut.save_pickle(self.kdtree_queried_indices, filename)
                    
            #make dict out of list for faster operations? (doesn't seem to change speed significantly):
            #self.kdtree_queried_indices = dict(zip(xrange(len(self.kdtree_queried_indices)), self.kdtree_queried_indices))
        
        else: #experiemental: use_20_nearest_neighbors == True
            #TODO: exclude invalid values in get_featurevector (uncomment code there)
           
            self.kdtree2d = kdtree.KDTree(self.processor.pts3d_bound.T)
            self.kdtree_queried_indices = []
            print getTime(), 'kdtree single queries for kNN start, k=', features_k_nearest_neighbors
            count = 0
            for point in ((self.processor.pts3d_bound.T)[nonzero_indices]):
                count = count + 1
                result = self.kdtree2d.query(point, features_k_nearest_neighbors,0.2,2,self.processor.feature_radius)
                #existing = result[0][0] != np.Inf
                #print existing
                #print result[1]
                self.kdtree_queried_indices += [result[1]] #[existing]
                if count % 4096 == 0:
                    print getTime(),count
            print getTime(), 'kdtree singe queries end'
            
            #convert to numpy array -> faster access
            self.kdtree_queried_indices = np.asarray(self.kdtree_queried_indices)
        
        #print self.kdtree_queried_indices
        #takes long to compute:
        #avg_len = 0
        #minlen = 999999
        #maxlen = 0
        #for x in self.kdtree_queried_indices:
        #    avg_len += len(x)
        #    minlen = min(minlen, len(x))
        #    maxlen = max(maxlen, len(x))
        #avg_len = avg_len / len(self.kdtree_queried_indices)
        #print getTime(), "range neighbors: avg_len", avg_len, 'minlen', minlen, 'maxlen', maxlen
        
        
        #create HSV numpy images:
        # compute the hsv version of the image 
        image_size = cv.cvGetSize(self.processor.img)
        img_h = cv.cvCreateImage (image_size, 8, 1)
        img_s = cv.cvCreateImage (image_size, 8, 1)
        img_v = cv.cvCreateImage (image_size, 8, 1)
        img_hsv = cv.cvCreateImage (image_size, 8, 3)
        
        cv.cvCvtColor (self.processor.img, img_hsv, cv.CV_BGR2HSV)
        
        cv.cvSplit (img_hsv, img_h, img_s, img_v, None)
        self.imNP_h = ut.cv2np(img_h)
        self.imNP_s = ut.cv2np(img_s)
        self.imNP_v = ut.cv2np(img_v)
        
        textures = texture_features.eigen_texture(self.processor.img)
        self.imNP_tex1 = textures[:,:,0]
        self.imNP_tex2 = textures[:,:,1]
        
        self.debug_before_first_featurevector = True
        
        self.generate_voi_histogram(self.processor.point_of_interest,self.processor.voi_width)
    def get_featurevector(self, index, count, pts = None):
        if pts == None:
            pts = self.processor.pts3d_bound

        #print 'i',index,'c', count
        fv = []
        
        indices = np.asarray(self.kdtree_queried_indices[count])
        invalid_value = np.shape(pts)[1]
        #print indices
        #print 'iv',invalid_value
        indices = indices[indices != invalid_value]
        
        #print getTime(), indices
        #print getTime(), 'number of pts', len(indices)
        a = pts[:,indices]
        view = processor.rotate_to_plane(self.processor.scan_dataset.ground_plane_normal, np.matrix([-1,0,0.]).T)
        normal, eigenvalues = gaussian_curvature.gaussian_curvature(a,view)
        #eigenvalues = eigenvalues / np.square(r)
        #fv += [normal[0,0],0,normal[2,0]]
        #fv += normal.T.A[0].tolist()
        #fv += eigenvalues.tolist()
        #print np.asarray(pts[:,index].T[0])[0]
       # print 'pt',np.asarray(pts[:,index].T[0])
        point = pts[:,index]
        
        ev1, ev2 = self.get_voi_histogram_spread(point)
        #z_max_height_diff = pts[2,index] - self.get_voi_maxcount_height()
        #fv += [self.get_voi_histogram_value(point),z_max_height_diff,normal[0,0],normal[1,0],normal[2,0], ev1, ev2]
        fv += [self.get_voi_histogram_value(point),normal[0,0],normal[1,0],normal[2,0], ev1, ev2]
        
        h = self.imNP_h[self.processor.map2d[1,index],self.processor.map2d[0,index]]
        s = self.imNP_s[self.processor.map2d[1,index],self.processor.map2d[0,index]]
        i = self.processor.intensities_bound[index]
        hsi = self.get_voi_hsi_histogram_values(point,h,s,i)
        fv += [hsi[0],hsi[1],hsi[2]]
        #print np.shape(self.imNP_tex1)
        #print np.shape(self.map2d)
        tex1 = self.imNP_tex1[self.processor.map2d[1,index],self.processor.map2d[0,index]]
        tex2 = self.imNP_tex2[self.processor.map2d[1,index],self.processor.map2d[0,index]]
        fv += [tex1, tex2]
        #print tex1, tex2
        

        #color histograms:
        colors_h = []
        colors_s = []
        colors_v = []
        for idx in indices:
            colors_h.append(float(self.imNP_h[self.processor.map2d[1,idx],self.processor.map2d[0,idx]]))
            colors_s.append(float(self.imNP_s[self.processor.map2d[1,idx],self.processor.map2d[0,idx]]))
            colors_v.append(float(self.imNP_v[self.processor.map2d[1,idx],self.processor.map2d[0,idx]]))
        
        color_hist = stats.histogram2(np.array(colors_h), [0,51,102,153,204])
        color_hist = color_hist / float(np.sum(color_hist))
        color_hist = list(color_hist)
        fv += color_hist
        color_hist = stats.histogram2(np.array(colors_s), [0,51,102,153,204])
        color_hist = color_hist / float(np.sum(color_hist))
        color_hist = list(color_hist)
        fv += color_hist
        color_hist = stats.histogram2(np.array(colors_v), [0,51,102,153,204])
        color_hist = color_hist / float(np.sum(color_hist))
        color_hist = list(color_hist)
        fv += color_hist
        
        #intensities
        intensities = self.processor.intensities_bound[indices]
        intensities = np.asarray(intensities)
        #map to 0-255-range:   TODO: perhaps do some nonlinear transformation here? 
        intensities = intensities / 10000 * 255
        intensity_hist = stats.histogram2(intensities, [0,51,102,153,204])
        intensity_hist = intensity_hist / float(np.sum(intensity_hist))
        intensity_hist = list(intensity_hist)
        fv += intensity_hist    
    
        #current colors:
        fv += [float(self.imNP_h[self.processor.map2d[1,index],self.processor.map2d[0,index]]) / 255.0]
        fv += [float(self.imNP_s[self.processor.map2d[1,index],self.processor.map2d[0,index]]) / 255.0]
        fv += [float(self.imNP_v[self.processor.map2d[1,index],self.processor.map2d[0,index]]) / 255.0]  
        
        #current intensity value (scaled)
        intensity = self.processor.intensities_bound[index]
        #scale:
        intensity = intensity / 15000.0
        intensity = [intensity]
        fv += intensity  

        
        if self.debug_before_first_featurevector == True:
            self.debug_before_first_featurevector = False
            print getTime(), 'feature vector sample(gaussian histograms):', fv
        return fv
Ejemplo n.º 10
0
    def test_results(self, dict, labels):
        current_set_size = dict['set_size']
        count_correct = 0
        count_clutter_correct = 0
        count_surface_correct = 0
        count_clutter = 0
        count_surface = 0
        count = 0
        for index in dict['point_indices']:
            label = labels[index]
            
            if label == dict['labels'][count]:
                count_correct += 1
                
            if dict['labels'][count] == processor.LABEL_CLUTTER:
                count_clutter += 1
                if label == dict['labels'][count]:
                    count_clutter_correct += 1
            if dict['labels'][count] == processor.LABEL_SURFACE:
                count_surface += 1
                if label == dict['labels'][count]:
                    count_surface_correct += 1                    

            count += 1        
        
        print getTime(), '##########################################'
        print getTime(), '####tested on ', self.features, '###########################'
        print getTime(), '==================================='
        print getTime(), 'percent in total: surface:',(float(count_surface)/float(current_set_size)*100), '%, clutter:',(float(count_clutter)/float(current_set_size)*100),'%'
        print getTime(), '#points surface:',count_surface,'clutter:',count_clutter
        print getTime(), '#points correct: surface:',count_surface_correct,'clutter:',count_clutter_correct
        if count_surface > 0:
            percent_surface_correct = float(count_surface_correct)/float(count_surface) * 100
        else:
            percent_surface_correct = 100
        if count_clutter > 0:
            percent_clutter_correct = float(count_clutter_correct)/float(count_clutter) * 100
        else:
            percent_clutter_correct = 100
        print getTime(), '#percent correct: surface:',percent_surface_correct,'clutter:',percent_clutter_correct
        print getTime(), '==================================='
        print getTime(), '##########################################'
        testresults = (count_surface, count_clutter,count_surface_correct, count_clutter_correct, percent_surface_correct, percent_clutter_correct)
    
        return testresults