コード例 #1
0
    def train(self, task_id, training_set, testing_set=None, **kwargs):
        """Train one dlib HOG+SVM detector per object and register the result.

        Downloads the annotated training images, writes a train/test XML file
        pair for every object, trains a detector per object, zips the output
        directory, uploads the archive through the storage manager and records
        the classifier in the database.

        Args:
            task_id: Task identifier. NOTE(review): accepted but not used by
                this method; the output folder is named from kwargs['id'].
            training_set: Training-set description; passed to
                MyUtils.downloadImgWithBx and stored in the metadata.
            testing_set: Only stored in the classifier metadata. The per-object
                test file is produced by self.split_train_file from the
                training data.
            **kwargs: Optional 'id' whose value names the output directory.
                Without it a timestamp is used.

        Returns:
            A status string that embeds the stored classifier metadata.
        """
        # Parenthesised single-argument print behaves identically on
        # Python 2 and Python 3.
        print('vm trainer called!')

        # connect to database manager
        self.db_manager = MongoDBManager(HogSvmTrainer.dbname, ip_addr='128.2.213.107')
        self.storage_manager = StorageManager('test1')

        local_tmp_dir = os.path.abspath(HogSvmTrainer.local_tmp_dir)

        # download images to cache and get all bounding_boxes in memory:
        obj_image_bounding_box = MyUtils.downloadImgWithBx(
            local_tmp_dir, self.db_manager, self.storage_manager, training_set)

        # create output dir structure: one output folder per task
        output_dir_prefix = os.path.abspath('./classifier/hog_svm_')
        if 'id' in kwargs:
            # Use the supplied id's value. Appending the literal string 'id'
            # made every run that passed an id share the same folder.
            output_dir = output_dir_prefix + str(kwargs['id'])
        else:
            output_dir = output_dir_prefix + time.strftime("%m_%d_%y_%H_%M_%S")
        MyUtils.create_dir(output_dir)

        # generate vm specific input files
        # for each object, create a folder for the train/test xml files and
        # the final classifier svm
        obj_output_dirs = {}
        for each_obj, obj_info in obj_image_bounding_box.items():
            # split the training data to obtain a testing portion
            train_obj_info, test_obj_info = self.split_train_file(obj_info, 10)
            cur_obj_dir = output_dir + '/' + each_obj
            obj_output_dirs[each_obj] = cur_obj_dir
            MyUtils.create_dir(cur_obj_dir)
            self.genInputFileForObj(
                each_obj, train_obj_info, cur_obj_dir + '/' + 'train_' + each_obj + '.xml')
            self.genInputFileForObj(
                each_obj, test_obj_info, cur_obj_dir + '/' + 'test_' + each_obj + '.xml')

        for each_obj, each_obj_dir in obj_output_dirs.items():
            print('calling dlib to train at folder {}'.format(each_obj_dir))
            detector_path = each_obj_dir + '/' + each_obj + '.svm'
            accuracy = self.train_with_dlib(
                each_obj_dir, detector_path,
                'train_' + each_obj + '.xml', 'test_' + each_obj + '.xml')

            # The stored metadata only says 'see log', so actually emit the
            # per-object accuracy instead of discarding it.
            print("Testing accuracy: {}".format(accuracy))

        # save the classifiers into s3 and database
        MyUtils.zipdir(output_dir)
        zip_file = output_dir + '.zip'
        print('uploading file to s3...')
        key = self.storage_manager.uploadFile(zip_file)
        print('updating database...')

        # TODO: maybe send this information to controller?
        # add meta data info to database
        classifier_metadata = {'name': key,
                               'training_set': training_set,
                               'testing_set': testing_set,
                               'accuracy': 'see log',
                               'filepath': key}
        self.db_manager.addClassifier(classifier_metadata)
        return 'succesfully added ' + str(classifier_metadata)
コード例 #2
0
    def train(self, task_id, training_set, testing_set=None, **kwargs):
        """Fine-tune the reference CaffeNet on cropped object images.

        Steps: create the task output directory, crop every bounding box of
        every requested object into its own image file, split the crops into
        train/test lists, write customised net/solver/deploy prototxt files,
        run the caffe command-line trainer, then zip, upload and register the
        output directory.

        Args:
            task_id: Names the output directory; when None a timestamp is
                used instead.
            training_set: Mapping of object -> iterable of video ids.
            testing_set: Only stored in the classifier metadata; defaults to
                None like the sibling HOG/SVM trainer.
            **kwargs: Optional 'iteration' giving the iteration count handed
                to mod_caffe_solver (default 500).

        Returns:
            None.
        """
        # Parenthesised single-argument print behaves identically on
        # Python 2 and Python 3.
        print('caffe train called')

        self.output_dir_prefix = os.path.abspath('./classifier/caffenet_')
        # create an output folder for each task
        if task_id is not None:
            self.output_dir = self.output_dir_prefix + str(task_id)
        else:
            timestamp = time.strftime("%m_%d_%y_%H_%M_%S")
            self.output_dir = self.output_dir_prefix + timestamp

        # TODO: changed to user input parameter?
        max_iter = kwargs.get('iteration', 500)

        MyUtils.create_dir(self.output_dir)

        # connect to database manager
        self.db_manager = MongoDBManager(CaffeNetTrainer.dbname, ip_addr='128.2.213.107')
        self.storage_manager = StorageManager('test1')

        # make intermediate folders for downloaded and cropped images
        self.ori_image_dir = CaffeNetTrainer.local_tmp_dir + '/' + 'original'
        self.crop_image_dir = CaffeNetTrainer.local_tmp_dir + '/' + 'crop'
        self.cache_manager = cacheManager(self.ori_image_dir)
        MyUtils.create_dir(self.ori_image_dir)
        MyUtils.create_dir(self.crop_image_dir)

        # get objects regardless of their videos, since the same object in
        # different videos belongs to the same identity
        obj_set = {}
        for obj, vid in training_set.items():
            obj_set.setdefault(obj, []).extend(vid)
        print(obj_set)

        # for each object: cache its images, then crop every bounding box
        training_set_obj_file_path = {}
        for obj, vids in obj_set.items():
            obj_image_path = []
            obj_bounding_boxes_with_image = {}
            for vid in vids:
                obj_bounding_boxes_each_vid = \
                    self.db_manager.getBoundingBoxWithImageByVidAndObj(vid, obj)
                # TODO: probably can be a bit more efficient. right now is
                # downloading vid repeatedly if it appears multiple times in
                # train yaml
                self.cache_manager.cacheImageFiles(vid, dir=False)
                obj_bounding_boxes_with_image.update(obj_bounding_boxes_each_vid)

            for image, bx_list in obj_bounding_boxes_with_image.items():
                if not bx_list:
                    # Nothing to crop; do not touch the image file at all.
                    continue
                # Open each source image once and reuse it for all of its
                # boxes instead of reopening the file per bounding box.
                # Images are read from ori_image_dir (presumably populated by
                # cacheImageFiles above -- verify against cacheManager).
                im = Image.open(self.ori_image_dir + '/' + image)
                name_root, name_ext = os.path.splitext(image)
                for idx, bx in enumerate(bx_list):
                    # bx is read as (x, y, width, height); crop() wants
                    # (left, upper, right, lower).
                    left_x = bx[0]
                    left_y = bx[1]
                    right_x = left_x + bx[2]
                    right_y = left_y + bx[3]
                    output_file_path = self.crop_image_dir + '/' + name_root \
                        + '_' + str(obj) + str(idx) + name_ext
                    im.crop((left_x, left_y, right_x, right_y)).save(output_file_path)
                    obj_image_path.append(output_file_path)
            training_set_obj_file_path[obj] = obj_image_path

        # generate training / testing file lists
        training_set_obj_file_path, testing_set_obj_file_path = \
            self.split_train_file(training_set_obj_file_path, 10)
        self.generate_caffe_train_file(training_set_obj_file_path, self.output_dir + '/train.txt')
        self.generate_caffe_train_file(testing_set_obj_file_path, self.output_dir + '/test.txt')
        # generate label file that maps labels to object names
        self.generate_caffe_label_file(training_set_obj_file_path, self.output_dir + '/label.txt')

        # modify network prototxt: one output category per object
        num_output_category = len(obj_set)

        train_file_path = os.path.abspath(self.output_dir + '/train.txt')
        test_file_path = os.path.abspath(self.output_dir + '/test.txt')

        output_train_val_path = self.output_dir + '/' + 'train_val_custom.prototxt'
        output_solver_path = self.output_dir + '/' + 'solver_custom.prototxt'
        output_deploy_path = self.output_dir + '/' + 'deploy_custom.prototxt'
        # fine-tuning output
        self.custom_net_path = self.output_dir + '/train_val_custom.prototxt'
        self.custom_snapshot_prefix = self.output_dir + '/caffenet_custom'
        self.output_layer_name = "fc8_custom"
        self.output_net_name = "CaffeNetCustom"
        # original model
        self.original_output_layer_name = 'fc8'
        # reference design file locations
        input_file_prefix = os.path.abspath('./bvlc_reference_caffenet')
        train_file = 'train_val.prototxt'
        solver_file = 'solver.prototxt'
        deploy_file = 'deploy.prototxt'

        self.mod_caffe_net(input_file_prefix + '/' + train_file, num_output_category,
                           train_file_path, test_file_path, output_train_val_path)
        self.mod_caffe_solver(input_file_prefix + '/' + solver_file,
                              max_iter, output_solver_path)
        self.mod_caffe_deploy(input_file_prefix + '/' + deploy_file,
                              num_output_category, output_deploy_path)

        cmd = "/opt/caffe/build/tools/caffe train"
        cmd += " -solver " + output_solver_path
        cmd += " -weights ./bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"
        cmd += " | tee " + self.output_dir + '/' + 'log.txt'

        print(cmd)
        # issue train
        # NOTE(review): the command is a concatenated shell string and its
        # exit status is ignored, so a failed training run is still zipped,
        # uploaded and registered below. Paths containing shell
        # metacharacters would also break the command.
        os.system(cmd)
        MyUtils.zipdir(self.output_dir)
        zip_file = self.output_dir + '.zip'
        print('uploading file to s3...')
        key = self.storage_manager.uploadFile(zip_file)
        print('updating database...')

        # TODO: maybe send this information to controller?
        # add meta data info to database
        classifier_metadata = {'name': key,
                               'training_set': training_set,
                               'testing_set': testing_set,
                               'accuracy': 'see log',
                               'filepath': key}
        self.db_manager.addClassifier(classifier_metadata)
        return