Example #1
    def set_samples(self, sample_bboxs):
        timer = common.Timer()
        bboxs = self.get_bbox_array(sample_bboxs)
        self.sample_bbox.set_value(bboxs)
        self.sample_bbox_list = sample_bboxs
        logging.debug("Took %i ms to set_samples" % timer.current_ms())
        return bboxs
Example #2
    def predict_output(self, dataset):
        dataset_x, dataset_y, dataset_size = dataset.export(self.batch_size)

        #dummy call to build function
        self.predict_output_step(dataset_x[:self.batch_size])

        #evaluate function
        timer = common.Timer()
        n = math.ceil(dataset_size / self.batch_size)
        pr = []
        for index in range(n):
            data_x = dataset_x[index * self.batch_size:(index + 1) *
                               self.batch_size]
            pr_batch = self.predict_output_step(data_x)
            pr.append(pr_batch)
        pr = numpy.concatenate(pr, axis=0)

        logging.verbose("Prediction took %.3f sec for %i samples" %
                        (timer.current(), pr.shape[0]))

        #crop dummy data
        if (dataset_size % self.batch_size) != 0:
            s = [dataset_size] + list(pr.shape[1:])
            pr.resize(tuple(s), refcheck=False)

        return pr
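
The `predict_output` method above assumes `dataset.export` has padded the data up to a multiple of `batch_size`; it then runs the fixed-size prediction step batch by batch, stacks the results, and finally crops the rows that correspond to padding. A minimal self-contained sketch of the same pad-then-crop pattern (the function and variable names below are illustrative, not part of the original code):

import math
import numpy

def predict_padded(predict_step, data_x, dataset_size, batch_size):
    """Run a fixed-batch-size predictor over data padded to a batch multiple,
    then drop the predictions that correspond to the padding rows."""
    outputs = []
    for index in range(math.ceil(dataset_size / batch_size)):
        batch = data_x[index * batch_size:(index + 1) * batch_size]
        outputs.append(predict_step(batch))
    pr = numpy.concatenate(outputs, axis=0)
    #crop dummy rows introduced by padding
    return pr[:dataset_size]

#usage: 10 real samples padded to 3 batches of 4, with an identity "model"
data = numpy.zeros((12, 5), dtype=numpy.float32)  #10 real rows + 2 padding rows
print(predict_padded(lambda x: x, data, dataset_size=10, batch_size=4).shape)  #(10, 5)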
Example #3
    def get_bbox_array(self, sample_bboxs):
        timer = common.Timer()
        bboxs = numpy.zeros(
            (self.batch_size, self.sample_num, self.sample_num, 4),
            dtype=numpy.float32)
        c_code.build_bbox_array(sample_bboxs, bboxs)
        logging.debug("Took %i ms to get_bbox_array" % timer.current_ms())
        return bboxs
Example #4
    def run(self):
        logging.info("Exporting subset (%i/%i)" %
                     (self.subset + 1, self.data.subset_num))
        timer = common.Timer()
        self.data.load_from_subset(self.subset)
        timer.mark()
        self.data_export = self.data.export(self.batch_size)
        timer.mark()
        logging.info(
            "Finished exporting subset (%i/%i)" %
            (self.subset + 1, self.data.subset_num),
            "- load took %i sec, export took %i sec" %
            (timer.delta(0), timer.delta(1)))
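
These examples time their work with `common.Timer`, calling `current()`/`current_ms()` for elapsed time, `mark()` to record intermediate points, and `delta()`/`delta_ms()`/`deltas_ms()` for the intervals between marks. The `common` module itself is not shown, so the following is only a plausible sketch of that interface, not the original implementation:

import time

class Timer:
    """Stopwatch with interval marks (assumed interface of common.Timer)."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.start = time.time()
        self.marks = [self.start]

    def mark(self):
        #record an intermediate timestamp
        self.marks.append(time.time())

    def current(self):
        #seconds since construction or the last reset()
        return time.time() - self.start

    def current_ms(self):
        return int(1000 * self.current())

    def delta(self, i):
        #seconds between mark i and mark i + 1 (mark 0 is the start)
        return self.marks[i + 1] - self.marks[i]

    def delta_ms(self, i):
        return int(1000 * self.delta(i))

    def deltas_ms(self):
        #all recorded intervals, in milliseconds
        return [int(1000 * (b - a)) for a, b in zip(self.marks, self.marks[1:])]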
Example #5
    def get_samples(self, data_x, train=False, store_shared=False):

        global profile
        if self.corner_func is None:
            logging.verbose("Building corner function - store samples:",
                            store_shared, "train:", train)
            updates = [(self.corner_layer.sample_shared,
                        self.corner_layer.sample)] if store_shared else []
            self.corner_func = theano.function(
                [self.model_input],
                self.corner_layer.corner_pr,
                updates=updates,
                profile=profile,
                givens=[(get_train(), tensor.cast(int(train), 'int8'))],
                on_unused_input='ignore')

        #find corners
        timer = common.Timer()
        logging.debug("Running corner function")
        corner_pr = self.corner_func(data_x)

        if profile:
            logging.debug("Profiling corner function")
            theano_util.profile(self.corner_func, 10, data_x)
            theano_util.export_graph("./corner.graph", self.corner_func)
            logging.debug("Done")
            exit(0)

        #build sampling bounding boxes
        timer.mark()
        logging.debug("Build samples (%i threads)" % self.thread_num)
        samples = c_code.build_samples(self.thread_num, corner_pr,
                                       self.corner_threshold, self.sample_num,
                                       self.corner_max, self.local_max,
                                       self.nms_threshold)

        timer.mark()
        logging.verbose(
            "Took %i ms to get_samples (%i model, %i build, %i max corners) " %
            (timer.current_ms(), timer.delta_ms(0), timer.delta_ms(1),
             self.corner_max))
        return samples
Example #6
    def train_epoch(self,
                    dataset,
                    epoch,
                    learning_rate,
                    momentum=[0, 1, 0],
                    decay=0.0,
                    solver_mode="sgd"):

        #train over batches (assume dataset size is a multiple of batch_size!)
        logging.info("Evaluating training function")
        dataset_x, dataset_m, dataset_size = dataset.export(self.batch_size)
        index_num = math.ceil(dataset_size / self.batch_size)
        total_cost = 0
        for index in range(index_num):

            #upload data to GPU and perform train step
            timer = common.Timer()
            data_x = dataset_x[index * self.batch_size:(index + 1) *
                               self.batch_size]
            data_m = dataset_m[index * self.batch_size:(index + 1) *
                               self.batch_size]
            cost, _ = self.train_step(data_x, data_m, epoch, self.iteration,
                                      learning_rate, momentum, decay)

            #watch out for GPUs randomly producing NaN!
            if math.isnan(cost):
                raise Exception("ERROR: Cost is NaN")

            logging.verbose(
                "Batch %i.%i - iteration: %i cost:" %
                (epoch, index * self.batch_size, self.iteration), cost,
                "took: %i ms" % timer.current_ms())
            total_cost += cost
            self.iteration += 1

        return total_cost
Example #7
def main():

    #load arguments:
    parser = argparse.ArgumentParser(
        description='Train a convolutional network using labelled data.')
    logging.add_arguments(parser)
    parser.add_argument("--model",
                        required=False,
                        default=None,
                        help="Model to continue training.")
    parser.add_argument("--cost-factors",
                        default=[],
                        nargs="+",
                        help="Multiplicative factors for model costs")
    parser.add_argument(
        "--thread-num",
        type=int,
        default=1,
        help=
        "Number of threads to use for supported opeartions (e.g. loading/distorting datasets)"
    )
    parser.add_argument("--extension",
                        default="ppm",
                        help="Image file extension")
    parser.add_argument("--train",
                        default=None,
                        help="The folder with training / validation data")
    parser.add_argument("--test",
                        default=None,
                        help="The folder with testing data (optional)")
    parser.add_argument("--test-epochs",
                        type=int,
                        default=1,
                        help="Epochs between each test evaluation")
    parser.add_argument("--test-mode",
                        default="default",
                        help="Mode to use for testing")
    parser.add_argument(
        "--border-mode",
        default="valid",
        help="Border mode for convolutional layers (full, valid)")
    parser.add_argument("--output-prefix",
                        default="./model",
                        help="Output prefix for model files")
    parser.add_argument(
        "--activation",
        default="relu",
        help=
        "Activation function used in convolution / hidden layers (tanh, relu, leaky-relu)"
    )
    parser.add_argument("--solver", type=str, default="nesterov", help="")
    parser.add_argument("--weight-init",
                        nargs="+",
                        default=["he-backward"],
                        help="Weight initialization scheme")
    parser.add_argument("--learn-rate",
                        type=float,
                        default=0.1,
                        help="Learning rate for weights and biases.")
    parser.add_argument(
        "--learn-momentum",
        type=float,
        default=[0.0, 0.0],
        nargs="+",
        help="Learning momentum for weights and biases (0.0 - 1.0).")
    parser.add_argument(
        "--learn-anneal",
        type=float,
        default=1,
        help="Annealing factor per epoch for weight and bias learning rate")
    parser.add_argument(
        "--learn-anneal-epochs",
        nargs="+",
        type=int,
        default=[],
        help="Epochs to apply learning rate annealing (default every epoch)")
    parser.add_argument("--learn-decay",
                        type=float,
                        default=0.0,
                        help="L2 weight decay (not applied to biases). ")
    parser.add_argument("--epochs",
                        type=int,
                        default=30,
                        help="The number of training epochs")
    parser.add_argument("--max-samples",
                        type=int,
                        default=None,
                        help="Maximum samples to load from training set")
    parser.add_argument("--batch-size",
                        type=int,
                        default=32,
                        help="Size of processing batchs")
    parser.add_argument("--seed",
                        type=int,
                        default=23455,
                        help="Random Seed for weights")
    parser.add_argument(
        "--distort-mode",
        default=[],
        nargs="+",
        help="Distortions to apply to training data (default, cifar10, disable)"
    )
    parser.add_argument("--disable-intermediate",
                        default=False,
                        action="store_true",
                        help="Disable outputting of intermediate model files")
    parser.add_argument(
        "--augment-mirror",
        default=False,
        action="store_true",
        help="Augment training data with horizontally mirrored copies")
    parser.add_argument("--skip-train",
                        default=False,
                        action="store_true",
                        help="Skip training of model")
    parser.add_argument("--skip-layer-updates",
                        type=int,
                        nargs="+",
                        default=[],
                        help="Skip training updates to specified layers")
    parser.add_argument("--model-desc",
                        default=[
                            "C[100,7]", "P[2]", "C[150,4]", "P[2]", "C[250,4]",
                            "P[2]", "C[300,1]", "R"
                        ],
                        nargs="+",
                        type=str,
                        help="Network layer description")
    args = parser.parse_args()

    logging.init(args)

    #set random seeds
    random.seed(args.seed)
    numpy.random.seed(args.seed)

    #load training dataset
    logging.info("Loading training data:", args.train)
    train_data = dataset.load(args.train,
                              args.extension,
                              is_training=True,
                              thread_num=args.thread_num)
    data_shape = train_data.get_data_shape()
    class_num = train_data.get_class_num()
    class_labels = train_data.class_labels
    logging.info("Found %i class labels:\n" % class_num, class_labels)

    #hack for reducing training data size
    if args.max_samples is not None:
        train_data.data = random.sample(train_data.data, args.max_samples)

    #mirror training data
    if args.augment_mirror:
        train_data.augment_mirror()

    logging.info("Training: %i samples" % len(train_data))

    #load test dataset
    if args.test:
        logging.info("Loading test: " + args.test)
        test_data = dataset.load(args.test,
                                 args.extension,
                                 is_training=False,
                                 thread_num=args.thread_num,
                                 class_labels=class_labels)

    #initialize model
    model = model_cnn.initialize(args, data_shape, class_labels, class_num)
    model.build_train_func(args.solver, args.cost_factors)

    #Run training
    best_test_error = 100.0
    learn_rate = args.learn_rate
    for epoch in range(args.epochs):
        logging.info("----- Training Epoch: %i -----" % epoch)

        #perform training
        if not args.skip_train:

            logging.info("Training with solver " + args.solver +
                         ", learning rate " + str(learn_rate) +
                         " and momentum " + str(args.learn_momentum))

            #shuffle dataset:
            train_data.shuffle()

            for subset in range(train_data.subset_num):
                timer = common.Timer()
                train_data.load_from_subset(subset)

                logging.info("Performing Gradient Descent...")
                cost = model.train_epoch(train_data, epoch, learn_rate,
                                         args.learn_momentum, args.learn_decay)

                nbatch = math.ceil(len(train_data) / model.batch_size)
                logging.info("Training subset %i - Cost: %.3f, Took %.1f sec" %
                             (subset, cost, timer.current()))

        if len(args.learn_anneal_epochs) == 0 or (
                epoch + 1) in args.learn_anneal_epochs:
            logging.verbose("Annealing learning rate")
            learn_rate *= args.learn_anneal

        #perform testing
        test_error = 0
        if args.test is not None and ((epoch % args.test_epochs) == 0
                                      or epoch == (args.epochs - 1)):
            test_error, test_class_errors = compute_error(test_data, model)
            logging.info(
                "Epoch %i test error: %.2f%% (%i samples)" %
                (epoch, test_error, int(test_error * len(test_data) / 100.0)))
            save_results(args.output_prefix + "_epoch%03i.test" % epoch,
                         test_error, test_class_errors)

        #save intermediate models
        if not args.disable_intermediate:
            model_cnn.save_to_file(
                model, args.output_prefix + "_epoch%03i.mdl.gz" % (epoch))

    #save final model
    model_cnn.save_to_file(
        model, args.output_prefix + "_epoch%03i_final.mdl.gz" % epoch)
    logging.info("Finished Training")
Example #8
    def get_detections(self, model, data_x, data_m, params):

        pr_threshold = params.get("prThreshold", 0.01)
        nms_threshold = params.get("nmsThreshold", 0.5)
        corner_threshold = params.get("cornerThreshold",
                                      self.sparse_layer.corner_threshold)
        corner_max = params.get("cornerMax", 1024)
        t = (pr_threshold, nms_threshold, corner_threshold, corner_max)
        logging.verbose(
            "Using detection params - pr threshold: %f, nms threshold: %f, corner_threshold: %f, corner_max: %i"
            % t)

        first_detect = False
        if self.detect_func is None:

            #get all model outputs
            outputs = []
            outputs.append(self.det_pr)
            if self.use_bbox_reg:
                outputs.append(self.bbox_reg)

            logging.info("Building detection function")
            self.detect_func = theano.function([model.input],
                                               outputs,
                                               givens=[(get_train(),
                                                        tensor.cast(0,
                                                                    'int8'))],
                                               on_unused_input='ignore')

            logging.verbose("Exporting graph...")
            with open("detect_graph.txt", "w") as f:
                theano.printing.debugprint(self.detect_func,
                                           file=f,
                                           print_type=True)

            first_detect = True

        #get sampling bounding boxes
        logging.verbose("Detecting sample bboxs (%.2f)" % corner_threshold)
        timer = common.Timer()
        sample_bboxs = self.sparse_layer.get_samples(data_x,
                                                     train=False,
                                                     store_shared=True)
        timer.mark()
        logging.verbose("Found sample bboxs: {}".format(
            [len(bbox) for bbox in sample_bboxs]))

        #upload sampling bounding boxes
        bboxs = self.sparse_layer.set_samples(sample_bboxs)
        timer.mark()

        #classify sampling bounding boxes
        r = list(self.detect_func(data_x))

        #get outputs
        det_pr = r[0]
        r_index = 1

        if self.use_bbox_reg:
            bbox_reg = r[r_index]
            r_index += 1

            #update bbox array
            bboxs_cx = 0.5 * (bboxs[:, :, :, 0] + bboxs[:, :, :, 2])
            bboxs_cy = 0.5 * (bboxs[:, :, :, 1] + bboxs[:, :, :, 3])
            bboxs_w = bboxs[:, :, :, 2] - bboxs[:, :, :, 0]
            bboxs_h = bboxs[:, :, :, 3] - bboxs[:, :, :, 1]
            predict_cx = bbox_reg[:, 0, :, :] * bboxs_w + bboxs_cx
            predict_cy = bbox_reg[:, 1, :, :] * bboxs_h + bboxs_cy
            predict_w = numpy.exp(bbox_reg[:, 2, :, :]) * bboxs_w
            predict_h = numpy.exp(bbox_reg[:, 3, :, :]) * bboxs_h
            bboxs[:, :, :, 0] = predict_cx - predict_w * 0.5
            bboxs[:, :, :, 1] = predict_cy - predict_h * 0.5
            bboxs[:, :, :, 2] = predict_cx + predict_w * 0.5
            bboxs[:, :, :, 3] = predict_cy + predict_h * 0.5

        timer.mark()
        detlists = c_code.build_detections_nms(pr_threshold, nms_threshold,
                                               det_pr, bboxs,
                                               [len(s) for s in sample_bboxs])
        timer.mark()

        logging.verbose("Found detections:",
                        [len(detlist) for detlist in detlists])
        logging.verbose(
            "FPS=%.1f, Timing (ms) - get samples: %i, upload: %i, classify: %i, build+nms %i"
            % tuple([self.batch_size / timer.current()] + timer.deltas_ms()))

        if not first_detect:
            global detect_time, detect_num
            detect_time += timer.current()
            detect_num += self.batch_size
            logging.info("Average FPS=%.1f" % (detect_num / detect_time))

        #results format
        results = []
        for i, detlist in enumerate(detlists):
            results.append({"detections": detlist, "meta": data_m[i]})

        return results
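
The `#update bbox array` block above decodes a standard center/size box regression: the network output gives the box center offset relative to the sample width and height, plus log-scale factors for the new width and height. The same transform on a single box, as a checkable numpy sketch (names below are illustrative):

import numpy

def decode_bbox(sample, reg):
    """Apply a (dx, dy, dw, dh) regression to an (x0, y0, x1, y1) sample box."""
    cx = 0.5 * (sample[0] + sample[2])
    cy = 0.5 * (sample[1] + sample[3])
    w = sample[2] - sample[0]
    h = sample[3] - sample[1]
    #predicted center and size
    pcx = reg[0] * w + cx
    pcy = reg[1] * h + cy
    pw = numpy.exp(reg[2]) * w
    ph = numpy.exp(reg[3]) * h
    return numpy.array([pcx - 0.5 * pw, pcy - 0.5 * ph,
                        pcx + 0.5 * pw, pcy + 0.5 * ph])

#a zero regression leaves the sample box unchanged
print(decode_bbox(numpy.array([10., 20., 30., 60.]), numpy.zeros(4)))  #[10. 20. 30. 60.]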
Example #9
    def get_target(self, model, samples, metas):

        timer = common.Timer()

        #build sample
        det_pr = numpy.zeros(self.det_shape, dtype=numpy.float32)
        det_pr[:, self.null_class, ...] = 1.0

        if self.use_bbox_reg:
            bbox_valid = numpy.zeros(
                (self.batch_size, self.sample_num, self.sample_num),
                dtype=numpy.float32)
            bbox_reg = numpy.ones(
                (self.batch_size, 8, self.sample_num, self.sample_num),
                dtype=numpy.float32)

        for b, meta in enumerate(metas):

            samples = [
                bbox for _, bbox in self.sparse_layer.sample_bbox_list[b]
            ]
            if len(meta["bbox"]) > 0 and len(samples) > 0:
                overlap = theano_util.get_overlap_iou(meta["bbox"], samples)
                bbox_indexs, sample_indexs = numpy.where(
                    overlap > self.overlap_threshold)
                for obj, index in zip(bbox_indexs.tolist(),
                                      sample_indexs.tolist()):
                    sample_i = index % self.sparse_layer.sample_num
                    sample_j = index // self.sparse_layer.sample_num
                    sample_cls = meta["class"][obj]
                    sample_bbox = samples[index]
                    det_pr[b, sample_cls, sample_j, sample_i] = 1.0
                    det_pr[b, self.null_class, sample_j, sample_i] = 0.0

                if self.use_bbox_reg:
                    overlap_max = overlap.argmax(axis=0)
                    for index in range(len(samples)):
                        obj = overlap_max[index]
                        if overlap[obj, index] <= self.overlap_threshold:
                            continue

                        sample = samples[index]
                        target = meta["bbox"][obj]
                        sample_i = index % self.sparse_layer.sample_num
                        sample_j = index // self.sparse_layer.sample_num
                        bbox_valid[b, sample_j, sample_i] = 1.0
                        bbox_reg[b, 0, sample_j,
                                 sample_i] = 0.5 * (target[0] + target[2])
                        bbox_reg[b, 1, sample_j,
                                 sample_i] = 0.5 * (target[1] + target[3])
                        bbox_reg[b, 2, sample_j,
                                 sample_i] = target[2] - target[0]
                        bbox_reg[b, 3, sample_j,
                                 sample_i] = target[3] - target[1]
                        bbox_reg[b, 4, sample_j,
                                 sample_i] = 0.5 * (sample[0] + sample[2])
                        bbox_reg[b, 5, sample_j,
                                 sample_i] = 0.5 * (sample[1] + sample[3])
                        bbox_reg[b, 6, sample_j,
                                 sample_i] = sample[2] - sample[0]
                        bbox_reg[b, 7, sample_j,
                                 sample_i] = sample[3] - sample[1]

        #normalize probabilities
        det_pr /= det_pr.sum(axis=1)[:, None, ...]

        #normalize by number of samples
        nfactor = self.sample_num * self.sample_num
        det_pr /= nfactor
        if self.use_bbox_reg:
            bbox_valid /= nfactor

        #pack indices / values
        yt_value = det_pr.flatten()
        if self.use_bbox_reg:
            yt_value = numpy.concatenate(
                (yt_value, bbox_valid.flatten(), bbox_reg.flatten()))

        return numpy.array([], dtype=numpy.int64), yt_value
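
`get_target` depends on `theano_util.get_overlap_iou` returning an (objects x samples) IoU matrix, and then marks every sample whose overlap with a ground-truth box exceeds `overlap_threshold`. A plain numpy sketch of such an overlap matrix, under the assumption that the helper computes standard intersection-over-union (the real `theano_util` code is not shown):

import numpy

def overlap_iou(boxes_a, boxes_b):
    """Pairwise IoU between two lists of (x0, y0, x1, y1) boxes, shape (len_a, len_b)."""
    a = numpy.asarray(boxes_a, dtype=numpy.float32)[:, None, :]
    b = numpy.asarray(boxes_b, dtype=numpy.float32)[None, :, :]
    ix0 = numpy.maximum(a[..., 0], b[..., 0])
    iy0 = numpy.maximum(a[..., 1], b[..., 1])
    ix1 = numpy.minimum(a[..., 2], b[..., 2])
    iy1 = numpy.minimum(a[..., 3], b[..., 3])
    inter = numpy.clip(ix1 - ix0, 0, None) * numpy.clip(iy1 - iy0, 0, None)
    area_a = (a[..., 2] - a[..., 0]) * (a[..., 3] - a[..., 1])
    area_b = (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])
    return inter / (area_a + area_b - inter)

gt = [[0, 0, 10, 10]]
samples = [[0, 0, 10, 10], [5, 5, 15, 15], [20, 20, 30, 30]]
bbox_indexs, sample_indexs = numpy.where(overlap_iou(gt, samples) > 0.5)
print(bbox_indexs, sample_indexs)  #only the first sample matches the ground-truth box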
Example #10
def main():

    #load arguments:
    parser = argparse.ArgumentParser(
        description='Train a convolutional network using labelled data')
    logging.add_arguments(parser)
    parser.add_argument(
        "--use-acc-mode",
        default=False,
        action="store_true",
        help="Use model accumulation over multiple batches (uses more VRAM)")
    parser.add_argument("--cost-factors",
                        default=[],
                        nargs="+",
                        help="Multiplicative factors for model costs")
    parser.add_argument("--export-model-dims",
                        default=False,
                        action="store_true",
                        help="Ignore, don't use this option!")
    parser.add_argument("--model-dims",
                        default="./model-dims.json",
                        type=str,
                        help="export file for shared model dimensions")
    parser.add_argument(
        "--model-save-dt",
        default=30,
        type=int,
        help=
        "Minimum time (min) between saving an intermediate model. Use 0 to disable."
    )
    parser.add_argument("--model",
                        required=False,
                        default=None,
                        help="Model to continue training.")
    parser.add_argument("--gpus",
                        nargs="+",
                        default=["gpu0"],
                        help="list of gpus to train over")
    parser.add_argument(
        "--update-server",
        metavar="<addr> [port] [offset] [delta]",
        nargs="+",
        default=None,
        help=
        "model update server for synchronizing multiple networked machines. Set <addr> to 'mpi' for MPI networking."
    )
    parser.add_argument(
        "--subset-max",
        type=int,
        default=10000000,
        help=
        "Specify maximum number of subsets to be used in each training epoch")
    parser.add_argument("--train",
                        default=None,
                        help="The folder with training / validation data")
    parser.add_argument("--test",
                        default=None,
                        help="The folder with testing data (optional)")
    parser.add_argument("--test-mode", default="default", help="Testing Mode")
    parser.add_argument("--test-epochs",
                        type=int,
                        default=1,
                        help="Epochs between each test evaluation")
    parser.add_argument(
        "--thread-num",
        type=int,
        default=1,
        help=
        "Number of threads to use for supported opeartions (e.g. loading/distorting datasets)"
    )
    parser.add_argument("--extension",
                        default="ppm",
                        help="Image file extension")
    parser.add_argument(
        "--activation",
        default="relu",
        help=
        "Activation function used in convolution / hidden layers (tanh, relu, leaky-relu)"
    )
    parser.add_argument(
        "--border-mode",
        default="half",
        help="Border mode for convolutional layers (full, valid)")
    parser.add_argument("--output-prefix",
                        default="./model",
                        help="Output prefix for model files")
    parser.add_argument("--solver", type=str, default="nesterov", help="")
    parser.add_argument("--weight-init",
                        nargs="+",
                        default=["he-backward"],
                        help="Weight initialization scheme")
    parser.add_argument("--initial-tune",
                        type=float,
                        default=0.0,
                        help="Perform initial tuning with learning rate")
    parser.add_argument("--learn-rate",
                        type=float,
                        default=0.1,
                        help="Learning rate for weights and biases.")
    parser.add_argument(
        "--learn-momentum",
        type=float,
        default=[0.0, 0.0],
        nargs="+",
        help="Learning momentum for weights and biases (0.0 - 1.0).")
    parser.add_argument(
        "--learn-anneal",
        type=float,
        default=1,
        help="Annealing factor per epoch for weight and bias learning rate")
    parser.add_argument(
        "--learn-anneal-epochs",
        nargs="+",
        type=int,
        default=[],
        help="Epochs to apply learning rate annealing (default every epoch)")
    parser.add_argument("--learn-decay",
                        type=float,
                        default=0.0,
                        help="L2 weight decay (not applied to biases). ")
    parser.add_argument("--epochs",
                        type=int,
                        default=30,
                        help="The number of training epochs")
    parser.add_argument("--epoch-start",
                        type=int,
                        default=0,
                        help="Epoch to start from")
    parser.add_argument("--subset-start",
                        type=int,
                        default=0,
                        help="Subset to start from")
    parser.add_argument("--max-samples",
                        type=int,
                        default=None,
                        help="Maximum samples to load from training set")
    parser.add_argument("--batch-size",
                        type=int,
                        default=32,
                        help="Size of each processing batch (per GPU)")
    parser.add_argument(
        "--batch-size-factor",
        type=int,
        default=1,
        help=
        "Batch size multiplier, use when desired batch size won't fit in memory."
    )
    parser.add_argument(
        "--batch-data-size",
        type=int,
        default=1,
        help="Number of batches to upload to GPU for processing")
    parser.add_argument("--seed",
                        type=int,
                        default=23455,
                        help="Random Seed for weights")
    parser.add_argument(
        "--split-seed",
        type=int,
        default=0,
        help="Random Seed for splitting into validation / training")
    parser.add_argument("--export-symbolic",
                        default=None,
                        help="Save datasets as symbolic links")
    parser.add_argument(
        "--distort-mode",
        default=[],
        nargs="+",
        help="Distortions to apply to training data (default, cifar10, disable)"
    )
    parser.add_argument(
        "--augment-mirror",
        default=False,
        action="store_true",
        help="Augment training data with horizontally mirrored copies")
    parser.add_argument("--skip-train",
                        default=False,
                        action="store_true",
                        help="Skip training of model")
    parser.add_argument("--skip-layer-updates",
                        type=int,
                        nargs="+",
                        default=[],
                        help="Skip training updates to specified layers")
    parser.add_argument("--model-desc",
                        default=[
                            "C100,7", "P2", "C150,4", "P2", "C250,4", "P2",
                            "C300,1", "CR"
                        ],
                        nargs="+",
                        type=str,
                        help="Network layer description")
    parser.add_argument(
        "--theano-flags",
        type=str,
        default="lib.cnmem=1.0",
        help="Additional THEANO_FLAGS environment variables for worker threads"
    )
    parser.add_argument("--restart",
                        default=False,
                        action="store_true",
                        help="Restart training of model")
    args = parser.parse_args()

    logging.init(args)

    #continue training
    args_fname = "./train.args"
    if args.restart:
        args = load_restart_args(args_fname, args)
    else:
        logging.info("Exporting arguments:", args_fname)
        with open(args_fname, "wb") as f:
            pickle.dump(args, f)

    #start MPI update server if this is master node:
    if args.update_server is not None and args.update_server[0] == "mpi":
        from mpi4py import MPI
        if MPI.COMM_WORLD.Get_rank() == 0:
            momentum = float(args.update_server[1])
            update_server = UpdateServer(args.model_dims,
                                         momentum=momentum,
                                         use_mpi=True,
                                         use_async=True)
            sys.exit(update_server.start())

    #set random seeds
    random.seed(args.seed)
    numpy.random.seed(args.seed)

    #load training dataset
    logging.info("Loading training data: " + str(args.train))
    train_data = dataset.load(args.train,
                              args.extension,
                              is_training=True,
                              thread_num=args.thread_num)
    data_shape = train_data.get_data_shape()
    class_num = train_data.get_class_num()
    class_labels = train_data.class_labels
    logging.info(
        "Found %i samples across %i class Labels:\n" %
        (train_data.get_total_size(), class_num), class_labels)

    #HACK to determine model parameter dimensions for shared models without initializing theano...
    #No longer needed in theano-0.8.0
    if not os.path.isfile(args.model_dims):
        logging.info("Exporting model dims file to " + args.model_dims)
        import model_cnn
        model = model_cnn.initialize(args, data_shape, class_labels, class_num)
        model.build_train_func(args.solver, skip_build=True)
        shared.ModelUpdate.save_dims(args.model_dims, model)
        logging.info("Done")
        exit(0)

    #construct worker processes (must be done before model due to Theano init! No longer true in theano 0.8.0):
    logging.info("Initializing worker procs for", args.gpus)
    workers = [
        WorkerProcess(gpu, args, data_shape, class_labels) for gpu in args.gpus
    ]

    #initialize model (and Theano)
    import model_cnn
    model = model_cnn.initialize(args, data_shape, class_labels, class_num)
    model.build_train_func(args.solver, skip_build=True)

    #mirror training data
    if args.augment_mirror:
        train_data.augment_mirror()

    #load test dataset
    if args.test:
        logging.info("Loading test: " + str(args.test))
        test_data = dataset.load(args.test,
                                 args.extension,
                                 is_training=False,
                                 class_labels=class_labels,
                                 thread_num=args.thread_num)
        logging.info("Testing: " + str(test_data.get_total_size()) +
                     " samples")
        assert (test_data.get_total_size() != 0)

    #connect with update server
    if args.update_server is not None:

        addr = args.update_server[0]
        use_mpi = bool(addr == "mpi")
        use_async = bool(len(args.update_server) == 2)
        port = 0 if use_mpi else int(args.update_server[1])
        offset = 0 if use_async else int(args.update_server[2])
        delta = 0 if use_async else int(args.update_server[3])

        logging.info(
            "Connecting to update server (async=%i, mpi=%i): " %
            (use_async, use_mpi), addr, port)
        sock = 0 if use_mpi else socket.create_connection((addr, port))
        update_client = UpdateClient(args.epoch_start, args.subset_start,
                                     train_data.subset_num, sock, use_async,
                                     use_mpi, offset, delta)
    else:
        update_client = UpdateClient(args.epoch_start, args.subset_start,
                                     train_data.subset_num)

    #perform training and save models
    if args.initial_tune > 0:
        logging.info("----- Initial Fine Tune -----")
        logging.info("Running initial tune with learning rate:",
                     args.initial_tune)
        run_train_epoch(args, update_client, workers, model, train_data,
                        args.initial_tune)

    #anneal learning rate
    learn_rate = args.learn_rate
    for epoch in range(0, args.epoch_start):
        if len(args.learn_anneal_epochs) == 0 or (
                epoch + 1) in args.learn_anneal_epochs:
            logging.verbose("Annealing learning rate")
            learn_rate *= args.learn_anneal

    #Run training
    best_test_error = 100.0
    for epoch in range(args.epoch_start, args.epochs):
        logging.info("----- Training Epoch: " + str(epoch) + " -----")

        #perform training and save models
        if not args.skip_train:
            logging.info("Training with learning rates " + str(learn_rate) +
                         " and momentum " + str(args.learn_momentum))
            timer = common.Timer()
            cost = run_train_epoch(args, update_client, workers, model,
                                   train_data, learn_rate)
            logging.info("Training - mean cost:", cost,
                         ", took %.0f sec" % timer.current())

        #anneal learning rate
        if len(args.learn_anneal_epochs) == 0 or (
                epoch + 1) in args.learn_anneal_epochs:
            logging.verbose("Annealing learning rate")
            learn_rate *= args.learn_anneal

        #perform testing
        test_error = 0
        if args.test and ((epoch % args.test_epochs) == 0
                          or epoch == (args.epochs - 1)):
            ts = time.time()
            test_error, test_class_errors = compute_error(
                workers, model, test_data)
            logging.info("Epoch %i Test Error: %.2f%%, Took %.0f sec" %
                         (epoch, test_error, time.time() - ts))
            save_results(args.output_prefix + "_epoch%03i.test" % epoch,
                         test_error, test_class_errors)

    logging.info("Finished Training")
Example #11
def run_train_epoch(args, update_client, workers, model, train_data,
                    learn_rate):

    import model_cnn

    logging.info("Perform train...")
    batch_size_factor = args.batch_size_factor
    output_prefix = args.output_prefix
    model_dims = args.model_dims
    model_save_dt = args.model_save_dt * 60

    #update learning rates:
    for worker in workers:
        with worker.learn_rate.get_lock():
            worker.learn_rate.value = learn_rate

    #randomly shuffle data before each epoch, set seed to ensure each node has same data order
    random.seed(args.seed + update_client.epoch)
    train_data.shuffle()

    #perform initial sync so that all nodes have the same model
    model_update = shared.ModelUpdate(model_dims)
    model_update.import_updates(model)
    # update_client.sync(model_update, workers, initial=True)

    #get subset next
    subset_next = update_client.get_subset_next()

    #start export of data
    batch_size = len(workers) * model.batch_size * batch_size_factor
    logging.info(
        "SGD batch size is %ix%ix%i = %i" %
        (batch_size_factor, len(workers), model.batch_size, batch_size))
    export_thread = DatasetExportThread(model, train_data, subset_next,
                                        batch_size, True)

    #start processing
    total_cost = 0
    total_it = 0
    subset_current = subset_next
    epoch_current = update_client.epoch
    for worker in workers:
        worker.set_epoch(epoch_current)

    timer = common.Timer()
    timer_save = common.Timer()
    while subset_next >= 0:

        #wait until export is ready
        timer.reset()
        export_thread.wait()
        data_x, data_y, data_size = export_thread.get_export()
        subset_current = subset_next
        del export_thread
        if timer.current() > 1:
            logging.warning(
                "Warning: needed an additional %.1f seconds for dataset export"
                % timer.current())

        #print training classes for checking random seed etc
        logging.debug("Sample Metas: ", data_y[0:min(3, len(data_y))])

        #start exporting next subset
        subset_next = update_client.get_subset_next()
        if subset_next >= 0:
            export_thread = DatasetExportThread(model, train_data, subset_next,
                                                batch_size, True)

        # #store initial model before changes
        # model_update_delta = model_update.copy()

        logging.info("Evaluating training function")
        timer.reset()
        batch_num = data_x.shape[0] // model.batch_size
        it_num = batch_num // (len(workers) * batch_size_factor)
        index = 0
        subset_cost = 0

        while (index < batch_num):

            total_ts = time.time()

            def train_worker_thread(worker, indexs):
                worker.wait()
                worker.model_write(model_update)
                worker.train_begin()
                for i in indexs:
                    dx = data_x[i * model.batch_size:(i + 1) *
                                model.batch_size]
                    dy = data_y[i * model.batch_size:(i + 1) *
                                model.batch_size]
                    worker.train_step(dx, dy)
                    worker.wait()
                worker.train_end()
                worker.model_read()
                worker.wait()

            threads = []
            for worker in workers:
                worker_indexs = []
                for _ in range(batch_size_factor):
                    if index < batch_num:
                        worker_indexs.append(index)
                        index += 1

                t = threading.Thread(target=train_worker_thread,
                                     args=(worker, worker_indexs))
                t.start()
                threads.append((t, time.time()))

            proc_ts = []
            for t, start_ts in threads:
                t.join()
                proc_ts.append(int(1000 * (time.time() - start_ts)))

            #average models between GPUs and print batch info
            combine_ts = time.time()
            batch_cost = 0
            model_update.set_mean_init()
            for worker in workers:
                model_update.set_mean_update(worker.model_update)
                with worker.cost.get_lock():
                    batch_cost += worker.cost.value
            model_update.set_mean_finish()
            batch_cost /= len(workers)
            subset_cost += batch_cost
            it_index = index // (len(workers) * batch_size_factor)
            combine_ts = int(1000 * (time.time() - combine_ts))

            logging.verbose("Processing times (ms):", proc_ts,
                            ", Combine time: %i ms" % combine_ts)
            logging.info(
                "Subset %i/%i, Batch It %i/%i" %
                (subset_current + 1, train_data.subset_num, it_index, it_num),
                "- Cost:", batch_cost,
                "Time: %i ms" % (1000 * (time.time() - total_ts)))

        logging.info(
            "Training subset %i took %0.1f sec, mean cost:" %
            (subset_current + 1, timer.current()), subset_cost / it_num)
        total_it += it_num
        total_cost += subset_cost

        #update with server (if one exists)
        model_update.export_updates(model)
        # model_update_delta.set_delta(model_update)
        # update_client.update(model_update_delta, model_update, workers)

        #save intermediate models
        if timer_save.current() > model_save_dt and model_save_dt > 0:
            model_cnn.save_to_file(
                model, output_prefix + "_epoch%03i_subset%03i.mdl.gz" %
                (epoch_current, subset_current + 1))
            timer_save.reset()

    #perform final sync so that all nodes have the same model
    update_client.sync(model_update, workers)

    #save final models
    model_cnn.save_to_file(
        model, output_prefix + "_epoch%03i_final.mdl.gz" % (epoch_current))

    return (total_cost / total_it)
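
Inside `run_train_epoch` each GPU worker takes `batch_size_factor` local SGD steps and the resulting parameter copies are then averaged back together through `set_mean_init` / `set_mean_update` / `set_mean_finish`. Since `shared.ModelUpdate` is not shown, here is only a minimal sketch of that mean reduction over lists of parameter arrays, with made-up names:

import numpy

def average_updates(worker_params):
    """Average per-worker parameter sets (each a list of numpy arrays)."""
    mean = [numpy.zeros_like(p) for p in worker_params[0]]  #set_mean_init
    for params in worker_params:                            #set_mean_update per worker
        for m, p in zip(mean, params):
            m += p
    return [m / len(worker_params) for m in mean]           #set_mean_finish

#two "workers" whose parameters drifted apart after their local steps
w0 = [numpy.array([1.0, 2.0]), numpy.array([[0.0]])]
w1 = [numpy.array([3.0, 4.0]), numpy.array([[2.0]])]
print(average_updates([w0, w1]))  #[array([2., 3.]), array([[1.]])]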
Example #12
    def get_detections(self, model, data_x, data_m, params):

        pr_threshold = params.get("prThreshold", 0.01)
        nms_threshold = params.get("nmsThreshold", 0.5)
        corner_threshold = params.get("cornerThreshold", self.sparse_layer.corner_threshold)
        corner_max = params.get("cornerMax", 1024)
        use_soft_nms = params.get("useSoftNMS", 0) == 1
        t = (pr_threshold, nms_threshold, corner_threshold, corner_max)
        logging.verbose("Using detection params - pr threshold: %f, nms threshold: %f, corner_threshold: %f, corner_max: %i"%t)

        first_detect = False
        if self.detect_func is None:

            #get all model outputs
            outputs=[]

            if self.use_jointfit:
                det_fit = self.det_pr
                det_fit_null = det_fit[:, self.null_class, :, :]
                det_fit = det_fit[:,:self.class_num*self.fitness_num, :, :]
                det_fit = det_fit.reshape((self.batch_size, self.class_num, self.fitness_num, self.sample_num, self.sample_num))
                det_fit_pr = tensor.exp(det_fit)
                
                m = tensor.max(det_fit, axis=2)
                det_pr = m + tensor.log(tensor.sum(tensor.exp(det_fit - m[:,:,None,:,:]), axis=2))
                det_pr = tensor.concatenate([det_pr, det_fit_null[:,None,:,:]], axis=1)
                outputs.append(det_pr)

                val = [self.overlap_threshold[0] + i*(1.0 - self.overlap_threshold[0])/self.fitness_num for i in range(self.fitness_num)]
                fitness_val = theano.shared(numpy.array(val, dtype=numpy.float32))
                fitness = tensor.log(tensor.sum(det_fit_pr*fitness_val[None,None,:,None,None], axis=2))
                outputs.append(fitness)
            else:
                outputs.append(self.det_pr)

            if self.use_bbox_reg:
                outputs.append(self.bbox_predict)

            if self.use_indfit:
                outputs.append(tensor.exp(self.indfit_pr))

            logging.info("Building detection function")
            self.detect_func = theano.function([model.input], outputs, givens=[(get_train(), tensor.cast(0, 'int8'))], on_unused_input='ignore')

            logging.verbose("Exporting graph...")
            with open("detect_graph.txt", "w") as f:
                theano.printing.debugprint(self.detect_func, file=f, print_type=True)
            
            first_detect = True

        #get sampling bounding boxes
        logging.verbose("Detecting sample bboxs (%.2f)"%corner_threshold)
        timer = common.Timer()
        sample_bboxs = self.sparse_layer.get_samples(data_x, train=False, store_shared=True)
        timer.mark()
        logging.verbose("Found sample bboxs: {}".format([len(bbox) for bbox in sample_bboxs]))

        #upload sampling bounding boxes
        bboxs = self.sparse_layer.set_samples(sample_bboxs)
        timer.mark()

        #classify sampling bounding boxes
        r = list(self.detect_func(data_x))

        #get outputs
        if self.use_jointfit:
            det_pr = r[0]
            fitness = r[1]
            r_index = 2
        else:
            det_pr = r[0]
            fitness = numpy.copy(det_pr)
            r_index = 1
            
        if self.use_bbox_reg:
            bboxs = r[r_index]
            r_index += 1
        else:
            bboxs = self.sparse_layer.get_bbox_array(sample_bboxs)

            
        if self.use_indfit:
            indfit_pr = r[r_index]
            fitness_val = numpy.array([0.0] + [self.overlap_threshold[0] + i * (1.0 - self.overlap_threshold[0])/(self.fitness_num-1) for i in range(self.fitness_num-1)])
            fitness_exp = numpy.sum(indfit_pr*fitness_val[None,:,None,None], axis=1).astype(numpy.float32)
            fitness += numpy.log(fitness_exp)[:,None,:,:]
            r_index += 1
                

        timer.mark()
        sample_bbox_num = [len(s) for s in sample_bboxs]
        detlists = c_code.build_detections_nms(pr_threshold, nms_threshold, use_soft_nms, det_pr, fitness, bboxs, sample_bbox_num)
        timer.mark()

        logging.verbose("Found detections:", [len(detlist) for detlist in detlists])
        logging.verbose("FPS=%.1f, Timing (ms) - get samples: %i, upload: %i, classify: %i, build+nms %i"%tuple([self.batch_size / timer.current()] + timer.deltas_ms()))

        if not first_detect:
            global detect_time, detect_num
            detect_time += timer.current()
            detect_num += self.batch_size
            logging.info("Average FPS=%.1f"%(detect_num / detect_time))

        #results format
        results=[]
        for i, detlist in enumerate(detlists):
            results.append({"detections":detlist, "meta":data_m[i]})

        return results
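
In the joint-fitness branch the per-class detection score is collapsed over the fitness bins with a max-shifted log-sum-exp along axis 2, which keeps `exp` from overflowing. The same reduction in plain numpy, with illustrative shapes (batch, class, fitness, h, w):

import numpy

def logsumexp(x, axis):
    """Numerically stable log(sum(exp(x))) along one axis."""
    m = numpy.max(x, axis=axis)
    return m + numpy.log(numpy.sum(numpy.exp(x - numpy.expand_dims(m, axis)), axis=axis))

#collapse the fitness bins: (batch, class, fitness, h, w) -> (batch, class, h, w)
det_fit = numpy.random.randn(2, 3, 4, 5, 5).astype(numpy.float32)
det_pr = logsumexp(det_fit, axis=2)
print(det_pr.shape)  #(2, 3, 5, 5)
print(numpy.allclose(det_pr, numpy.log(numpy.exp(det_fit).sum(axis=2)), atol=1e-5))  #True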