Example #1
0
        def distributed_test_epoch(epoch_num):
            """Evaluate the model on the validation set for one epoch.

            The model is switched to eval mode and every batch is processed
            under ``torch.no_grad()``.

            Args:
                epoch_num: Index of the current epoch (not used in the body).

            Returns:
                ``(summary_loss, acc_score)``: the ``AverageMeter`` and
                ``ACCMeter`` instances updated with every validation batch.
            """
            summary_loss = AverageMeter()
            acc_score = ACCMeter()

            self.model.eval()
            t = time.time()
            with torch.no_grad():
                for step in range(self.val_ds.size):
                    # Draw from the validation iterator: the loop is sized by
                    # ``self.val_ds.size``, and pulling from ``self.train_ds``
                    # here would score the model on training batches.
                    # NOTE(review): assumes ``self.val_ds()`` yields the same
                    # (images, data, target) triple as the training iterator.
                    images, data, target = self.val_ds()
                    images = torch.from_numpy(images).to(self.device).float()
                    data = torch.from_numpy(data).to(self.device).float()
                    target = torch.from_numpy(target).to(self.device).float()
                    batch_size = data.shape[0]

                    output = self.model(images, data)
                    loss = self.criterion(output, target)

                    summary_loss.update(loss.detach().item(), batch_size)
                    acc_score.update(target, output)

                    if step % cfg.TRAIN.log_interval == 0:

                        # ``time`` in the message is the time elapsed since
                        # the start of this validation pass.
                        log_message = '[fold %d], '\
                                      'Val Step %d, ' \
                                      'summary_loss: %.6f, ' \
                                      'acc: %.6f, ' \
                                      'time: %.6f' % (
                                      self.fold,step, summary_loss.avg, acc_score.avg, time.time() - t)

                        logger.info(log_message)

            return summary_loss, acc_score
Example #2
0
    def load_weight(self):
        """Restore model parameters from ``cfg.MODEL.pretrained_model``.

        Three cases, checked in order:

        * ``cfg.MODEL.continue_train`` is truthy: restore every trainable
          variable plus every ``moving_mean`` / ``moving_variance`` variable.
        * a pretrained checkpoint is configured: restore only the variables
          that belong to ``cfg.MODEL.net_structure``, skipping any whose name
          contains ``'GN'``.
        * otherwise nothing is restored and a message is logged.
        """

        def _is_moving_stat(var):
            # Selects the moving-average variables by name.
            return 'moving_mean' in var.name or 'moving_variance' in var.name

        with self._graph.as_default():

            if cfg.MODEL.continue_train:
                # Resume: all trainable weights plus all moving statistics.
                restore_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                restore_vars.extend(
                    [var for var in tf.global_variables() if _is_moving_stat(var)])
                tf.train.Saver(restore_vars).restore(self.sess,
                                                     cfg.MODEL.pretrained_model)

            elif cfg.MODEL.pretrained_model is not None:
                # Warm start: only variables of the configured net structure.
                net_scope = cfg.MODEL.net_structure
                restore_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                                 scope=net_scope)
                restore_vars.extend(
                    [var for var in tf.global_variables()
                     if _is_moving_stat(var) and net_scope in var.name])
                print(restore_vars)

                # Variables with 'GN' in their name are not restored.
                restore_vars_without_gn = []
                for var in restore_vars:
                    if 'GN' in var.name:
                        continue
                    restore_vars_without_gn.append(var)

                tf.train.Saver(restore_vars_without_gn).restore(
                    self.sess, cfg.MODEL.pretrained_model)

            else:
                logger.info('no pretrained model, train from sctrach')
Example #3
0
        def distributed_train_epoch(ds, epoch_num):
            """Run one training epoch over ``ds`` with the distribution strategy.

            Args:
                ds: Iterable of batches; each batch is handed to
                    ``self.train_step`` through the strategy.
                epoch_num: Epoch index, used only in the log line.

            Returns:
                ``(total_loss, num_train_batches)``: the sum of the reduced
                per-batch losses and the number of batches seen (a float).
            """
            log_format = ('epoch_num: %d, '
                          'iter_num: %d, '
                          'loss_value: %.6f,  '
                          'speed: %d images/sec ')

            loss_sum = 0.0
            batch_count = 0.0
            for batch in ds:
                tic = time.time()

                # Run the step on every replica, then sum the replica losses.
                replica_losses = strategy.experimental_run_v2(
                    self.train_step, args=(batch, ))
                step_loss = strategy.reduce(tf.distribute.ReduceOp.SUM,
                                            replica_losses,
                                            axis=None)

                loss_sum += step_loss
                batch_count += 1
                self.iter_num += 1

                elapsed = time.time() - tic
                throughput = cfg.TRAIN.batch_size / elapsed

                if self.iter_num % cfg.TRAIN.log_interval == 0:
                    logger.info(log_format % (epoch_num, self.iter_num,
                                              step_loss, throughput))

            return loss_sum, batch_count
Example #4
0
        def distributed_test_epoch(epoch_num):
            """Evaluate the two-output model on the validation set.

            Both outputs are scored with ``self.criterion``; the loss that is
            accumulated is the second one when ``self.pretrain`` is truthy and
            the first one otherwise.

            Args:
                epoch_num: Index of the current epoch (not used in the body).

            Returns:
                The ``AverageMeter`` updated with every validation batch.
            """

            def _as_float_tensor(array):
                # numpy array -> float tensor on the configured device
                return torch.from_numpy(array).to(self.device).float()

            loss_meter = AverageMeter()

            self.model.eval()
            started_at = time.time()
            with torch.no_grad():
                for step in range(self.val_ds.size):
                    raw_feature, raw_target1, raw_target2 = self.val_ds()
                    feature = _as_float_tensor(raw_feature)
                    target1 = _as_float_tensor(raw_target1)
                    target2 = _as_float_tensor(raw_target2)
                    n_samples = feature.shape[0]

                    first_out, second_out = self.model(feature)
                    first_loss = self.criterion(first_out, target1)
                    second_loss = self.criterion(second_out, target2)

                    selected_loss = second_loss if self.pretrain else first_loss

                    loss_meter.update(selected_loss.detach().item(), n_samples)

                    if step % cfg.TRAIN.log_interval != 0:
                        continue

                    logger.info('[fold %d], '
                                'Val Step %d, '
                                'summary_loss: %.6f, '
                                'time: %.6f' % (self.fold, step,
                                                loss_meter.avg,
                                                time.time() - started_at))

            return loss_meter
Example #5
0
    def load_weight(self):
        """Initialise model variables according to the configuration.

        Branches, checked in order:

        1. ``cfg.MODEL.continue_train`` is truthy: restore every
           ``MODEL_VARIABLES`` entry from ``cfg.MODEL.pretrained_model``.
        2. ``cfg.MODEL.pretrained_model`` is set and contains ``'npy'``: load
           a pickled ``{variable_name: array}`` dict with ``np.load`` and
           assign each selected variable from it.
        3. ``cfg.MODEL.pretrained_model`` is set (any other value): restore
           the selected variables from a TensorFlow checkpoint.
        4. ``cfg.MODEL.pretrained_model`` is ``None``: nothing is restored.

        In branches 2 and 3 the selected variables are the
        ``MODEL_VARIABLES`` under scope ``cfg.MODEL.net_structure``, without
        those whose name contains ``'classifier'`` when
        ``cfg.MODEL.cls != 1000``.

        Raises:
            KeyError: in branch 2, if a selected variable name is missing from
                the loaded dict.
        """

        def _scoped_model_variables(skip_classifier):
            # MODEL_VARIABLES under the configured scope, optionally without
            # the classifier variables.
            selected = tf.get_collection(tf.GraphKeys.MODEL_VARIABLES,
                                         scope=cfg.MODEL.net_structure)
            if skip_classifier and cfg.MODEL.cls != 1000:
                selected = [x for x in selected if 'classifier' not in x.name]
            return selected

        with self._graph.as_default():
            pretrained = cfg.MODEL.pretrained_model

            if cfg.MODEL.continue_train:
                #########################restore the params
                variables_restore = tf.get_collection(
                    tf.GraphKeys.MODEL_VARIABLES)
                print(variables_restore)

                saver2 = tf.train.Saver(variables_restore)
                saver2.restore(self._sess, pretrained)

            # Guard against None first: ``'npy' in None`` raises TypeError,
            # which previously made the "no pretrained model" branch
            # unreachable when no path was configured.
            elif pretrained is not None and 'npy' in pretrained:

                # SECURITY: allow_pickle=True unpickles the file contents;
                # only load weight files from a trusted source.
                params_dict = np.load(pretrained, allow_pickle=True).item()

                #########################restore the params
                variables_restore = _scoped_model_variables(
                    skip_classifier=True)

                print(variables_restore)
                for variable in variables_restore:

                    logger.info('assign %s with np data' % (variable.name))

                    self._sess.run(
                        variable.assign(params_dict[variable.name]))

            elif pretrained is not None:
                #########################restore the params
                variables_restore = _scoped_model_variables(
                    skip_classifier=True)
                print(variables_restore)
                saver2 = tf.train.Saver(variables_restore)
                saver2.restore(self._sess, pretrained)

            else:
                # Nothing to restore; the variable list is only printed.
                variables_restore = _scoped_model_variables(
                    skip_classifier=False)
                print(variables_restore)
                logger.info('no pretrained model, train from sctrach')
Example #6
0
    def load_anns(self):
        """Load the JSON annotation file at ``self.ann_json`` into ``self.metas``."""
        with open(self.ann_json, 'r') as ann_fp:
            loaded = json.load(ann_fp)
        self.metas = loaded

        # Hook point: the loaded list can be filtered or transformed here.

        sample_count = len(self.metas)
        logger.info('the datasets contains %d samples' % sample_count)
    def get_image_annos(self):
        """Collect box annotations for every COCO image that exists on disk.

        For each image id known to ``self.coco`` the image path is resolved
        under ``self.image_base_dir``; images missing on disk are skipped with
        a printed notice. Crowd annotations and boxes narrower or shorter than
        one pixel are dropped. Every image that keeps at least one box is
        appended to ``self.metas`` as a ``CocoMeta_bbox``.

        Boxes are stored as ``[x1, y1, x2, y2, klass]`` where ``klass`` is the
        position of the category name in the list of category names.
        """
        image_ids = self.coco.getImgIds()
        categories = self.coco.loadCats(self.coco.getCatIds())

        # category id -> category name
        id_to_name = {category['id']: category['name'] for category in categories}

        names = [category['name'] for category in categories]
        print('COCO categories: \n{}\n'.format(' '.join(names)))

        print(id_to_name)

        for image_id in image_ids:
            image_info = self.coco.loadImgs([image_id])
            image_path = os.path.join(self.image_base_dir,
                                      image_info[0]['file_name'])

            # An annotation may reference an image that is not on disk.
            if not os.path.exists(image_path):
                print(
                    "[skip] json annotation found, but cannot found image: {}".
                    format(image_path))
                continue

            annotation_ids = self.coco.getAnnIds(imgIds=[image_id])
            annotations = self.coco.loadAnns(annotation_ids)

            boxes = []
            for annotation in annotations:
                if annotation["iscrowd"]:
                    continue

                bbox = annotation['bbox']
                category_id = annotation['category_id']
                klass = names.index(id_to_name[category_id])

                # Drop degenerate boxes (width or height below one pixel).
                if bbox[2] < 1 or bbox[3] < 1:
                    continue

                # Convert [x, y, w, h] into corner form and attach the class.
                left, top, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
                boxes.append([left, top, left + width, top + height, klass])

            if boxes:
                self.metas.append(CocoMeta_bbox(image_id, image_path, boxes))

        logger.info("Overall get {} valid images from {} and {}".format(
            len(self.metas), self.image_base_dir, self.anno_path))
Example #8
0
    def load_anns(self):
        """Read every line of ``self.ann_file`` into ``self.metas``."""
        with open(self.ann_file, 'r') as ann_fp:
            lines = ann_fp.readlines()

        # Hook point: the raw lines can be filtered or transformed here.
        self.metas = lines

        sample_count = len(self.metas)
        logger.info('the datasets contains %d samples' % sample_count)
Example #9
0
    def parse_file(self, im_root_path, ann_file):
        """Return all samples described by ``ann_file`` under ``im_root_path``.

        The samples are whatever ``data_info(...).get_all_sample()`` returns.
        """
        logger.info("[x] Get dataset from {}".format(im_root_path))

        return data_info(im_root_path, ann_file).get_all_sample()
    def parse_file(self,im_root_path,ann_file):
        """Return the samples of ``ann_file`` after passing them through ``self.balance``.

        The number of samples before balancing is recorded in
        ``self.raw_data_set_size``.
        """
        logger.info("[x] Get dataset from {}".format(im_root_path))

        raw_samples = data_info(im_root_path, ann_file).get_all_sample()
        self.raw_data_set_size = len(raw_samples)
        return self.balance(raw_samples)
Example #11
0
    def _train(self, _epoch):
        """Run ``cfg.TRAIN.iter_num_per_epoch`` training iterations.

        Each iteration pulls one batch from ``self.train_ds``, optionally shows
        the first ``cfg.TRAIN.batch_size`` images with OpenCV, slices the batch
        into one chunk per GPU, feeds the chunks to the per-GPU entries of
        ``self.inputs`` and runs ``self.outputs`` in the session.

        Args:
            _epoch: Epoch index, used only in the log line.
        """
        for _ in range(cfg.TRAIN.iter_num_per_epoch):
            self.ite_num += 1
            start_time = time.time()

            example_images, example_labels = next(self.train_ds)

            ########show_flag check the data
            if cfg.TRAIN.vis:
                for i in range(cfg.TRAIN.batch_size):
                    example_image = example_images[i, :, :, :]
                    example_label = example_labels[i]

                    print(example_label)
                    cv2.namedWindow('img', 0)
                    cv2.imshow('img', example_image.astype(np.uint8))
                    # Blocks until a key is pressed.
                    cv2.waitKey(0)

            # Time spent obtaining (and, if enabled, displaying) the batch.
            fetch_duration = time.time() - start_time

            # Give GPU n the n-th contiguous slice of the batch.
            for n in range(cfg.TRAIN.num_gpu):
                self.train_dict[self.inputs[0][n]] = example_images[
                    n * cfg.TRAIN.batch_size:(n + 1) *
                    cfg.TRAIN.batch_size, :, :, :]
                self.train_dict[self.inputs[1][n]] = example_labels[
                    n * cfg.TRAIN.batch_size:(n + 1) * cfg.TRAIN.batch_size]

            # NOTE(review): self.inputs[2] is presumably the training-mode
            # flag placeholder; confirm against the graph builder.
            self.train_dict[self.inputs[2]] = True
            _, total_loss_value, loss_value, top1_acc_value, top5_acc_value, l2_loss_value, learn_rate, = \
                self._sess.run([*self.outputs],
                         feed_dict=self.train_dict)

            duration = time.time() - start_time
            run_duration = duration - fetch_duration
            if self.ite_num % cfg.TRAIN.log_interval == 0:
                num_examples_per_step = cfg.TRAIN.batch_size * cfg.TRAIN.num_gpu
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / cfg.TRAIN.num_gpu

                # The trailing space after the fetch-time field keeps it from
                # running into the 'run time' field in the emitted line.
                format_str = ('epoch %d: iter %d, '
                              'total_loss=%.6f '
                              'loss=%.6f '
                              'top1 acc=%.6f '
                              'top5 acc=%.6f '
                              'l2_loss=%.6f '
                              'learn_rate =%e '
                              '(%.1f examples/sec; %.3f sec/batch) '
                              'fetch data time = %.6f '
                              'run time = %.6f')
                logger.info(format_str %
                            (_epoch, self.ite_num, total_loss_value,
                             loss_value, top1_acc_value, top5_acc_value,
                             l2_loss_value, learn_rate, examples_per_sec,
                             sec_per_batch, fetch_duration, run_duration))
Example #12
0
    def loop(self, ):
        """Build the graph, load weights and write a checkpoint to ``saved_file``.

        NOTE(review): ``saved_file`` is not defined inside this method; it has
        to come from an enclosing or module scope that is not visible here.
        """
        self.build()
        self.load_weight()

        with self._graph.as_default():
            # Saver covering every global variable.
            all_variables = tf.global_variables()
            self.saver = tf.train.Saver(all_variables, max_to_keep=None)

            logger.info('A tmp model  saved as %s \n' % saved_file)
            self.saver.save(self._sess, save_path=saved_file)
    def report(self):
        """Log top-1 / top-5 accuracy over the accumulated samples, then reset.

        When no samples have been accumulated (``self.total`` is 0) both
        accuracies are reported as 0.0 instead of raising
        ``ZeroDivisionError``. The counters ``top1_correct``, ``top5_correct``
        and ``total`` are set back to 0 afterwards.
        """
        total = self.total
        if total:
            top1_acc = self.top1_correct / total
            top5_acc = self.top5_correct / total
        else:
            # Nothing was evaluated; avoid dividing by zero.
            top1_acc = top5_acc = 0.0

        ## report
        message = ''.join([
            'top1 acc:%.6f\n' % top1_acc,
            'top5 acc:%.6f\n' % top5_acc,
            '%d samples \n' % total,
        ])

        logger.info(message)

        # Start the next accumulation window from zero.
        self.top1_correct = 0
        self.top5_correct = 0
        self.total = 0
Example #14
0
    def forward(self,
                inputs,
                boxes,
                labels,
                l2_regulation,
                training_flag,
                with_loss=True):
        """Build the detector graph and return its two losses.

        The inputs are preprocessed, run through the backbone and the head
        (with ``ratios_per_pixel=2``), scored with ``ssd_loss`` in ``'ohem'``
        mode, and handed to ``self.postprocess`` together with the fixed
        anchors.

        Args:
            inputs: Input batch passed to ``self.preprocess``.
            boxes: Box targets forwarded to ``ssd_loss``.
            labels: Label targets forwarded to ``ssd_loss``.
            l2_regulation: Forwarded to the backbone and the head.
            training_flag: Forwarded to the backbone and the head.
            with_loss: Not used in the body.

        Returns:
            ``(reg_loss, cls_loss)`` as produced by ``ssd_loss``.
        """
        # Preprocess, then extract the feature maps.
        preprocessed = self.preprocess(inputs)
        feature_maps = self.ssd_backbone(preprocessed, l2_regulation,
                                         training_flag)
        print(feature_maps)

        # Head: regression and classification outputs.
        logger.info('train with dsfd ')
        reg_pred, cls_pred = self.ssd_head(feature_maps,
                                           l2_regulation,
                                           training_flag,
                                           ratios_per_pixel=2)

        # Losses.
        reg_loss, cls_loss = ssd_loss(reg_pred, cls_pred, boxes, labels,
                                      'ohem')

        # Fixed anchors, divided by the configured input width. An adaptive
        # variant that derives anchors from the runtime input shape
        # (get_all_anchors_fpn) is not enabled in this version.
        scaled_anchors = anchor_tools.anchors / cfg.DATA.win

        self.postprocess(reg_pred, cls_pred, scaled_anchors)

        return reg_loss, cls_loss
Example #15
0
        def distributed_train_epoch(epoch_num):
            """Train the three-output model for one epoch.

            For every batch the three losses returned by ``self.loss_function``
            are summed, back-propagated and applied with ``self.optimizer``.

            Args:
                epoch_num: Epoch index, used only in the log line.

            Returns:
                ``(total_loss, num_train_batches)``: the sum of the per-batch
                losses (detached from the autograd graph) and the number of
                batches processed (a float).
            """
            total_loss = 0.0
            num_train_batches = 0.0

            self.model.train()
            for step in range(self.train_ds.size):

                start = time.time()

                images, target = self.train_ds()

                images_torch = torch.from_numpy(images)
                target_torch = torch.from_numpy(target)

                data, target = images_torch.to(self.device), target_torch.to(
                    self.device)

                output1, output2, output3 = self.model(data)

                loss1, loss2, loss3, acc1, acc2, acc3 = self.loss_function(
                    [output1, output2, output3], target)

                current_loss = loss1 + loss2 + loss3
                self.optimizer.zero_grad()
                current_loss.backward()
                self.optimizer.step()

                # Accumulate a detached value: adding the differentiable loss
                # itself would chain every batch's autograd history into
                # ``total_loss`` and keep it alive for the whole epoch.
                total_loss += current_loss.detach()
                num_train_batches += 1
                self.iter_num += 1
                time_cost_per_batch = time.time() - start

                images_per_sec = cfg.TRAIN.batch_size / time_cost_per_batch

                if self.iter_num % cfg.TRAIN.log_interval == 0:
                    logger.info(
                        'epoch_num: %d, '
                        'iter_num: %d, '
                        'loss1: %.6f, '
                        'acc1:  %.6f, '
                        'loss2: %.6f, '
                        'acc2:  %.6f, '
                        'loss3: %.6f, '
                        'acc3:  %.6f, '
                        'loss_value: %.6f,  '
                        'speed: %d images/sec ' %
                        (epoch_num, self.iter_num, loss1, acc1, loss2, acc2,
                         loss3, acc3, current_loss, images_per_sec))

            return total_loss, num_train_batches
Example #16
0
    def _map_func(self,dp,is_training):
        """Load one sample from disk and apply augmentation.

        Args:
            dp: ``(fname, ann)`` pair: the image path and its label annotation.
            is_training: When truthy, apply a random crop/resize followed by
                randomly selected augmentations; otherwise apply a center crop.

        Returns:
            ``(image, label)``: a ``uint8`` image array and an ``int64`` label
            array. If loading or augmenting fails, a zero image of shape
            ``[cfg.MODEL.hin, cfg.MODEL.win, 3]`` and the label ``-1`` are
            returned instead.
        """
        ####customed here

        # Bound up front so the error handler can report it even when ``dp``
        # itself cannot be unpacked.
        fname = None
        try:
            fname, ann = dp
            image = cv2.imread(fname, cv2.IMREAD_COLOR)
            if cfg.DATA.rgb:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            label = np.array(ann)

            if is_training:

                image=self.random_crop_resize(image)

                if random.uniform(0, 1) > 0.5:
                    image, _ = Mirror(image, label=None, symmetry=None)
                if random.uniform(0, 1) > 0.5:
                    angle = random.uniform(-45, 45)
                    image, _ = Rotate_aug(image, label=None, angle=angle)

                # A draw from uniform(0, 1) never exceeds 1, so the affine
                # augmentation is effectively switched off.
                if random.uniform(0, 1) > 1.:
                    strength = random.uniform(0, 50)
                    image, _ = Affine_aug(image, strength=strength, label=None)

                if random.uniform(0, 1) > 0.5:
                    image=self.color_augmentor(image)
                # Pixel jitter is switched off the same way.
                if random.uniform(0, 1) > 1.0:
                    image=pixel_jitter(image,15)
                if random.uniform(0, 1) > 0.5:
                    image = Img_dropout(image, 0.2)

            else:
                ###centercrop
                image = self.center_crop(image)


            label = label.astype(np.int64)
            image= image.astype(np.uint8)
        except Exception:
            # Deliberate best effort: a broken sample becomes a placeholder
            # with label -1. ``Exception`` (rather than a bare ``except``)
            # lets KeyboardInterrupt/SystemExit propagate. The message is one
            # formatted string; the old call passed a second positional
            # argument that had no matching placeholder.
            logger.info('some err happended with %s but handled with -1' % (fname,))
            image=np.zeros(shape=[cfg.MODEL.hin,cfg.MODEL.win,3],dtype=np.uint8)
            label = np.array(-1,dtype=np.int64)

        return image, label
Example #17
0
    def save(self):
        """Build the graph, load weights, write a checkpoint and close the session.

        NOTE(review): ``saved_file`` is not defined inside this method; it has
        to come from an enclosing or module scope that is not visible here.
        """
        self.build()
        self.load_weight()

        with self._graph.as_default():
            # Saver covering every global variable.
            all_variables = tf.global_variables()
            self.saver = tf.train.Saver(all_variables, max_to_keep=None)

            logger.info('A tmp model  saved as %s \n' % saved_file)

            session = self.sess
            self.saver.save(session, save_path=saved_file)

            self.sess.close()
Example #18
0
    def read_txt(self):
        """Parse ``self.txt_file`` into ``[image_path, label]`` pairs on ``self.metas``.

        Lines are sorted, stripped of trailing whitespace and split once on the
        last ``'| '``. The part before it is joined to ``self.root_path``; the
        part after it is kept as the label string. A line without the
        separator yields the whole line for both parts.
        """
        with open(self.txt_file) as txt_fp:
            lines = txt_fp.readlines()
        lines.sort()

        for raw_line in lines:
            pieces = raw_line.rstrip().rsplit('| ', 1)
            relative_path, label = pieces[0], pieces[-1]

            self.metas.append(
                [os.path.join(self.root_path, relative_path), label])

            # Hook point: per-sample filtering can be added here.

        logger.info('the dataset contains %d images' % (len(lines)))
        logger.info('the datasets contains %d samples' % (len(self.metas)))
Example #19
0
    def load_anns(self):
        """Parse ``self.ann_file`` into ``[image_path, label]`` pairs on ``self.metas``.

        Each line is split on ``'|'``; the first field is joined to
        ``self.root_path`` and the second field is kept as the label string.
        """
        with open(self.ann_file, 'r') as ann_fp:
            lines = ann_fp.readlines()

        for raw_line in lines:
            fields = raw_line.rstrip().split('|')
            relative_path, label = fields[0], fields[1]

            self.metas.append(
                [os.path.join(self.root_path, relative_path), label])

            # Hook point: per-sample filtering can be added here.

        logger.info('the datasets contains %d samples' % (len(lines)))
        logger.info('the datasets contains %d samples after filter' %
                    (len(self.metas)))
Example #20
0
    def train_loop(self):
        """Build the graph, load weights, then train and validate every epoch.

        After each epoch a checkpoint is written whose path is
        ``cfg.MODEL.model_path`` followed by the epoch index and the L2
        weight-decay factor. The session is closed once all epochs have run.
        """
        self.build()
        self.load_weight()

        with self._graph.as_default():
            # Saver covering every global variable.
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

            # Merge the collected summaries and attach a writer to the graph.
            self.summary_op = tf.summary.merge(self.summaries)
            self.summary_writer = tf.summary.FileWriter(cfg.MODEL.model_path,
                                                        self.sess.graph)

            for epoch in range(cfg.TRAIN.epoch):
                self._train(epoch)
                val_loss = self._val(epoch)
                logger.info('**************'
                            'val_loss %f ' % (val_loss))

                # A checkpoint is written after every epoch; the validation
                # loss is not compared against earlier epochs.
                checkpoint_suffix = 'epoch_' + str(epoch) + \
                                    'L2_' + str(cfg.TRAIN.weight_decay_factor) + '.ckpt'
                checkpoint_path = cfg.MODEL.model_path + checkpoint_suffix
                logger.info('A new low loss model  saved as %s \n' % checkpoint_path)
                self.saver.save(self.sess, save_path=checkpoint_path)

            self.sess.close()
Example #21
0
    def forward(self,
                inputs,
                boxes,
                labels,
                l2_regulation,
                training_flag,
                with_loss=True):
        """Build the detector graph and return its two losses.

        The inputs are preprocessed, run through the backbone and the head
        (with ``ratios_per_pixel=2``), scored with ``ssd_loss`` in
        ``'focal_loss'`` mode, and handed to ``self.postprocess`` together with
        anchors generated for the runtime shape of the preprocessed input.

        Args:
            inputs: Input batch passed to ``self.preprocess``.
            boxes: Box targets forwarded to ``ssd_loss``.
            labels: Label targets forwarded to ``ssd_loss``.
            l2_regulation: Forwarded to the backbone and the head.
            training_flag: Forwarded to the backbone and the head.
            with_loss: Not used in the body.

        Returns:
            ``(reg_loss, cls_loss)`` as produced by ``ssd_loss``.
        """
        # Preprocess, then extract the feature maps.
        preprocessed = self.preprocess(inputs)
        feature_maps = self.ssd_backbone(preprocessed, l2_regulation,
                                         training_flag)
        print(feature_maps)

        # Head: regression and classification outputs.
        logger.info('train with dsfd ')
        reg_pred, cls_pred = self.ssd_head(feature_maps,
                                           l2_regulation,
                                           training_flag,
                                           ratios_per_pixel=2)

        # Losses.
        reg_loss, cls_loss = ssd_loss(reg_pred, cls_pred, boxes, labels,
                                      'focal_loss')

        # Adaptive anchors: sized from dimensions 1 and 2 of the preprocessed
        # input tensor at run time.
        height = tf.shape(preprocessed)[1]
        width = tf.shape(preprocessed)[2]
        adaptive_anchors = get_all_anchors_fpn(max_size=[height, width])

        self.postprocess(reg_pred, cls_pred, adaptive_anchors)

        return reg_loss, cls_loss
Example #22
0
    def parse_file(self,feature,target,extra_target):
        """Encode the categorical columns and return plain value arrays.

        Args:
            feature: Feature frame with ``cp_type``, ``cp_dose``, ``cp_time``,
                ``sig_id`` and ``fold`` columns.
            target: Label frame with a ``sig_id`` column.
            extra_target: Extra label frame with a ``sig_id`` column.

        Returns:
            ``(features, labels, extra_labels)`` as the ``.values`` of the three
            frames after the id/fold columns are dropped. When
            ``cfg.DATA.filter_ctl_vehicle`` is truthy, rows whose encoded
            ``cp_type`` equals 1 are removed from all three first.
        """
        # column name -> mapping from raw value to integer code
        column_codes = (
            ('cp_type', {'trt_cp': 0, 'ctl_vehicle': 1}),
            ('cp_dose', {'D1': 0, 'D2': 1}),
            ('cp_time', {24: 0, 48: 1, 72: 2}),
        )

        def preprocess(df):
            """Return a copy of ``df`` with the three ``cp_*`` columns mapped to codes."""
            encoded = df.copy()
            for column, codes in column_codes:
                encoded.loc[:, column] = encoded.loc[:, column].map(codes)
            return encoded

        features = preprocess(feature)
        labels = target
        extra_labels = extra_target

        # Optionally drop the control rows (encoded cp_type == 1).
        if cfg.DATA.filter_ctl_vehicle:
            keep_rows = features['cp_type'] != 1
            features = features[keep_rows]
            labels = labels[keep_rows]
            extra_labels = extra_labels[keep_rows]

        feature_values = features.drop(['sig_id', 'fold'], axis=1).values
        label_values = labels.drop('sig_id', axis=1).values
        extra_label_values = extra_labels.drop('sig_id', axis=1).values

        logger.info('dataset contains %d samples'%(feature_values.shape[0]))

        return feature_values, label_values, extra_label_values
Example #23
0
    def load_weight(self):
        """Initialise model variables according to the configuration.

        Branches, checked in order:

        1. ``cfg.MODEL.continue_train`` is truthy: restore every
           ``MODEL_VARIABLES`` entry from ``cfg.MODEL.pretrained_model``.
        2. a pretrained model is set and ``cfg.MODEL.pruning`` is falsy:
           restore the ``MODEL_VARIABLES`` under scope
           ``cfg.MODEL.net_structure`` from the checkpoint.
        3. ``cfg.MODEL.pruning`` is truthy: load a ``{name: array}`` dict
           stored under key ``'arr_0'`` of an npz file and assign it to every
           model variable whose name does not contain ``'output'`` and that is
           present in the dict.
        4. otherwise nothing is restored.
        """
        with self._graph.as_default():

            if cfg.MODEL.continue_train:
                #########################restore the params
                variables_restore = tf.get_collection(
                    tf.GraphKeys.MODEL_VARIABLES)
                print(variables_restore)

                saver2 = tf.train.Saver(variables_restore)
                saver2.restore(self._sess, cfg.MODEL.pretrained_model)

            elif cfg.MODEL.pretrained_model is not None and not cfg.MODEL.pruning:
                #########################restore the params
                variables_restore = tf.get_collection(
                    tf.GraphKeys.MODEL_VARIABLES,
                    scope=cfg.MODEL.net_structure)
                print(variables_restore)

                saver2 = tf.train.Saver(variables_restore)
                saver2.restore(self._sess, cfg.MODEL.pretrained_model)
            elif cfg.MODEL.pruning:
                #########################restore the params
                variables_restore = tf.get_collection(
                    tf.GraphKeys.MODEL_VARIABLES)
                print(variables_restore)

                # Variables with 'output' in their name are never restored.
                variables_restore_n = [
                    v for v in variables_restore if 'output' not in v.name
                ]

                # The dict is stored as a 0-d object array, which NumPy only
                # loads with allow_pickle=True (the default has been False
                # since NumPy 1.16.3). The context manager closes the archive.
                # SECURITY: unpickling runs arbitrary code from the file; only
                # load weight files from a trusted source.
                with np.load(cfg.MODEL.pretrained_model,
                             allow_pickle=True) as npz_file:
                    # ``[()]`` unwraps the 0-d object array into the dict.
                    state_dict = npz_file['arr_0'][()]

                for var in variables_restore_n:
                    # Drop the ':<index>' suffix of the tensor name.
                    var_name = var.name.rsplit(':')[0]
                    if var_name in state_dict:
                        logger.info('recover %s from npz file' % var_name)
                        self._sess.run(tf.assign(var, state_dict[var_name]))
                    else:
                        logger.info('the params of %s not in npz file' %
                                    var_name)
            else:
                logger.info('no pretrained model, train from sctrach')
Example #24
0
from lib.helper.logger import logger
from lib.core.base_trainer.net_work import Train
from lib.dataset.dataietr import DataIter
from lib.core.model.ShuffleNet_Series.ShuffleNetV2.network import ShuffleNetV2

from lib.core.model.semodel.SeResnet import se_resnet50
import cv2
import numpy as np

from train_config import config as cfg
import setproctitle

# Emitted once, when this module is executed or imported.
logger.info('The trainer start')

# Set the process title (the name shown by tools such as ps/top).
setproctitle.setproctitle("face*_*_")


def main():
    """Build the data iterators and the trainer, then load the weights.

    When ``cfg.TRAIN.vis`` is truthy, one batch is pulled from the training
    iterator for each of its ``size`` steps.
    """
    # Datasets. NOTE(review): the trailing boolean presumably selects
    # training mode for the iterator; confirm against DataIter.
    data_root = cfg.DATA.root_path
    train_ds = DataIter(data_root, cfg.DATA.train_txt_path, True)
    val_ds = DataIter(data_root, cfg.DATA.val_txt_path, False)

    # Trainer.
    trainer = Train(train_ds=train_ds, val_ds=val_ds)
    trainer.load_weight()

    if not cfg.TRAIN.vis:
        return

    for _ in range(train_ds.size):
        images, labels = train_ds()
def SSD(images,boxes,labels,L2_reg,training=True):
    """Build the SSD detection graph and return its training losses.

    The backbone is selected by substring match on ``cfg.MODEL.net_structure``;
    the head/loss layout is selected by ``cfg.MODEL.fpn`` and
    ``cfg.MODEL.dual_mode``.  ``get_predictions`` is called on the final head
    outputs for its side effects (its return value is not used here).

    Args:
        images: input image batch; passed through ``preprocess`` first.
        boxes: regression targets forwarded to ``ssd_loss``.
        labels: classification targets forwarded to ``ssd_loss``.
        L2_reg: L2 regularisation setting forwarded to the backbone and head.
        training: training flag forwarded to the backbone and head.

    Returns:
        ``(reg_loss, cla_loss)``.  In dual mode each value is the sum of the
        first-shot and second-shot losses.

    Raises:
        ValueError: if ``cfg.MODEL.net_structure`` names no supported backbone.
    """
    images=preprocess(images)

    net_structure = cfg.MODEL.net_structure
    if 'MobilenetV1' in net_structure:
        ssd_backbne=mobilenet_ssd
    elif 'resnet' in net_structure:
        ssd_backbne = resnet_ssd
    elif 'vgg' in net_structure:
        ssd_backbne = vgg_ssd
    elif 'efficientnet' in net_structure:
        ssd_backbne= efficient_ssd
    else:
        # Fail here with a clear message instead of calling ``None`` below.
        print('a net structure that not supported')
        raise ValueError(
            'unsupported cfg.MODEL.net_structure: %r' % (net_structure,))

    origin_fms,enhanced_fms=ssd_backbne(images, L2_reg, training)


    print('origin_fms', origin_fms)
    print('enhanced_fms', enhanced_fms)


    with tf.variable_scope('ssd'):

        if not cfg.MODEL.fpn and not cfg.MODEL.dual_mode:
            # Plain SSD: the head reads the backbone's original feature maps.
            logger.info('the model was trained as a plain ssd')
            reg_final, cla_final=ssd_out(origin_fms, L2_reg, training)

            reg_loss, cla_loss = ssd_loss(reg_final, cla_final, boxes, labels, 'ohem')
        elif  cfg.MODEL.fpn and not cfg.MODEL.dual_mode:
            # FPN only: the head reads the enhanced feature maps.
            logger.info('the model was trained without dual shot')
            reg_final, cla_final = ssd_out(enhanced_fms, L2_reg, training)
            reg_loss, cla_loss = ssd_loss(reg_final, cla_final, boxes, labels, 'ohem')

        elif cfg.MODEL.dual_mode:
            logger.info('the model was trained with dual shot, FEM')
            # First shot: original feature maps, odd-indexed targets on axis 1.
            # NOTE(review): presumably the targets interleave two anchor sets
            # along axis 1 -- confirm against the anchor generator.
            reg, cla= ssd_out(origin_fms, L2_reg, training,1)
            boxes_small=boxes[:,1::2]
            label_small=labels[:,1::2]

            reg_loss, cla_loss = ssd_loss(reg, cla, boxes_small, label_small, 'ohem')

            with tf.variable_scope('dual'):
                # Second shot: enhanced feature maps, even-indexed targets.
                reg_final, cla_final = ssd_out(enhanced_fms, L2_reg, training,1)

                boxes_norm = boxes[:, 0::2]
                label_norm = labels[:, 0::2]

                reg_loss_dual, cla_loss_dual = ssd_loss(reg_final, cla_final, boxes_norm, label_norm,'ohem')

            reg_loss=(reg_loss+reg_loss_dual)
            cla_loss=(cla_loss+cla_loss_dual)


    ###### make it easy to adjust the anchors,      but it trains with a fixed h,w
    h = tf.shape(images)[1]
    w = tf.shape(images)[2]
    anchors_=get_all_anchors_fpn(max_size=[h,w])

    if cfg.MODEL.dual_mode:
        # Keep only the even-indexed anchors, matching the second-shot targets.
        anchors_ = anchors_[0::2]
    get_predictions(reg_final,cla_final,anchors_)

    return reg_loss,cla_loss
Example #26
0
    def custom_loop(self):
        """Run the train/validate loop and save a checkpoint after every epoch.

        For each of ``self.epochs`` epochs: the scheduler is stepped, one pass
        over ``self.train_ds`` optimises the model, one pass over
        ``self.val_ds`` evaluates it, a summary line is logged and the model's
        ``state_dict`` is written to
        ``./model/epoch_<epoch>_val_loss<loss>.pth``.

        Returns:
            ``(mean train loss, mean validation loss)`` of the last epoch.
        """
        def distributed_train_epoch(epoch_num):
            """Train for one epoch; return (summed loss, number of batches)."""
            total_loss = 0.0
            num_train_batches = 0.0

            self.model.train()
            for step in range(self.train_ds.size):

                start = time.time()

                images, target = self.train_ds()

                images_torch = torch.from_numpy(images)
                target_torch = torch.from_numpy(target)

                data, target = images_torch.to(self.device), target_torch.to(
                    self.device)

                output1, output2, output3 = self.model(data)

                loss1, loss2, loss3, acc1, acc2, acc3 = self.loss_function(
                    [output1, output2, output3], target)

                current_loss = loss1 + loss2 + loss3
                self.optimizer.zero_grad()
                current_loss.backward()
                self.optimizer.step()

                # Detach before accumulating so the running sum does not keep
                # every batch's autograd history alive for the whole epoch.
                total_loss += current_loss.detach()
                num_train_batches += 1
                self.iter_num += 1
                time_cost_per_batch = time.time() - start

                images_per_sec = cfg.TRAIN.batch_size / time_cost_per_batch

                if self.iter_num % cfg.TRAIN.log_interval == 0:
                    logger.info(
                        'epoch_num: %d, '
                        'iter_num: %d, '
                        'loss1: %.6f, '
                        'acc1:  %.6f, '
                        'loss2: %.6f, '
                        'acc2:  %.6f, '
                        'loss3: %.6f, '
                        'acc3:  %.6f, '
                        'loss_value: %.6f,  '
                        'speed: %d images/sec ' %
                        (epoch_num, self.iter_num, loss1, acc1, loss2, acc2,
                         loss3, acc3, current_loss, images_per_sec))

            return total_loss, num_train_batches

        def distributed_test_epoch(epoch_num):
            """Evaluate for one epoch without gradients.

            Returns the summed loss, the three summed accuracies and the
            number of batches.
            """
            total_loss = 0.
            total_acc1 = 0.
            total_acc2 = 0.
            total_acc3 = 0.
            num_test_batches = 0.0
            self.model.eval()
            with torch.no_grad():
                for i in range(self.val_ds.size):
                    images, target = self.val_ds()
                    images_torch = torch.from_numpy(images)
                    target_torch = torch.from_numpy(target)

                    data, target = images_torch.to(
                        self.device), target_torch.to(self.device)

                    output1, output2, output3 = self.model(data)

                    loss1, loss2, loss3, acc1, acc2, acc3 = self.loss_function(
                        [output1, output2, output3], target)

                    cur_loss = loss1 + loss2 + loss3
                    total_loss += cur_loss
                    total_acc1 += acc1
                    total_acc2 += acc2
                    total_acc3 += acc3
                    num_test_batches += 1
            return total_loss,\
                   total_acc1,\
                   total_acc2,\
                   total_acc3, \
                   num_test_batches

        for epoch in range(self.epochs):
            # NOTE(review): the scheduler is stepped before any optimizer step
            # of the epoch; left as-is because moving it would shift the
            # learning-rate schedule -- confirm this ordering is intended.
            self.scheduler.step()
            # Report the learning rate of the last parameter group.
            for param_group in self.optimizer.param_groups:
                lr = param_group['lr']
            logger.info('learning rate: [%f]' % (lr))
            start = time.time()

            train_total_loss, num_train_batches = distributed_train_epoch(
                epoch)
            test_total_loss, test_total_acc1, test_total_acc2, test_total_acc3, num_test_batches = distributed_test_epoch(
                epoch)

            time_consume_per_epoch = time.time() - start
            training_massage = 'Epoch: %d, ' \
                               'Train Loss: %.6f, ' \
                               'Test Loss: %.6f ' \
                               'Test acc1: %.6f '\
                               'Test acc2: %.6f '\
                               'Test acc3: %.6f '\
                               'Time consume: %.2f'%(epoch,
                                                     train_total_loss / num_train_batches,
                                                     test_total_loss / num_test_batches,
                                                     test_total_acc1 / num_test_batches,
                                                     test_total_acc2 / num_test_batches,
                                                     test_total_acc3 / num_test_batches,
                                                     time_consume_per_epoch)

            logger.info(training_massage)

            #### save the model every end of epoch
            current_model_saved_name = './model/epoch_%d_val_loss%.6f.pth' % (
                epoch, test_total_loss / num_test_batches)

            # Create the configured model directory as before, and also the
            # directory the checkpoint is actually written to: the file name
            # above is rooted at ./model, which need not be the same path.
            os.makedirs(cfg.MODEL.model_path, exist_ok=True)
            os.makedirs(os.path.dirname(current_model_saved_name),
                        exist_ok=True)

            torch.save(self.model.state_dict(), current_model_saved_name)

            # Log only after the file has really been written.
            logger.info('A model saved to %s' % current_model_saved_name)

        return (train_total_loss / num_train_batches,
                test_total_loss / num_test_batches)
Example #27
0
from lib.helper.logger import logger
from lib.core.base_trainer.network import Train
import setproctitle

import cv2
# NOTE(review): presumably turns off OpenCV's own threading and OpenCL use so
# it does not compete with the data-loading workers -- confirm.
cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)

logger.info('train start')
# Rename this process through the setproctitle package.
setproctitle.setproctitle("detect")

# Build the trainer and run its train/validate loop; the return value of
# custom_loop() is not used.
trainner = Train()

trainner.custom_loop()
    def custom_loop(self, train_dist_dataset, test_dist_dataset, strategy):
        """Custom training and testing loop.

        Each epoch sets the optimizer learning rate from ``self.decay(epoch)``,
        runs one pass over the training dataset and one over the test dataset,
        logs the averaged losses and exports the model with
        ``tf.saved_model.save``.

        Args:
          train_dist_dataset: Training dataset created using strategy.
          test_dist_dataset: Testing dataset created using strategy.
          strategy: Distribution strategy.

        Returns:
          (train loss, test loss) of the last epoch, each divided by its
          number of batches.
        """
        def distributed_train_epoch(ds, epoch_num):
            # Runs self.train_step on every batch of ds; returns the summed
            # loss and the number of batches seen.
            total_loss = 0.0
            num_train_batches = 0.0
            for one_batch in ds:

                start = time.time()
                per_replica_loss = strategy.experimental_run_v2(
                    self.train_step, args=(one_batch, ))
                # Combine the per-replica losses with a SUM reduction.
                current_loss = strategy.reduce(tf.distribute.ReduceOp.SUM,
                                               per_replica_loss,
                                               axis=None)
                total_loss += current_loss
                num_train_batches += 1
                self.iter_num += 1
                time_cost_per_batch = time.time() - start

                images_per_sec = cfg.TRAIN.batch_size / time_cost_per_batch

                if self.iter_num % cfg.TRAIN.log_interval == 0:
                    logger.info('epoch_num: %d, '
                                'iter_num: %d, '
                                'loss_value: %.6f,  '
                                'speed: %d images/sec ' %
                                (epoch_num, self.iter_num, current_loss,
                                 images_per_sec))

            return total_loss, num_train_batches

        def distributed_test_epoch(ds, epoch_num):
            # Runs self.test_step on every batch of ds; returns the summed
            # loss and the number of batches seen.
            total_loss = 0.
            num_test_batches = 0.0
            for one_batch in ds:
                per_replica_loss = strategy.experimental_run_v2(
                    self.test_step, args=(one_batch, ))

                current_loss = strategy.reduce(tf.distribute.ReduceOp.SUM,
                                               per_replica_loss,
                                               axis=None)
                total_loss += current_loss
                num_test_batches += 1
            return total_loss, num_test_batches

        # NOTE(review): when enabled, both epoch functions are wrapped in
        # tf.function; the time.time() calls, the self.iter_num update and the
        # %-formatted logging inside the train epoch are plain Python -- confirm
        # they behave as intended under tracing.
        if self.enable_function:
            distributed_train_epoch = tf.function(distributed_train_epoch)
            distributed_test_epoch = tf.function(distributed_test_epoch)

        for epoch in range(self.epochs):

            start = time.time()
            self.optimizer.learning_rate = self.decay(epoch)

            train_total_loss, num_train_batches = distributed_train_epoch(
                train_dist_dataset, epoch)
            test_total_loss, num_test_batches = distributed_test_epoch(
                test_dist_dataset, epoch)

            time_consume_per_epoch = time.time() - start
            training_massage = 'Epoch: %d, ' \
                               'Train Loss: %.6f, ' \
                               'Test Loss: %.6f '\
                               'Time consume: %.2f'%(epoch,
                                                     train_total_loss / num_train_batches,
                                                     test_total_loss / num_test_batches,
                                                     time_consume_per_epoch)

            logger.info(training_massage)

            #### save the model every end of epoch
            # The export directory name embeds the epoch index and the mean
            # test loss.
            current_model_saved_name = os.path.join(
                cfg.MODEL.model_path, 'epoch_%d_val_loss%.6f' %
                (epoch, test_total_loss / num_test_batches))

            # Create the model directory on first use.
            if not os.access(cfg.MODEL.model_path, os.F_OK):
                os.mkdir(cfg.MODEL.model_path)

            tf.saved_model.save(self.model, current_model_saved_name)

            logger.info('A model saved to %s' % current_model_saved_name)

        return (train_total_loss / num_train_batches,
                test_total_loss / num_test_batches)
Example #29
0
    def forward(self,
                inputs,
                boxes,
                labels,
                l2_regulation,
                training_flag,
                with_loss=True):
        """Build the detection graph: preprocess, backbone, head(s), loss.

        The head layout is selected by ``cfg.MODEL.dual_mode`` and
        ``cfg.MODEL.fpn``.  ``self.postprocess`` is always called on the final
        head outputs (its return value is not used here).

        Args:
            inputs: image batch; passed through ``self.preprocess`` first.
            boxes: regression targets, used only when ``with_loss`` is true.
            labels: classification targets, used only when ``with_loss`` is
                true.
            l2_regulation: L2 setting forwarded to the backbone and the head.
            training_flag: training flag forwarded to the backbone and head.
            with_loss: when false no loss ops are built.

        Returns:
            ``(reg_loss, cls_loss)``; both are ``None`` when ``with_loss`` is
            false.
        """
        # Bind the results up front so that with_loss=False returns
        # (None, None) instead of failing on unbound locals at the return.
        reg_loss = None
        cls_loss = None

        ###preprocess
        inputs = self.preprocess(inputs)

        ### extract feature maps
        origin_fms, enhanced_fms = self.ssd_backbone(inputs, l2_regulation,
                                                     training_flag)

        ### head, regresssion and class
        if cfg.MODEL.dual_mode and cfg.MODEL.fpn:
            #### train as a dsfd  , anchor with 1 ratios per pixel ,   two shot
            logger.info('train with dsfd ')
            ###first shot
            origin_reg, origin_cls = self.ssd_head(origin_fms,
                                                   l2_regulation,
                                                   training_flag,
                                                   ratios_per_pixel=1)
            ###second shot
            with tf.variable_scope('dual'):
                final_reg, final_cls = self.ssd_head(enhanced_fms,
                                                     l2_regulation,
                                                     training_flag,
                                                     ratios_per_pixel=1)

            ### calculate loss
            if with_loss:
                ## first shot anchors
                boxes_small = boxes[:, 1::2]
                label_small = labels[:, 1::2]
                ## first shot loss
                reg_loss, cls_loss = ssd_loss(origin_reg, origin_cls,
                                              boxes_small, label_small, 'ohem')

                ## second shot anchors
                boxes_norm = boxes[:, 0::2]
                label_norm = labels[:, 0::2]
                ## second shot loss
                with tf.name_scope('dual'):
                    final_reg_loss, final_cls_loss_dual = ssd_loss(
                        final_reg, final_cls, boxes_norm, label_norm, 'ohem')

                reg_loss = (reg_loss + final_reg_loss)
                cls_loss = (cls_loss + final_cls_loss_dual)

        elif cfg.MODEL.fpn:
            #### train as a plain ssd with fpn  , anchor with 2 ratios per pixel
            logger.info('train with a ssd with fpn ')
            with tf.variable_scope('dual'):
                final_reg, final_cls = self.ssd_head(enhanced_fms,
                                                     l2_regulation,
                                                     training_flag)

            ### calculate loss
            if with_loss:
                reg_loss, cls_loss = ssd_loss(final_reg, final_cls, boxes,
                                              labels, 'ohem')

        else:
            #### train as a plain ssd , anchor with 2 ratios per pixel
            logger.info('train with a plain ssd')
            final_reg, final_cls = self.ssd_head(origin_fms, l2_regulation,
                                                 training_flag)
            ### calculate loss
            if with_loss:
                reg_loss, cls_loss = ssd_loss(final_reg, final_cls, boxes,
                                              labels, 'ohem')

        ###### adjust the anchors to the image shape, but it trains with a fixed h,w

        h = tf.shape(inputs)[1]
        w = tf.shape(inputs)[2]
        anchors_ = get_all_anchors_fpn(max_size=[h, w])

        # NOTE(review): anchors are halved whenever dual_mode is set, but the
        # two-shot head above is only built when fpn is set as well -- confirm
        # that dual_mode without fpn is not a supported configuration.
        if cfg.MODEL.dual_mode:
            anchors_ = anchors_[0::2]

        self.postprocess(final_reg, final_cls, anchors_)

        return reg_loss, cls_loss
    def balance(self,anns):
        """Re-balance annotations: drop small faces, oversample hard ones.

        Annotations whose ``'keypoints'`` is ``None`` are kept unchanged.  For
        the others, a face whose ``'bbox'`` is narrower or shorter than 50 is
        dropped; every remaining face is kept once (as a deep copy) and
        appended extra times when it matches any of the rules below (the
        counts add up):

        * either eye closed                          -> 10 extra
        * outer eye corners closer than half the box -> 20 extra
        * mouth opening / box height > 0.15          -> 20 extra
        * mouth opening / ``cfg.MODEL.hin`` above
          ``self.big_mouth_open_thres``              -> 50 extra
        * exactly one eye closed                     -> 40 extra

        Args:
            anns: list of annotation dicts with ``'keypoints'`` and ``'bbox'``
                entries.  The landmark indices used go up to 66, so at least
                67 (x, y) points are required per face.

        Returns:
            A new, shuffled list.  The oversampled entries reference the input
            dicts themselves; ``anns`` is not modified.
        """
        def landmark_gap(points, i, j):
            # Euclidean distance between landmarks i and j.
            return np.sqrt(np.square(points[i, 0] - points[j, 0]) +
                           np.square(points[i, 1] - points[j, 1]))

        kept = []          # one deep copy per surviving annotation
        oversampled = []   # extra references for the hard samples

        for ann in anns:

            ### 300w  balance,  according to keypoints
            if ann['keypoints'] is None:
                kept.append(copy.deepcopy(ann))
                continue

            # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
            label = np.array(ann['keypoints'], dtype=np.float64).reshape((-1, 2))
            bbox = ann['bbox']
            bbox_width = bbox[2] - bbox[0]
            bbox_height = bbox[3] - bbox[1]

            # A face that is too small is dropped outright.  Skipping here
            # also keeps it from being re-added by the oversampling rules and
            # avoids dividing by a zero-sized box.
            if bbox_width < 50 or bbox_height < 50:
                continue

            kept.append(copy.deepcopy(ann))

            left_eye_close = \
                landmark_gap(label, 37, 41) / bbox_height < self.eye_close_thres \
                or landmark_gap(label, 38, 40) / bbox_height < self.eye_close_thres
            right_eye_close = \
                landmark_gap(label, 43, 47) / bbox_height < self.eye_close_thres \
                or landmark_gap(label, 44, 46) / bbox_height < self.eye_close_thres
            mouth_gap = landmark_gap(label, 62, 66)

            repeats = 0
            if left_eye_close or right_eye_close:
                repeats += 10
            ###half face
            if landmark_gap(label, 36, 45) / bbox_width < 0.5:
                repeats += 20
            if mouth_gap / bbox_height > 0.15:
                repeats += 20
            if mouth_gap / cfg.MODEL.hin > self.big_mouth_open_thres:
                repeats += 50
            ##########eyes diff aug
            if bool(left_eye_close) != bool(right_eye_close):
                repeats += 40

            oversampled.extend([ann] * repeats)

        res_anns = kept + oversampled

        logger.info('befor balance the dataset contains %d images' % (len(anns)))
        logger.info('after balanced the datasets contains %d samples' % (len(res_anns)))

        random.shuffle(res_anns)
        return res_anns