def return_network(network_name,
                   pretrained=True,
                   n_classes=None,
                   dropout=None):
    """Build a classification network selected by ``network_name``.

    When ``pretrained`` is truthy, a model is loaded through ``models``
    (presumably ``torchvision.models`` -- confirm against the file imports)
    and its classification head is replaced so that it outputs ``n_classes``
    values.  Otherwise one of the project-local architectures
    (``Wide_ResNet``, ``densenet``, ``resnet``, ``MobileNetV2``) is built
    from scratch.

    Args:
        network_name: Pretrained names are ``'resnet-18'``, ``'resnet-50'``,
            ``'resnet-101'``, ``'densenet-121'`` and ``'squeezenet1_1'``.
            From-scratch names are ``'wideresnet-*'``, ``'densenet-*'``,
            ``'resnet-*'`` (looked up in ``networks``) and ``'mobilenetv2'``.
        pretrained: Selects the pretrained branch when truthy.
        n_classes: Number of outputs of the new classification head.
        dropout: Dropout probability.  In the pretrained branch ``None`` is
            treated as ``0.0`` (no dropout); in the from-scratch branch the
            value is forwarded unchanged to the architecture constructor.

    Returns:
        The constructed network.

    Raises:
        SystemExit: With code ``-1`` when ``network_name`` is not recognised
            (after printing a hint), in either branch.
    """

    def _exit_invalid_network():
        # Same message and exit status as before, but raised directly so it
        # does not depend on the ``exit`` helper injected by ``site``.
        print("Provide a valid network, check python main.py -h")
        raise SystemExit(-1)

    if pretrained:
        # Map the user-facing name to the builder attribute on ``models``;
        # the attribute is resolved lazily, only for the requested network.
        pretrained_builders = {
            'resnet-18': 'resnet18',
            'resnet-50': 'resnet50',
            'resnet-101': 'resnet101',
            'densenet-121': 'densenet121',
            'squeezenet1_1': 'squeezenet1_1',
        }
        if network_name not in pretrained_builders:
            _exit_invalid_network()
        builder = getattr(models, pretrained_builders[network_name])
        net = builder(pretrained=pretrained)

        # nn.Dropout rejects None, so the default ``dropout=None`` used to
        # crash here.  p=0.0 is the identity and keeps the Sequential layout
        # (and therefore the state-dict keys) the same as with real dropout.
        drop_p = 0.0 if dropout is None else dropout

        # transfer learning: swap the classification head
        if 'densenet' in network_name:
            net.classifier = nn.Sequential(
                nn.Dropout(drop_p),
                nn.Linear(net.classifier.in_features, n_classes, bias=True))
        elif 'squeezenet1_1' in network_name:
            # Only the final 1x1 convolution is replaced; the dropout layer
            # already present in the classifier is left untouched.
            net.classifier[1] = nn.Conv2d(512,
                                          n_classes,
                                          kernel_size=(1, 1),
                                          stride=(1, 1))
            net.num_classes = n_classes
        else:
            net.fc = nn.Sequential(
                nn.Dropout(drop_p),
                nn.Linear(net.fc.in_features, n_classes, bias=True))

    else:
        family = network_name.split("-")[0]
        if family == 'wideresnet':
            depth, widenfactor = networks[network_name]
            net = Wide_ResNet(depth, widenfactor, dropout, n_classes)
        elif family == 'densenet':
            depth = networks[network_name]
            net = densenet(depth, n_classes, dropout)
        elif family == 'resnet':
            depth = networks[network_name]
            net = resnet(depth, n_classes, dropout)
        elif network_name == 'mobilenetv2':
            net = MobileNetV2(n_classes, dropout)
        else:
            # Previously fell through and failed with UnboundLocalError.
            _exit_invalid_network()

    return net
# Example no. 2
# 0
def getNetwork(args):
    """Instantiate the network described by ``args`` and a matching file name.

    Args:
        args: Object with a ``net_type`` attribute and, depending on the
            type, ``depth`` and ``finetune`` attributes.  ``finetune`` is
            forwarded as the ``pretrained`` argument of the model builders.

    Returns:
        A ``(net, file_name)`` tuple, where ``file_name`` is a short
        identifier such as ``'vgg-16'`` or ``'alexnet'``.

    Raises:
        SystemExit: With code ``1`` (after printing an error) when
            ``net_type`` is unknown or ``depth`` is not supported for the
            ``vggnet`` / ``densenet`` families.
    """

    def _abort(message):
        print(message)
        sys.exit(1)

    net_type = args.net_type

    if net_type == 'alexnet':
        net = models.alexnet(pretrained=args.finetune)
        file_name = 'alexnet'
    elif net_type == 'vggnet':
        vgg_builders = {11: 'vgg11', 13: 'vgg13', 16: 'vgg16', 19: 'vgg19'}
        if args.depth not in vgg_builders:
            _abort(
                'Error : VGGnet should have depth of either [11, 13, 16, 19]')
        net = getattr(models, vgg_builders[args.depth])(
            pretrained=args.finetune)
        file_name = 'vgg-%s' % (args.depth)
    elif net_type == 'densenet':
        densenet_builders = {
            121: 'densenet121',
            161: 'densenet161',
            169: 'densenet169',
        }
        if args.depth not in densenet_builders:
            # Previously an unsupported depth left ``net`` unbound and the
            # function failed later with UnboundLocalError; report it the
            # same way the VGG branch does.
            _abort(
                'Error : DenseNet should have depth of either [121, 161, 169]')
        net = getattr(models, densenet_builders[args.depth])(
            pretrained=args.finetune)
        file_name = 'densenet-%s' % (args.depth)
    elif net_type == 'resnet':
        net = networks.resnet(args.finetune, args.depth)
        file_name = 'resnet-%s' % (args.depth)
    elif net_type == 'xception':
        # NOTE(review): ImageNet weights are always requested here,
        # regardless of args.finetune -- confirm this is intended.
        net = pretrainedmodels.xception(num_classes=1000,
                                        pretrained='imagenet')
        file_name = 'xception'
    elif net_type == 'inception':
        net = models.inception_v3(num_classes=1000, pretrained=args.finetune)
        file_name = 'inception'
    else:
        # The message now lists every network type handled above.
        _abort(
            'Error : Network should be either '
            '[alexnet / vggnet / resnet / densenet / xception / inception]'
        )

    return net, file_name
def _find_max_overlap_segment(cps, ks, ke):
    """Return ``(start, end, overlap)`` of the segment overlapping ``[ks, ke)`` most.

    ``cps`` is a sequence of segment boundaries; each pair of consecutive
    entries delimits one segment.  Ties keep the earliest segment.  When no
    segment overlaps the window, ``overlap`` is 0 and start/end are both 0.
    """
    best_start, best_end, best_overlap = 0, 0, 0
    for cs, ce in zip(cps[:-1], cps[1:]):
        overlap = max(0, min(ce, ke) - max(cs, ks))
        if overlap > best_overlap:
            best_start, best_end, best_overlap = cs, ce, overlap
    return best_start, best_end, best_overlap


def _min_max_scale(scores):
    """Scale the NumPy array ``scores`` to [0, 1]; a constant array maps to zeros."""
    if scores.size == 0:
        return scores
    lowest = scores.min()
    highest = scores.max()
    if highest == lowest:
        # Avoid 0/0 -> NaN when every frame has the same score
        # (e.g. no annotation matched this video).
        return np.zeros_like(scores)
    return (scores - lowest) / (highest - lowest)


def create_openvideo_gt(video_dir, gt_src_dir, gt_dest_file, sum_rate=0.15):
    """Build per-clip ground-truth importance scores for every ``.mpg`` video.

    For each video in ``video_dir`` the function:

    1. extracts one feature vector per frame with ``ResNet()``,
    2. segments the video with ``cpd_auto`` on the frame-feature Gram matrix,
    3. for every annotated key frame (``Frame<N>.jpeg`` under
       ``gt_src_dir/<video name>/*/``), adds one vote to all frames of the
       segment that overlaps a ~1 second window around the key frame most,
    4. min-max scales the votes and averages them over clips of 16 frames.

    Args:
        video_dir: Directory containing the ``*.mpg`` videos.
        gt_src_dir: Root directory of the manual key-frame annotations.
        gt_dest_file: Accepted for interface compatibility; not used here.
        sum_rate: Forwarded to ``convert_bin`` when binarising clip scores.

    Returns:
        dict mapping each video's base name (text before the first ``.``) to
        its list of per-clip average scores.  Videos with no readable frames
        or a non-positive FPS are skipped.
    """
    duration = 16  # 16 frames for 3d cnn
    gt_dict = {}  # store ground-truth for this dataset

    # build resnet class
    resnet = ResNet()
    new_size = (224, 224)

    # Compiled once; matches annotation files named Frame#.jpeg
    frame_pattern = re.compile(r'(Frame)(\d+)(\.jpeg)')

    for video in glob.glob(os.path.join(video_dir, '*.mpg')):
        # basename() instead of splitting on '/' so Windows paths work too
        filename = os.path.basename(str(video)).split('.')[0]

        # extract frame features per video
        vidcap = cv2.VideoCapture(video)  # major version of cv >= 3
        frame_features = []
        try:
            while vidcap.isOpened():
                success, image = vidcap.read()
                if not success:
                    break
                print(os.path.join(filename, '%d.png' % len(frame_features)))
                image = cv2.resize(image, new_size)
                res_pool5 = resnet(image)
                # gpu variable -> cpu variable -> tensor -> numpy array -> 1D array
                frame_features.append(res_pool5.cpu().data.numpy().flatten())

            fps = vidcap.get(cv2.CAP_PROP_FPS)
            num_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # Always release the capture, even if feature extraction fails.
            cv2.destroyAllWindows()
            vidcap.release()

        # some frames cannot be read
        num_frames = min(num_frames, len(frame_features))

        if num_frames <= 0 or fps <= 0:
            # Nothing to segment; previously this crashed further down.
            print('Skipping %s: no readable frames or invalid FPS' % video)
            continue

        # Stack once (instead of re-stacking per frame); always 2-D, so a
        # single-frame video still yields a 1x1 kernel matrix.
        video_fea = np.vstack(frame_features)

        # segment video
        length = num_frames / fps     # in second
        kernel = np.dot(video_fea, video_fea.T)
        m = int(math.ceil(length / 2.0))  # maximum change points, each segment is about 2s
        cps, _ = cpd_auto(kernel, m, 1)

        cps = np.concatenate(([0], cps, [num_frames - 1]))

        # read manual annotation
        avg_scores = np.zeros(num_frames)
        half_window = int(fps / 2)
        annotation_glob = os.path.join(gt_src_dir, filename, '*', '*.jpeg')
        for img in glob.glob(annotation_glob):
            match = frame_pattern.search(img)
            if match is None:
                continue
            idx = int(match.group(2))       # frame-base
            ks = idx - half_window          # start frame idx
            ke = idx + half_window + 1      # end frame idx
            # find maximum overlap with cps
            mcs, mce, maxlap = _find_max_overlap_segment(cps, ks, ke)
            # record scores -- only when some segment actually overlaps;
            # otherwise the (0, 0) placeholder would wrongly credit frame 0.
            if maxlap > 0:
                avg_scores[mcs:(mce + 1)] += 1

        # scale to 0.0 ~ 1.0
        avg_scores = _min_max_scale(avg_scores)
        # delete the last several frames (< 16 frames)
        remainder = len(avg_scores) % duration
        if remainder != 0:
            avg_scores = avg_scores[:-remainder]
        # calculate avg scores for every 16 frames
        clip_scores = [sum(x) / len(x) for x in chunked(avg_scores, duration)]
        # from floating point scores to binary scores
        # NOTE(review): clip_bins is only consumed by the optional plotting
        # below; the returned dict stores the floating point clip_scores.
        key_idx = convert_bin(clip_scores, sum_rate)
        clip_bins = np.zeros(len(clip_scores))
        for x in key_idx:
            clip_bins[x] = 1

        # to plot avg scores, uncomment it
        # plot_scores(filename, avg_scores)
        # plot_scores(filename, clip_scores)
        # plot_scores(filename, clip_bins)

        gt_dict[filename] = clip_scores

    return gt_dict
# Example no. 4
# 0
def get_model(x_images, y_labels, mode, dataset, args):
    """
    Define core model and loss function using inputs x_images and y_labels

    Parameters
    ----------
    x_images: Tensor
        model inputs with shape [batch_size, height, width, channels]
    y_labels: Tensor
        ground truth tensor. If args.mixup is True, then y_labels have shape [batch_size, num_classes],
        else [batch_size]
    mode: str
        Training mode. Valid values are [train, test]
    dataset: dict
        dataset object; the keys "num_classes" and "first_downsample" are read
        here, plus dataset["train"]["steps"] in train mode
    args:
        command line arguments

    Returns
    -------
    scaffold: dict
        A scaffold contains fetches, optimizer, metrics, summary writer, saver, etc.
        Always present: "model", "fetches", "saver", "metrics".
        Train mode adds "optimizer", "writer" and "summaries".
        "best_saver" is added when args.save_best_ckpt is set.

    Raises
    ------
    NotImplementedError
        If args.net_name matches no supported architecture, or if mode is
        neither "train" nor "test".
    """
    inputs = K.Input(tensor=x_images)
    # "resnet_v2" must be tested before "resnet": the latter is a substring.
    if "resnet_v2" in args.net_name:
        kwargs = {"kernel_initializer": "he_normal"}
        # net_name is expected to look like "resnet_v2_<depth>"
        net = net_lib.resnet_v2(int(args.net_name.split("_")[2]),
                                args.init_channel or 64,
                                dataset["num_classes"],
                                dataset["first_downsample"],
                                args.drop_rate,
                                weight_decay=args.weight_decay,
                                **kwargs)
    elif "resnet" in args.net_name:
        kwargs = {"kernel_initializer": "he_normal"}
        # net_name is expected to look like "resnet_<depth>"
        net = net_lib.resnet(int(args.net_name.split("_")[1]),
                             args.init_channel or 64,
                             dataset["num_classes"],
                             dataset["first_downsample"],
                             args.drop_rate,
                             weight_decay=args.weight_decay,
                             **kwargs)
    elif "densenet" in args.net_name:
        # net_name is expected to look like "densenet_<depth>"
        net = net_lib.densenet(int(args.net_name.split("_")[1]),
                               dataset["num_classes"],
                               dataset["first_downsample"], args.init_channel,
                               args.growth_rate, args.layers_per_block,
                               not args.no_bottleneck, args.compression,
                               args.drop_rate, args.weight_decay)
    else:
        raise NotImplementedError

    y_logits = net(inputs)
    model = K.Model(inputs=inputs, outputs=y_logits)
    scaffold = {"model": model}

    with tf.name_scope("Loss"):
        if args.mixup:
            # mixup labels are dense per-class targets -> dense cross entropy
            ce_loss = tf.losses.softmax_cross_entropy(y_labels, y_logits)
        else:
            ce_loss = tf.losses.sparse_softmax_cross_entropy(
                y_labels, y_logits)
        if model.losses:
            regu_loss = tf.add_n(model.losses)
        else:
            # tf.add_n raises ValueError on an empty list, which happens when
            # the model registers no regularization losses; use a zero scalar
            # so "regu_loss" can still be fetched.
            regu_loss = tf.zeros([], dtype=ce_loss.dtype)
        total_loss = ce_loss + regu_loss

    if mode == "train":
        optimizer = solver.Solver(args, dataset["train"]["steps"])
        scaffold["optimizer"] = optimizer
        scaffold["fetches"] = {
            "train_op": optimizer.minimize(total_loss, model.updates),
            "total_loss": total_loss,
            "regu_loss": regu_loss
        }
        # Define checkpoint saver and summary writer
        scaffold["writer"] = tf.summary.FileWriter(
            args.model_dir, graph=tf.get_default_graph())
        # Define summary
        tf.summary.image("image", x_images)
        tf.summary.scalar("learning_rate", optimizer.lr)
        scaffold["summaries"] = tf.summary.merge_all()
    elif mode == "test":
        scaffold["fetches"] = {
            "total_loss": total_loss,
            "regu_loss": regu_loss
        }
    else:
        raise NotImplementedError

    # The savers are created after the optimizer has been built.
    scaffold["saver"] = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
    if args.save_best_ckpt:
        scaffold["best_saver"] = tf.train.Saver(
            saver_def=scaffold["saver"].as_saver_def())

    with tf.name_scope("Metric"):
        y_pred = tf.argmax(y_logits, axis=1, output_type=tf.int32)
        if args.mixup:
            # collapse dense mixup targets back to class indices for accuracy
            y_labels = tf.argmax(y_labels, axis=1, output_type=tf.int32)
        accuracy, acc_update = tf.metrics.accuracy(y_labels, y_pred)
    scaffold["metrics"] = {"acc": accuracy, "acc_up": acc_update}

    return scaffold