import sys

import torch.nn as nn
import torchvision.models as models

# Wide_ResNet, densenet, resnet, MobileNetV2 and the `networks` config dict
# are project-local definitions.

def return_network(network_name, pretrained=True, n_classes=None, dropout=None):
    if pretrained:
        if network_name == 'resnet-18':
            net = models.resnet18(pretrained=pretrained)
        elif network_name == 'resnet-50':
            net = models.resnet50(pretrained=pretrained)
        elif network_name == 'resnet-101':
            net = models.resnet101(pretrained=pretrained)
        elif network_name == 'densenet-121':
            net = models.densenet121(pretrained=pretrained)
        elif network_name == 'squeezenet1_1':
            net = models.squeezenet1_1(pretrained=pretrained)
        else:
            print("Provide a valid network, check python main.py -h")
            sys.exit(-1)

        # Transfer learning: replace the classification head with a fresh
        # dropout + linear layer (or 1x1 conv, for SqueezeNet) sized to n_classes.
        if 'densenet' in network_name:
            net.classifier = nn.Sequential(
                nn.Dropout(dropout),
                nn.Linear(net.classifier.in_features, n_classes, bias=True))
        elif 'squeezenet1_1' in network_name:
            net.classifier[1] = nn.Conv2d(512, n_classes,
                                          kernel_size=(1, 1), stride=(1, 1))
            net.num_classes = n_classes
        else:
            net.fc = nn.Sequential(
                nn.Dropout(dropout),
                nn.Linear(net.fc.in_features, n_classes, bias=True))
    else:
        # Train-from-scratch architectures; `networks` maps a model name to
        # its depth (and widen factor for Wide-ResNet).
        family = network_name.split("-")[0]
        if family == 'wideresnet':
            depth, widenfactor = networks[network_name]
            net = Wide_ResNet(depth, widenfactor, dropout, n_classes)
        elif family == 'densenet':
            depth = networks[network_name]
            net = densenet(depth, n_classes, dropout)
        elif family == 'resnet':
            depth = networks[network_name]
            net = resnet(depth, n_classes, dropout)
        elif network_name == 'mobilenetv2':
            net = MobileNetV2(n_classes, dropout)
        else:
            # Guard against returning an unbound `net` for unknown names.
            print("Provide a valid network, check python main.py -h")
            sys.exit(-1)
    return net
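
# A minimal usage sketch (hypothetical values): fine-tune a pretrained
# ResNet-50 head for a 10-class task, then freeze everything except the new
# classifier. The freezing loop is an illustrative pattern, not part of the
# function above.
net = return_network('resnet-50', pretrained=True, n_classes=10, dropout=0.5)
for name, param in net.named_parameters():
    if not name.startswith('fc'):
        param.requires_grad = False  # train only the replaced head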
import sys

import pretrainedmodels
import torchvision.models as models

# `networks` is a project-local module providing the resnet constructor.

def getNetwork(args):
    if args.net_type == 'alexnet':
        net = models.alexnet(pretrained=args.finetune)
        file_name = 'alexnet'
    elif args.net_type == 'vggnet':
        if args.depth == 11:
            net = models.vgg11(pretrained=args.finetune)
        elif args.depth == 13:
            net = models.vgg13(pretrained=args.finetune)
        elif args.depth == 16:
            net = models.vgg16(pretrained=args.finetune)
        elif args.depth == 19:
            net = models.vgg19(pretrained=args.finetune)
        else:
            print('Error : VGGnet should have depth of either [11, 13, 16, 19]')
            sys.exit(1)
        file_name = 'vgg-%s' % (args.depth)
    elif args.net_type == 'densenet':
        if args.depth == 121:
            net = models.densenet121(pretrained=args.finetune)
        elif args.depth == 161:
            net = models.densenet161(pretrained=args.finetune)
        elif args.depth == 169:
            net = models.densenet169(pretrained=args.finetune)
        else:
            # Guard against returning an unbound `net` for an unsupported depth.
            print('Error : DenseNet should have depth of either [121, 161, 169]')
            sys.exit(1)
        file_name = 'densenet-%s' % (args.depth)
    elif args.net_type == 'resnet':
        net = networks.resnet(args.finetune, args.depth)
        file_name = 'resnet-%s' % (args.depth)
    elif args.net_type == 'xception':
        net = pretrainedmodels.xception(num_classes=1000, pretrained='imagenet')
        file_name = 'xception'
    elif args.net_type == 'inception':
        net = models.inception_v3(num_classes=1000, pretrained=args.finetune)
        file_name = 'inception'
    else:
        print('Error : Network should be one of '
              '[alexnet / vggnet / resnet / densenet / xception / inception]')
        sys.exit(1)
    return net, file_name
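
# A minimal usage sketch: drive getNetwork with an argparse-style namespace
# that mirrors the fields it reads (net_type, depth, finetune).
import argparse

args = argparse.Namespace(net_type='vggnet', depth=16, finetune=True)
net, file_name = getNetwork(args)
print(file_name)  # -> 'vgg-16'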
import glob
import math
import os
import re

import cv2
import numpy as np
from more_itertools import chunked  # assumed source of chunked(iterable, n)

# ResNet, cpd_auto (kernel temporal segmentation), convert_bin and
# plot_scores are project-local helpers.

def create_openvideo_gt(video_dir, gt_src_dir, gt_dest_file, sum_rate=0.15):
    duration = 16  # 16 frames per clip for the 3D CNN
    gt_dict = {}   # store ground truth for this dataset
    # build resnet feature extractor
    resnet = ResNet()
    new_size = (224, 224)
    video_path = video_dir + '/*.mpg'
    regex = r'(Frame)(\d+)(\.jpeg)'  # Frame#.jpeg

    for video in glob.glob(video_path):
        tokens = str(video).split('/')
        filename = (tokens[-1].split('.'))[0]
        video_fea = None  # all frame features

        # extract frame features (ResNet-101) per video
        vidcap = cv2.VideoCapture(video)  # major version of cv >= 3
        cnt = 0
        while vidcap.isOpened():
            success, image = vidcap.read()
            if success:
                print(os.path.join(filename, '%d.png') % cnt)
                image = cv2.resize(image, new_size)
                res_pool5 = resnet(image)
                # gpu variable -> cpu variable -> tensor -> numpy array -> 1D array
                frame_fea = res_pool5.cpu().data.numpy().flatten()
                if video_fea is not None:
                    video_fea = np.vstack((video_fea, frame_fea))
                else:
                    video_fea = frame_fea
                cnt += 1
            else:
                break
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        num_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        cv2.destroyAllWindows()
        vidcap.release()

        # some frames cannot be read
        num_frames = min(num_frames, cnt)

        # segment the video into change-point segments
        length = num_frames / fps  # in seconds
        kernel = np.dot(video_fea, video_fea.T)
        m = int(math.ceil(length / 2.0))  # max change points; each segment is about 2 s
        cps, _ = cpd_auto(kernel, m, 1)
        cps = np.concatenate(([0], cps, [num_frames - 1]))

        # read manual annotations: each annotated frame votes for the
        # change-point segment it overlaps most
        avg_scores = np.zeros(num_frames)
        for img in glob.glob(gt_src_dir + '/' + filename + '/*/*' + '.jpeg'):
            match = re.search(regex, img)
            if match:
                idx = int(match.group(2))    # frame-based index
                ks = idx - int(fps / 2)      # start frame idx
                ke = idx + int(fps / 2) + 1  # end frame idx
                # find the segment with maximum overlap
                maxlap = 0
                mcs = 0
                mce = 0
                for i in range(len(cps) - 1):
                    cs = cps[i]      # current start frame idx
                    ce = cps[i + 1]  # current end frame idx
                    overlap = max(0, min(ce, ke) - max(cs, ks))
                    if overlap > maxlap:
                        maxlap = overlap
                        mcs = cs
                        mce = ce
                # record scores
                avg_scores[mcs:(mce + 1)] += 1

        # scale to 0.0 ~ 1.0
        max_score = max(avg_scores)
        min_score = min(avg_scores)
        if max_score > min_score:  # guard against all-equal scores
            avg_scores = (avg_scores - min_score) / (max_score - min_score)

        # drop the trailing frames (< 16) so the video splits into whole clips
        if len(avg_scores) % duration != 0:
            avg_scores = avg_scores[: -(len(avg_scores) % duration)]
        # average the scores over every 16-frame clip
        clip_scores = [sum(x) / len(x) for x in chunked(avg_scores, duration)]

        # from floating-point scores to binary scores
        key_idx = convert_bin(clip_scores, sum_rate)
        clip_bins = np.zeros(len(clip_scores))
        for x in key_idx:
            clip_bins[x] = 1

        # to plot avg scores, uncomment:
        # plot_scores(filename, avg_scores)
        # plot_scores(filename, clip_scores)
        # plot_scores(filename, clip_bins)

        gt_dict[filename] = clip_scores
    # note: gt_dest_file is accepted but not written here; the caller
    # persists the returned dict
    return gt_dict
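
# A usage sketch with hypothetical paths: build the clip-level ground truth
# for the Open Video dataset and persist it ourselves (pickle is an arbitrary
# choice here), since the function returns gt_dict rather than writing
# gt_dest_file.
import pickle

gt_file = 'data/OVP/gt.pkl'  # hypothetical destination
gt_dict = create_openvideo_gt('data/OVP/videos', 'data/OVP/UserSummary',
                              gt_file, sum_rate=0.15)
with open(gt_file, 'wb') as f:
    pickle.dump(gt_dict, f)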
import tensorflow as tf
from tensorflow import keras as K  # TF1.x graph mode; K assumed to be tf.keras

# net_lib (network constructors) and solver (optimizer wrapper) are
# project-local modules.

def get_model(x_images, y_labels, mode, dataset, args):
    """Define core model and loss function using inputs x_images and y_labels.

    Parameters
    ----------
    x_images: Tensor
        model inputs with shape [batch_size, height, width, channels]
    y_labels: Tensor
        ground truth tensor. If args.mixup is True, then y_labels has shape
        [batch_size, num_classes], else [batch_size]
    mode: str
        training mode. Valid values are [train, test]
    dataset: dict
        dataset object
    args:
        command line arguments

    Returns
    -------
    scaffold: dict
        A scaffold containing fetches, optimizer, metrics, summary writer,
        saver, etc.
    """
    inputs = K.Input(tensor=x_images)
    if "resnet_v2" in args.net_name:
        kwargs = {"kernel_initializer": "he_normal"}
        net = net_lib.resnet_v2(int(args.net_name.split("_")[2]),
                                args.init_channel or 64,
                                dataset["num_classes"],
                                dataset["first_downsample"],
                                args.drop_rate,
                                weight_decay=args.weight_decay,
                                **kwargs)
    elif "resnet" in args.net_name:
        kwargs = {"kernel_initializer": "he_normal"}
        net = net_lib.resnet(int(args.net_name.split("_")[1]),
                             args.init_channel or 64,
                             dataset["num_classes"],
                             dataset["first_downsample"],
                             args.drop_rate,
                             weight_decay=args.weight_decay,
                             **kwargs)
    elif "densenet" in args.net_name:
        net = net_lib.densenet(int(args.net_name.split("_")[1]),
                               dataset["num_classes"],
                               dataset["first_downsample"],
                               args.init_channel,
                               args.growth_rate,
                               args.layers_per_block,
                               not args.no_bottleneck,
                               args.compression,
                               args.drop_rate,
                               args.weight_decay)
    else:
        raise NotImplementedError

    y_logits = net(inputs)
    model = K.Model(inputs=inputs, outputs=y_logits)
    scaffold = {"model": model}

    with tf.name_scope("Loss"):
        if args.mixup:
            ce_loss = tf.losses.softmax_cross_entropy(y_labels, y_logits)
        else:
            ce_loss = tf.losses.sparse_softmax_cross_entropy(y_labels, y_logits)
        regu_loss = tf.add_n(model.losses)
        total_loss = ce_loss + regu_loss

    if mode == "train":
        optimizer = solver.Solver(args, dataset["train"]["steps"])
        scaffold["optimizer"] = optimizer
        scaffold["fetches"] = {
            "train_op": optimizer.minimize(total_loss, model.updates),
            "total_loss": total_loss,
            "regu_loss": regu_loss,
        }
        # Define checkpoint saver and summary writer
        scaffold["writer"] = tf.summary.FileWriter(
            args.model_dir, graph=tf.get_default_graph())
        # Define summaries
        tf.summary.image("image", x_images)
        tf.summary.scalar("learning_rate", optimizer.lr)
        scaffold["summaries"] = tf.summary.merge_all()
    elif mode == "test":
        scaffold["fetches"] = {
            "total_loss": total_loss,
            "regu_loss": regu_loss,
        }
    else:
        raise NotImplementedError

    scaffold["saver"] = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
    if args.save_best_ckpt:
        scaffold["best_saver"] = tf.train.Saver(
            saver_def=scaffold["saver"].as_saver_def())

    with tf.name_scope("Metric"):
        y_pred = tf.argmax(y_logits, axis=1, output_type=tf.int32)
        if args.mixup:
            y_labels = tf.argmax(y_labels, axis=1, output_type=tf.int32)
        accuracy, acc_update = tf.metrics.accuracy(y_labels, y_pred)
        scaffold["metrics"] = {"acc": accuracy, "acc_up": acc_update}

    return scaffold
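
# A usage sketch (TF1-style; x_images, y_labels, dataset and args are
# hypothetical and would come from a tf.data pipeline and the CLI parser):
# get_model builds graph-mode ops, so the fetches are run inside a session.
# tf.local_variables_initializer is needed because tf.metrics.accuracy
# creates local variables.
scaffold = get_model(x_images, y_labels, mode="train", dataset=dataset, args=args)
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    out = sess.run(scaffold["fetches"])  # one training step
    print(out["total_loss"], sess.run(scaffold["metrics"]["acc_up"]))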