Example #1
def main(input_path, output_path, clip_stride):
    model = C3D().to(device)
    model.load_state_dict(torch.load('c3d.pickle'))
    model.eval()

    if device.type == 'cuda':
        torch.backends.cudnn.benchmark = True
        model = torch.nn.DataParallel(model)

    train_list, test_list = generate_train_test_list(input_path, output_path)
    print('Done generating list')

    for row in train_list:
        source_path, output_folder, output_file = row
        print('Processing', source_path)
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        if not os.path.exists(output_file):
            extract(model, source_path, output_file, clip_stride)

    for row in test_list:
        source_path, output_folder, output_file = row
        print('Processing', source_path)
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        if not os.path.exists(output_file):
            extract(model, source_path, output_file, clip_stride)
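Note: this snippet relies on a module-level device and on imports that the page does not show. A minimal sketch of the assumed setup (the import path of C3D is an assumption taken from the other examples on this page):

import os
import torch
from C3D_model import C3D  # assumed module name, as in the later examples

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')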
Example #2
def main():
    """
    Main function.
    """

    # load a clip to be predicted
    X = get_sport_clip('roger')  # batch size * channel * frames * height * width
    X = Variable(X)
    print(X.size())
    X = X.cuda()

    # get network pretrained model
    net = C3D()
    net.load_state_dict(torch.load('c3d.pickle'))
    net.cuda()
    net.eval()
    print("create network")
    # perform prediction
    prediction = net(X)
    print(prediction.size())
    print(prediction)
    prediction = prediction.data.cpu().numpy()
    print("prediction")
    # read labels
    labels = read_labels_from_file('labels.txt')

    # print top predictions
    top_inds = prediction[0].argsort()[::-1][:5]  # reverse sort and take five largest items
    print('\nTop 5:')
    for i in top_inds:
        print('{:.5f} {}'.format(prediction[0][i], labels[i]))
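Note: read_labels_from_file is not reproduced on this page. A minimal sketch, assuming labels.txt holds one Sports-1M class name per line in index order:

def read_labels_from_file(filepath):
    """Assumed helper: return one label string per line of the file."""
    with open(filepath) as f:
        return [line.strip() for line in f]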
Example #3
def main():
    """
    Main function.
    """

    # load a video clip to run the prediction on
    X = get_sport_clip('roger')  # the 'roger' clip
    X = Variable(X)
    X = X.cuda()  # move to the GPU

    # load the pretrained model weights
    net = C3D()  # instantiate the model
    net.load_state_dict(torch.load('c3d.pickle'))  # fill in the weights
    net.cuda()
    net.eval()  # switch to evaluation mode

    # then simply run the network to get the prediction
    prediction = net(X)
    prediction = prediction.data.cpu().numpy()

    # read in the ground-truth labels
    labels = read_labels_from_file('labels.txt')

    # get the top-N predicted classes
    top_inds = prediction[0].argsort()[::-1][:5]
    print('\nTop 5:')
    for i in top_inds:
        print('{:.5f} {}'.format(prediction[0][i], labels[i]))
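Note: get_sport_clip is also not shown; it only has to return a float tensor shaped (batch, channels, frames, height, width), e.g. (1, 3, 16, 112, 112) for C3D. A rough sketch under that assumption (frame paths and preprocessing are illustrative, not the original helper):

import numpy as np
import torch
from PIL import Image

def get_sport_clip(clip_name, num_frames=16, size=112):
    """Hypothetical helper: stack num_frames RGB frames into a (1, C, T, H, W) tensor."""
    frames = []
    for i in range(num_frames):
        img = Image.open('data/{}/{:06d}.jpg'.format(clip_name, i + 1)).resize((size, size))
        frames.append(np.asarray(img, dtype=np.float32))
    clip = np.stack(frames)                      # (T, H, W, C)
    clip = clip.transpose(3, 0, 1, 2)            # (C, T, H, W)
    return torch.from_numpy(clip).unsqueeze(0)   # (1, C, T, H, W)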
Example #4
def main():
    #playsound('C:\\Users\\Pyo\\Desktop\\PSIML19\\GestureRecognition\\c3d\\fanfare_x.wav')
    net = C3D()

    net = NEW_model.newmodule(net)

    ### maybe??? ###
    # net.cuda()
    net.load_state_dict(
        torch.load('checkpoints\\adam10e6eps12regul01-epoch27_0.7245'))
    net.eval()

    camera(net)
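Note: whether the commented-out net.cuda() is needed depends on what camera() does with its input tensors. A device-agnostic variant of the same setup (checkpoint path kept from the snippet) sidesteps the hard-coded choice, though the tensors built inside camera() still have to end up on the same device:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = NEW_model.newmodule(C3D())
net.load_state_dict(
    torch.load('checkpoints\\adam10e6eps12regul01-epoch27_0.7245',
               map_location=device))
net.to(device)
net.eval()
camera(net)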
Example #5
    def __init__(self, anchors, all_anchors, inds_inside):
        super(TPN, self).__init__()
        # init some para
        self.image_shape = [[240, 320]]  # for one batch, TODO: maybe need to change here
        self.anchors = anchors  # (630, x, y, xw, yw)                anchors coordinates
        self.inds_inside = inds_inside
        self.all_anchors = all_anchors
        # get C3D part, use pretrained weight
        c3d = C3D()

        c3d.load_state_dict(torch.load(c3d_checkpoint))
        self.c3d_part1 = nn.Sequential(*list(c3d.modules())[1:4])  # be careful about these two indices
        # get conv2
        self.c3d_part2 = nn.Sequential(*list(c3d.modules())[4:13])

        self.BN1 = torch.nn.BatchNorm2d(512)

        # for RPN
        self._CPN = CPN(self.anchors, all_anchors, inds_inside)

        self.n_classes = 22

        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)

        self.head_to_tail_ = torch.nn.Sequential(
            nn.Linear(512 * 7 * 7, 1024),  # reduced from 4096 to stay within the memory limit
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(1024, 4096),
            nn.ReLU(True))

        self.RCNN_bbox_pred = torch.nn.Linear(4096, 4 * self.n_classes)
        self.RCNN_cls_score = torch.nn.Linear(4096, self.n_classes)
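Note: the hard-coded slices of list(c3d.modules()) above depend on the exact layer order inside the C3D definition; a quick, purely illustrative way to check those indices before committing to them:

c3d = C3D()
for idx, module in enumerate(c3d.modules()):
    # index 0 is the C3D module itself; submodules follow in definition order
    print(idx, module.__class__.__name__)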
Example #6
def train():
    C3dNet = C3D()
    C3dNet.cuda()
    C3dNet.train()

    learning_rate = 0.01
    optimizer = torch.optim.Adam(C3dNet.parameters(), lr=learning_rate)
    # optimizer = torch.optim.SGD(C3dNet.parameters(), lr=learning_rate, momentum=0.9)
    loss_func = torch.nn.CrossEntropyLoss()

    dset_train = ParkinsonDataset(data_type='train')

    train_loader = DataLoader(dset_train, batch_size=16, shuffle=True, num_workers=0)

    print("Training Data : ", len(train_loader.dataset))
    print("training start!")

    for epoch in range(400):

        if epoch > 0 and epoch % 100 == 0:
            learning_rate = learning_rate / 2
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        for batch_index, (data, label) in enumerate(train_loader):
            data, label = data.cuda(), label.cuda()
            # label = label.float()
            predict = C3dNet(data)
            # print("predict and label size: ", predict.size(), label.size())
            loss = loss_func(predict, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print("epoch: {}/399 | batch_index: {} | loss: {}".format(epoch, batch_index, loss.item()))
        if epoch > 0 and (epoch+1) % 100 == 0:
            torch.save(C3dNet.state_dict(), './weights/MyC3dNet{}.pth'.format(epoch+1))
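Note: the loop above only trains and saves checkpoints. A minimal evaluation sketch, assuming ParkinsonDataset also accepts data_type='test' (an assumption, not shown on this page):

def evaluate(weight_path='./weights/MyC3dNet400.pth'):
    net = C3D()
    net.load_state_dict(torch.load(weight_path))
    net.cuda()
    net.eval()

    dset_test = ParkinsonDataset(data_type='test')  # assumed to mirror the train split
    test_loader = DataLoader(dset_test, batch_size=16, shuffle=False, num_workers=0)

    correct, total = 0, 0
    with torch.no_grad():
        for data, label in test_loader:
            data, label = data.cuda(), label.cuda()
            predict = net(data)
            correct += (predict.argmax(dim=1) == label).sum().item()
            total += label.size(0)
    print("test accuracy: {:.4f}".format(correct / total))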
Example #7
    def __init__(self, anchors, all_anchors, inds_inside):
        super(CPN, self).__init__()
        self.image_shape = [[240, 320]]  # for one batch, TODO: maybe need to change here
        self.anchors = anchors  # (630, x, y, xw, yw)                anchors coordinates
        self.inds_inside = inds_inside
        self.all_anchors = all_anchors
        self.nc_score_out = 2 * 12  # 2(bg/fg)  * 12 (anchors)
        self.nc_bbox_out = 4 * 12  # 4(coords) * 12 (anchors)
        c3d = C3D()
        self.action_num = 22  # -1 => 21 classes if the background is not considered
        self.action_anchor_num = self.action_num * 12
        c3d.load_state_dict(torch.load(c3d_checkpoint))

        self.RPN_Conv = nn.Conv2d(512, 512, 3, 1, 1, bias=True)
        self.BN1 = nn.BatchNorm2d(512)
        self.RPN_cls_bbox_action = nn.Conv2d(
            512, self.nc_score_out + self.nc_bbox_out + self.action_anchor_num,
            1, 1, 0)
        self.BN2 = nn.BatchNorm2d(self.nc_score_out + self.nc_bbox_out +
                                  self.action_anchor_num)
        self.RPN_proposal = _ProposalLayer(self.anchors, self.all_anchors)
        self.RPN_anchor_target = _AnchorTargetLayer(self.anchors,
                                                    self.inds_inside)
Example #8
def main():
    """
    Main function.
    """

    # load a clip to be predicted

    # X = get_sport_clip('roger')
    X = get_gesture_clip('c3d\\data\\3919')

    # X = torch.rand(size=(3,13,112,112))
    X = Variable(X)
    X = X.cuda()

    # get network pretrained model
    net = C3D()
    net.load_state_dict(torch.load('c3d\\c3d.pickle'))


    # cast net to new net
    import NEW_model

    net = NEW_model.newmodule(net)
    # net.load_state_dict(torch.load('checkpoints\\adam10e6eps1220_0.6531'))

    net.cuda()

    # retraining!!!
    from New_dataset import GesturesDataset
    from New_dataset import loadlabelsdict

    labelsdict = loadlabelsdict("c3d\\jester-v1-train.csv")
    dataloaders = {}
    dataset_sizes = {}

    trainset = GesturesDataset("c3d\\splittraindata\\train", labelsdict)
    validset = GesturesDataset("c3d\\splittraindata\\valid", labelsdict)
    dataloaders['train'] = torch.utils.data.DataLoader(
        trainset, batch_size=1, num_workers=2,shuffle=True)
    dataloaders['valid'] = torch.utils.data.DataLoader(
        validset, batch_size=1, num_workers=2,shuffle=True)

    dataset_sizes['train'] = len(trainset)
    dataset_sizes['valid'] = len(validset)
    import gesturetrain
    # print(trainset[0])
    gesturetrain.train(net, dataloaders, dataset_sizes)

    import datetime
    t = datetime.datetime.now()

    # perform prediction
    net.eval()
    prediction = net(X)
    prediction = prediction.data.cpu().numpy()

    print("predict time: " + str(datetime.datetime.now()-t))

    # print top predictions
    # reverse sort and take five largest items

    print(prediction)

    print("Predict treba da bude 2")
    print()

    top_inds = prediction[0].argsort()[::-1][:5]
    print('\nTop 5:')
    for i in top_inds:
        print('pred:{:.5f}   label:{}'.format(prediction[0][i], i))
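Note: the scores printed here are whatever NEW_model's head produces; if they are unnormalized, a plain NumPy softmax (not part of the original snippet) turns them into probabilities for the top-5 listing:

import numpy as np

def softmax(scores):
    exp = np.exp(scores - np.max(scores))  # subtract the max for numerical stability
    return exp / exp.sum()

probs = softmax(prediction[0])
for i in probs.argsort()[::-1][:5]:
    print('prob:{:.5f}   label:{}'.format(probs[i], i))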
Example #9
#--- create dataset folders
# root folder
path_output = args.data_path + args.feature_in + '_' + args.base_model + '/'
if args.structure != 'tsn':
	path_output = args.data_path + args.feature_in + '-' + args.structure + '/'
if not os.path.isdir(path_output):
	os.makedirs(path_output)

###### set up the model ######
# Load the pretrained model
print(Fore.GREEN + 'Pre-trained model:', args.base_model)

if args.base_model == 'c3d':
	from C3D_model import C3D
	c3d_clip_size = 16
	model = C3D()
	model.load_state_dict(torch.load(args.pretrain_weight))
    
	list_model = list(model.children())
	list_conv = list_model[:-6]
	list_fc = list_model[-6:-4]
	extractor_conv = nn.Sequential(*list_conv)
	extractor_fc = nn.Sequential(*list_fc)

	# multi-gpu
	extractor_conv = torch.nn.DataParallel(extractor_conv.cuda())
	extractor_conv.eval()
	extractor_fc = torch.nn.DataParallel(extractor_fc.cuda())
	extractor_fc.eval()

Example #10
else:
    beta = (.5, .999)

    # set gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id

    # data load
    vol_names = [
        filename for filename in os.listdir(data_dir)
        if int(filename.split("_")[-1].split(".")[0]) in normal
    ]
    vol_names.sort()
    vol_names = vol_names[:100]
    volumes, labels = data_load_ver2(vol_names, data_dir)

    # model
    model = C3D(pretrained=True)
    model.train()
    model.cuda()

    # set loss, optimizer
    BCE = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=beta)

    # training with two batch
    y = torch.tensor(data=np.array([[1.], [0.]]), dtype=torch.float32).cuda()

    for epoch in range(1, epoch_num + 1):
        total_loss = 0
        pre_wrong = []
        delay_wrong = []
        correct = []
Example #11
def main():
    """
    Main function.
    """
    parser = argparse.ArgumentParser("C3D & ResNet feature extraction")
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help="increase output verbosity")
    # ---------------------------------------------------------------------------------------------------------------- #
    parser.add_argument('--videos_root_local',
                        type=str,
                        default='/home/george/datasets/HMDB51',
                        help="set videos root path")
    parser.add_argument('--videos_root_remote',
                        type=str,
                        default='/shared/datasets/HMDB51',
                        help="set videos root path")
    parser.add_argument('--remote', action='store_true')
    parser.add_argument('--c3d_model_root',
                        type=str,
                        default='model',
                        help="set C3D model root path")
    parser.add_argument('--video_list',
                        type=str,
                        default='video_HMDB51.list',
                        help="set video list path")
    parser.add_argument(
        '--preds_c3d_root',
        type=str,
        default='preds_c3d_HMDB51',
        help="set video C3D predictions path, to store .npy files")
    parser.add_argument(
        '--preds_cnn_root',
        type=str,
        default='preds_cnn_HMDB51',
        help="set video CNN predictions path, to store .npy files")
    parser.add_argument('--c3d_batch_size',
                        type=int,
                        default=6,
                        help="set C3D batch size")
    parser.add_argument('--cnn_batch_size',
                        type=int,
                        default=32,
                        help="set CNN batch size")
    parser.add_argument('--batch', action='store_true')
    parser.set_defaults(batch=True)
    parser.add_argument('--gpu', type=int, default=0, help="set gpu id")
    parser.add_argument('--cuda',
                        dest='cuda',
                        action='store_true',
                        help="use CUDA during training")
    parser.set_defaults(cuda=True)
    args = parser.parse_args()

    if not args.batch:
        print("Currently, you *have* to run this in batch mode, i.e. batch_size>1. Quitting...")
        quit()

    # if args.cuda:
    #     os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    #     if torch.cuda.is_available():
    #         print('Using CUDA device {}'.format(args.gpu))

    if args.cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    if args.remote:
        args.videos_root = args.videos_root_remote
    else:
        args.videos_root = args.videos_root_local

    args.c3d_model_root = os.path.join(os.getcwd(), args.c3d_model_root)
    args.video_list = os.path.join(os.getcwd(), args.video_list)
    args.preds_c3d_root = os.path.join(os.getcwd(), args.preds_c3d_root)

    model_path = os.path.join(args.c3d_model_root, 'c3d.pickle')
    mean_path = os.path.join(args.c3d_model_root, 'c3d_mean.npy')
    labels = read_labels_from_file('labels_Sports-1M.txt')

    ############################################
    # Load ResNet-50

    resnet50_full = models.resnet50(pretrained=True)

    class ResNet50_FC(torch.nn.Module):
        def __init__(self):
            super(ResNet50_FC, self).__init__()
            self.features = torch.nn.Sequential(
                # stop at FC, to extract FC features, not class scores
                *list(resnet50_full.children())[:-1])

        def forward(self, x):
            x = self.features(x)
            return x

    resnet50 = ResNet50_FC()
    if args.cuda:
        resnet50 = resnet50.cuda()
    resnet50.eval()

    ############################################

    if not os.path.exists(args.c3d_model_root):
        os.mkdir(args.c3d_model_root)

    if not os.path.exists(model_path):
        model_url = 'http://imagelab.ing.unimore.it/files/c3d_pytorch/c3d.pickle'
        download_model_cmd = 'wget {} --directory-prefix {}'.format(
            model_url, args.c3d_model_root)
        os.system(download_model_cmd)
    if not os.path.exists(mean_path):
        mean_url = 'https://github.com/albertomontesg/keras-model-zoo/raw/master/kerasmodelzoo/data/c3d_mean.npy'
        download_mean_cmd = 'wget {} --directory-prefix {}'.format(
            mean_url, args.c3d_model_root)
        os.system(download_mean_cmd)

    if not os.path.exists(args.preds_c3d_root):
        os.mkdir(args.preds_c3d_root)
    if not os.path.exists(args.preds_cnn_root):
        os.mkdir(args.preds_cnn_root)

    C3D_STEP = 16
    cnn_size = 224
    mean_cube = np.load(mean_path)
    mean_cube = mean_cube[0]
    cnn_mean = np.array((0.485, 0.456, 0.406))
    cnn_std = np.array((0.229, 0.224, 0.225))
    cursor_up = "\x1b[1A"   # ANSI escape: move the cursor up one line
    erase_line = "\x1b[2K"  # ANSI escape: erase the current line

    net = C3D()
    net.load_state_dict(torch.load(model_path))
    if args.cuda:
        net = net.cuda()
    net.eval()

    if args.verbose:
        print('Reading video list')
        print('')
        print('')

    video_paths = []
    with open(args.video_list, 'r') as video_list_file:
        for line in video_list_file:
            line = line.rstrip('\n')
            if args.videos_root != '':
                video_path = os.path.join(args.videos_root, line)
            else:
                video_path = line
            video_paths.append(video_path)

    vid_cnt = 0
    N_vids = len(video_paths)
    for clip_path in video_paths:
        print(cursor_up + erase_line)
        video_name_with_ext = clip_path.split('/')[-1]
        video_name = video_name_with_ext.split('.')[0]
        preds_filename = video_name + '.npy'
        class_name = clip_path.split('/')[-2]
        class_name = class_name.replace(' ', '_')

        class_preds_c3d_folder = os.path.join(args.preds_c3d_root, class_name)
        if not os.path.exists(class_preds_c3d_folder):
            os.mkdir(class_preds_c3d_folder)
        class_preds_cnn_folder = os.path.join(args.preds_cnn_root, class_name)
        if not os.path.exists(class_preds_cnn_folder):
            os.mkdir(class_preds_cnn_folder)

        c3d_video_preds_path = os.path.join(class_preds_c3d_folder,
                                            preds_filename)
        cnn_video_preds_path = os.path.join(class_preds_cnn_folder,
                                            preds_filename)

        vid_cnt += 1
        # In case that you're having problems with a specific video file, use something like this
        # if vid_cnt==3021:
        #     continue

        print('{:04d}/{:04d} Processing video "{}"'.format(
            vid_cnt, len(video_paths), video_name))
        if os.path.exists(c3d_video_preds_path) and os.path.exists(
                cnn_video_preds_path):
            continue
        print(' ')

        clip_full, clip_full_raw = get_np_clip(clip_path)
        N_frames = clip_full.shape[0]

        #########################################
        # C3D feature extraction

        if not os.path.exists(c3d_video_preds_path):
            N_iters = int(np.ceil(N_frames / C3D_STEP))
            features = []
            frames_t = []
            if args.batch:
                batch_cnt = 0
                batch_clips = []
            for t in range(0, N_iters):
                if t < (N_iters - 1):
                    start_frame = t * C3D_STEP
                else:
                    start_frame = N_frames - C3D_STEP
                batch_c3d_condition = ((N_iters + batch_cnt - t) >=
                                       args.c3d_batch_size)
                clip = clip_full[start_frame:(start_frame +
                                              C3D_STEP), :, :, :].copy()
                clip = preprocess_clip(clip, mean_cube, args.batch
                                       and batch_c3d_condition)
                if args.verbose:
                    print(cursor_up + erase_line)
                    print(
                        'Video {:07d}/{:07d} Frame {:07d}/{:07d} | {:02d}% | Using C3D for video "{}" | Batch: {}'
                        .format(vid_cnt, N_vids, start_frame + 1, N_frames,
                                int(100 * (start_frame / N_frames)),
                                video_name, args.batch))
                frames_t.append(start_frame)
                if (not args.batch) or (not batch_c3d_condition):
                    #print('Gathering single clip')
                    with torch.no_grad():
                        X = Variable(clip)
                        if args.cuda:  # only move the clip to the GPU when CUDA is requested
                            X = X.cuda()
                        probs, feats = net(X)
                        feats_cpu = feats.data.cpu().numpy()
                    features.append(feats_cpu[0])
                elif batch_c3d_condition:
                    batch_cnt += 1
                    batch_clips.append(clip)
                    #print('Gathering video batch {}/{}'.format(batch_cnt, args.c3d_batch_size))
                    if batch_cnt == args.c3d_batch_size:
                        clip = np.array(batch_clips)
                        clip = torch.from_numpy(clip)
                        with torch.no_grad():
                            X = Variable(clip)
                            if args.cuda:
                                X = X.cuda()
                            probs, feats = net(X)
                            feats_cpu = feats.data.cpu().numpy()
                        batch_clips = []
                        for batch_iter in range(0, args.c3d_batch_size):
                            features.append(feats_cpu[batch_iter])
                        batch_cnt = 0
                clip = []
                X = []
            assert (len(features) == len(frames_t))
            #print('C3D : gathered %d vectors in %d times' % ( len(features), len(frames_t) ))
            video_dict_c3d = {'features': features, 'frames_t': frames_t}
            np.save(c3d_video_preds_path, video_dict_c3d)

        #########################################
        # CNN feature extraction

        if not os.path.exists(cnn_video_preds_path):
            if args.batch:
                batch_cnt = 0
                batch_imgs = []
            features = []
            frames_t = []
            for t in range(0, N_frames):
                frame_index = t
                batch_cnn_condition = ((N_frames + batch_cnt - t) >=
                                       args.cnn_batch_size)
                img = clip_full_raw[frame_index].copy()
                if args.verbose:
                    print(cursor_up + erase_line)
                    print(
                        'Video {:07d}/{:07d} Frame {:07d}/{:07d} | {:02d}% | Using ResNet for video "{}"'
                        .format(vid_cnt, N_vids, frame_index + 1, N_frames,
                                int(100 * (frame_index / N_frames)),
                                video_name))
                img = preprocess_img(img, cnn_size, args.batch
                                     and batch_cnn_condition)
                frames_t.append(frame_index)
                if (not args.batch) or (not batch_cnn_condition):
                    #print('Gathering single image')
                    img = img / 255.0
                    for ch_i in range(0, 3):
                        img[0,
                            ch_i, :, :] = img[0, ch_i, :, :] - cnn_mean[ch_i]
                        img[0, ch_i, :, :] = img[0, ch_i, :, :] / cnn_std[ch_i]
                    img = torch.from_numpy(img)
                    with torch.no_grad():
                        X = Variable(img)
                        if args.cuda:
                            X = X.cuda()
                        feats = resnet50(X)
                        feats_cpu = feats.data.cpu().numpy()
                    features.append(feats_cpu[0].flatten())
                elif batch_cnn_condition:
                    batch_cnt += 1
                    img = img / 255.0
                    for ch_i in range(0, 3):
                        img[ch_i, :, :] = img[ch_i, :, :] - cnn_mean[ch_i]
                        img[ch_i, :, :] = img[ch_i, :, :] / cnn_std[ch_i]
                    batch_imgs.append(img)
                    #print('Gathering image batch {}/{}'.format(batch_cnt, args.cnn_batch_size))
                    if batch_cnt == args.cnn_batch_size:
                        img = np.array(batch_imgs)
                        img = torch.from_numpy(img)
                        with torch.no_grad():
                            X = Variable(img)
                            if args.cuda:
                                X = X.cuda()
                            feats = resnet50(X)
                            feats_cpu = feats.data.cpu().numpy()
                        batch_imgs = []
                        for batch_iter in range(0, args.cnn_batch_size):
                            features.append(feats_cpu[batch_iter].flatten())
                        batch_cnt = 0
                img = []
                X = []
            assert (len(features) == len(frames_t))
            #print('CNN : gathered %d vectors in %d times' % ( len(features), len(frames_t) ))
            video_dict_cnn = {'features': features, 'frames_t': frames_t}
            np.save(cnn_video_preds_path, video_dict_cnn)
    return features
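Note: each per-video dictionary is written with np.save; because the payload is a Python dict, loading it back on recent NumPy versions needs allow_pickle=True. A small loading sketch (the file path is illustrative):

import numpy as np

video_dict = np.load('preds_c3d_HMDB51/brush_hair/some_video.npy',
                     allow_pickle=True).item()
features = video_dict['features']   # one C3D fc feature vector per 16-frame clip
frames_t = video_dict['frames_t']   # starting frame index of each clip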


Example #12
# entry point
if __name__ == '__main__':

    #load data
    features = np.load('sample.npy')
    labels = np.load('label.npy')

    #preprocess the features and labels
    features, labels = pre_treat(features, labels)
    features = filter_none_data(features)

    #change the features to appropriate type
    features = features.astype(np.int32)

    #N,C,H,W, actually frames as H
    features = features.reshape(-1, 2, 8, 15)

    #initial the net
    net = C3D()

    #assign the tunning parameters
    max_iterations = 200
    batch_size = 15
    learning_rate = 0.0005

    #train the net parameters
    training(net, features, labels, max_iterations, batch_size, learning_rate)
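Note: training(...) is defined elsewhere; a hypothetical sketch of a loop matching that signature, assuming cross-entropy classification and float inputs (the real implementation is not shown on this page):

import torch

def training(net, features, labels, max_iterations, batch_size, learning_rate):
    net.train()
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    loss_func = torch.nn.CrossEntropyLoss()
    x = torch.from_numpy(features).float()
    y = torch.from_numpy(labels).long()
    for it in range(max_iterations):
        idx = torch.randperm(x.size(0))[:batch_size]  # simple random mini-batch
        loss = loss_func(net(x[idx]), y[idx])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('iter {:03d} | loss {:.4f}'.format(it, loss.item()))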
Example #13
	def _prepare_DA(self, num_class, base_model): # convert the model to DA framework
		if base_model == 'c3d': # C3D mode: under construction...
			from C3D_model import C3D
			model_test = C3D()
			self.feature_dim = model_test.fc7.in_features
		elif base_model == 'i3d':
			from dataset_preparation.pytorch_i3d import InceptionI3d as I3D
			model_test = I3D()
			self.feature_dim = model_test.logits.conv3d.in_channels
		else:
			model_test = getattr(torchvision.models, base_model)(True) # model_test is only used for getting the dim #
			# pdb.set_trace()
			self.feature_dim = model_test.fc.in_features

		std = 0.001
		feat_shared_dim = min(self.fc_dim, self.feature_dim) if self.add_fc > 0 and self.fc_dim > 0 else self.feature_dim
		feat_frame_dim = feat_shared_dim

		self.relu = nn.ReLU(inplace=True)
		self.dropout_i = nn.Dropout(p=self.dropout_rate_i)
		self.dropout_v = nn.Dropout(p=self.dropout_rate_v)

		#------ frame-level layers (shared layers + source layers + domain layers) ------#
		if self.add_fc < 1:
			raise ValueError(Back.RED + 'add at least one fc layer')

		# 1. shared feature layers
		self.fc_feature_shared_source = nn.Linear(self.feature_dim, feat_shared_dim)
		normal_(self.fc_feature_shared_source.weight, 0, std)
		constant_(self.fc_feature_shared_source.bias, 0)

		if self.add_fc > 1:
			self.fc_feature_shared_2_source = nn.Linear(feat_shared_dim, feat_shared_dim)
			normal_(self.fc_feature_shared_2_source.weight, 0, std)
			constant_(self.fc_feature_shared_2_source.bias, 0)

		if self.add_fc > 2:
			self.fc_feature_shared_3_source = nn.Linear(feat_shared_dim, feat_shared_dim)
			normal_(self.fc_feature_shared_3_source.weight, 0, std)
			constant_(self.fc_feature_shared_3_source.bias, 0)

		# 2. frame-level feature layers
		self.fc_feature_source = nn.Linear(feat_shared_dim, feat_frame_dim)
		normal_(self.fc_feature_source.weight, 0, std)
		constant_(self.fc_feature_source.bias, 0)

		# 3. domain feature layers (frame-level)
		self.fc_feature_domain = nn.Linear(feat_shared_dim, feat_frame_dim)
		normal_(self.fc_feature_domain.weight, 0, std)
		constant_(self.fc_feature_domain.bias, 0)

		# 4. classifiers (frame-level)
		self.fc_classifier_source = nn.Linear(feat_frame_dim, num_class)
		normal_(self.fc_classifier_source.weight, 0, std)
		constant_(self.fc_classifier_source.bias, 0)

		self.fc_classifier_domain = nn.Linear(feat_frame_dim, 2)
		normal_(self.fc_classifier_domain.weight, 0, std)
		constant_(self.fc_classifier_domain.bias, 0)

		if self.share_params == 'N':
			self.fc_feature_shared_target = nn.Linear(self.feature_dim, feat_shared_dim)
			normal_(self.fc_feature_shared_target.weight, 0, std)
			constant_(self.fc_feature_shared_target.bias, 0)
			if self.add_fc > 1:
				self.fc_feature_shared_2_target = nn.Linear(feat_shared_dim, feat_shared_dim)
				normal_(self.fc_feature_shared_2_target.weight, 0, std)
				constant_(self.fc_feature_shared_2_target.bias, 0)
			if self.add_fc > 2:
				self.fc_feature_shared_3_target = nn.Linear(feat_shared_dim, feat_shared_dim)
				normal_(self.fc_feature_shared_3_target.weight, 0, std)
				constant_(self.fc_feature_shared_3_target.bias, 0)

			self.fc_feature_target = nn.Linear(feat_shared_dim, feat_frame_dim)
			normal_(self.fc_feature_target.weight, 0, std)
			constant_(self.fc_feature_target.bias, 0)
			self.fc_classifier_target = nn.Linear(feat_frame_dim, num_class)
			normal_(self.fc_classifier_target.weight, 0, std)
			constant_(self.fc_classifier_target.bias, 0)

		# BN for the above layers
		if self.use_bn != 'none':  # S & T: use AdaBN (ICLRW 2017) approach
			self.bn_shared_S = nn.BatchNorm1d(feat_shared_dim)  # BN for the shared layers
			self.bn_shared_T = nn.BatchNorm1d(feat_shared_dim)
			self.bn_source_S = nn.BatchNorm1d(feat_frame_dim)  # BN for the source feature layers
			self.bn_source_T = nn.BatchNorm1d(feat_frame_dim)

		#------ aggregate frame-based features (frame feature --> video feature) ------#
		if self.frame_aggregation == 'rnn': # 2. rnn
			self.hidden_dim = feat_frame_dim
			if self.rnn_cell == 'LSTM':
				self.rnn = nn.LSTM(feat_frame_dim, self.hidden_dim//self.n_directions, self.n_layers, batch_first=True, bidirectional=bool(int(self.n_directions/2)))
			elif self.rnn_cell == 'GRU':
				self.rnn = nn.GRU(feat_frame_dim, self.hidden_dim//self.n_directions, self.n_layers, batch_first=True, bidirectional=bool(int(self.n_directions/2)))

			# initialization
			for p in range(self.n_layers):
				kaiming_normal_(self.rnn.all_weights[p][0])
				kaiming_normal_(self.rnn.all_weights[p][1])

			self.bn_before_rnn = nn.BatchNorm2d(1)
			self.bn_after_rnn = nn.BatchNorm2d(1)

		elif self.frame_aggregation == 'trn': # 4. TRN (ECCV 2018) ==> fix segment # for both train/val
			self.num_bottleneck = 512
			self.TRN = TRNmodule.RelationModule(feat_shared_dim, self.num_bottleneck, self.train_segments)
			self.bn_trn_S = nn.BatchNorm1d(self.num_bottleneck)
			self.bn_trn_T = nn.BatchNorm1d(self.num_bottleneck)
		elif self.frame_aggregation == 'trn-m':  # 4. TRN (ECCV 2018) ==> fix segment # for both train/val
			self.num_bottleneck = 256
			self.TRN = TRNmodule.RelationModuleMultiScale(feat_shared_dim, self.num_bottleneck, self.train_segments)
			self.bn_trn_S = nn.BatchNorm1d(self.num_bottleneck)
			self.bn_trn_T = nn.BatchNorm1d(self.num_bottleneck)

		elif self.frame_aggregation == 'temconv': # 3. temconv

			self.tcl_3_1 = TCL(3, 1)
			self.tcl_5_1 = TCL(5, 1)
			self.bn_1_S = nn.BatchNorm1d(feat_frame_dim)
			self.bn_1_T = nn.BatchNorm1d(feat_frame_dim)

			self.tcl_3_2 = TCL(3, 1)
			self.tcl_5_2 = TCL(5, 2)
			self.bn_2_S = nn.BatchNorm1d(feat_frame_dim)
			self.bn_2_T = nn.BatchNorm1d(feat_frame_dim)

			self.conv_fusion = nn.Sequential(
				nn.Conv2d(2, 1, kernel_size=(1, 1), padding=(0, 0)),
				nn.ReLU(inplace=True),
			)

		# ------ video-level layers (source layers + domain layers) ------#
		if self.frame_aggregation == 'avgpool': # 1. avgpool
			feat_aggregated_dim = feat_shared_dim
		if 'trn' in self.frame_aggregation : # 4. trn
			feat_aggregated_dim = self.num_bottleneck
		elif self.frame_aggregation == 'rnn': # 2. rnn
			feat_aggregated_dim = self.hidden_dim
		elif self.frame_aggregation == 'temconv': # 3. temconv
			feat_aggregated_dim = feat_shared_dim

		feat_video_dim = feat_aggregated_dim

		# 1. source feature layers (video-level)
		self.fc_feature_video_source = nn.Linear(feat_aggregated_dim, feat_video_dim)
		normal_(self.fc_feature_video_source.weight, 0, std)
		constant_(self.fc_feature_video_source.bias, 0)

		self.fc_feature_video_source_2 = nn.Linear(feat_video_dim, feat_video_dim)
		normal_(self.fc_feature_video_source_2.weight, 0, std)
		constant_(self.fc_feature_video_source_2.bias, 0)

		# 2. domain feature layers (video-level)
		self.fc_feature_domain_video = nn.Linear(feat_aggregated_dim, feat_video_dim)
		normal_(self.fc_feature_domain_video.weight, 0, std)
		constant_(self.fc_feature_domain_video.bias, 0)

		# 3. classifiers (video-level)
		self.fc_classifier_video_source = nn.Linear(feat_video_dim, num_class)
		normal_(self.fc_classifier_video_source.weight, 0, std)
		constant_(self.fc_classifier_video_source.bias, 0)

		if self.ens_DA == 'MCD':
			self.fc_classifier_video_source_2 = nn.Linear(feat_video_dim, num_class) # second classifier for self-ensembling
			normal_(self.fc_classifier_video_source_2.weight, 0, std)
			constant_(self.fc_classifier_video_source_2.bias, 0)

		self.fc_classifier_domain_video = nn.Linear(feat_video_dim, 2)
		normal_(self.fc_classifier_domain_video.weight, 0, std)
		constant_(self.fc_classifier_domain_video.bias, 0)

		# domain classifier for TRN-M
		if self.frame_aggregation == 'trn-m':
			self.relation_domain_classifier_all = nn.ModuleList()
			for i in range(self.train_segments-1):
				relation_domain_classifier = nn.Sequential(
					nn.Linear(feat_aggregated_dim, feat_video_dim),
					nn.ReLU(),
					nn.Linear(feat_video_dim, 2)
				)
				self.relation_domain_classifier_all += [relation_domain_classifier]

		if self.share_params == 'N':
			self.fc_feature_video_target = nn.Linear(feat_aggregated_dim, feat_video_dim)
			normal_(self.fc_feature_video_target.weight, 0, std)
			constant_(self.fc_feature_video_target.bias, 0)
			self.fc_feature_video_target_2 = nn.Linear(feat_video_dim, feat_video_dim)
			normal_(self.fc_feature_video_target_2.weight, 0, std)
			constant_(self.fc_feature_video_target_2.bias, 0)
			self.fc_classifier_video_target = nn.Linear(feat_video_dim, num_class)
			normal_(self.fc_classifier_video_target.weight, 0, std)
			constant_(self.fc_classifier_video_target.bias, 0)

		# BN for the above layers
		if self.use_bn != 'none':  # S & T: use AdaBN (ICLRW 2017) approach
			self.bn_source_video_S = nn.BatchNorm1d(feat_video_dim)
			self.bn_source_video_T = nn.BatchNorm1d(feat_video_dim)
			self.bn_source_video_2_S = nn.BatchNorm1d(feat_video_dim)
			self.bn_source_video_2_T = nn.BatchNorm1d(feat_video_dim)

		self.alpha = torch.ones(1)
		if self.use_bn == 'AutoDIAL':
			self.alpha = nn.Parameter(self.alpha)

		# ------ attention mechanism ------#
		# conventional attention
		if self.use_attn == 'general':
			self.attn_layer = nn.Sequential(
				nn.Linear(feat_aggregated_dim, feat_aggregated_dim),
				nn.Tanh(),
				nn.Linear(feat_aggregated_dim, 1)
				)