def after_step(self, current_step):
    if current_step % self.engine.config.TRAIN.eval_step == 0 and current_step != 0:
        self.engine.logger.info('Start clustering the features')
        frame_num = self.engine.config.DATASET.train.clip_length
        frame_step = self.engine.config.DATASET.train.clip_step
        feature_record = []
        for video_name in self.engine.cluster_dataset_keys:
            dataset = self.engine.cluster_dataset_dict[video_name]
            data_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=1)
            for test_input, anno, meta in data_loader:
                future = test_input[:, :, 2, :, :].cuda()   # t+1 frame
                current = test_input[:, :, 1, :, :].cuda()  # t frame
                past = test_input[:, :, 0, :, :].cuda()     # t-1 frame
                bboxs = get_batch_dets(self.engine.Detector, current)
                for index, bbox in enumerate(bboxs):
                    if bbox.numel() == 0:
                        # no detections in this frame; use a dummy box so the pipeline keeps running
                        bbox = bbox.new_zeros([1, 4])
                    # crop the detected objects from the three consecutive frames
                    current_object, _ = multi_obj_grid_crop(current[index], bbox)
                    future_object, _ = multi_obj_grid_crop(future[index], bbox)
                    future2current = torch.stack([future_object, current_object], dim=1)
                    past_object, _ = multi_obj_grid_crop(past[index], bbox)
                    current2past = torch.stack([current_object, past_object], dim=1)
                    # temporal gradients feed the motion branches A and C
                    _, _, A_input = frame_gradient(future2current)
                    A_input = A_input.sum(1)
                    _, _, C_input = frame_gradient(current2past)
                    C_input = C_input.sum(1)
                    A_feature, _, _ = self.engine.A(A_input)
                    B_feature, _, _ = self.engine.B(current_object)
                    C_feature, _, _ = self.engine.C(C_input)
                    # concatenate the three latent codes into one object-level feature
                    A_flatten_feature = A_feature.flatten(start_dim=1)
                    B_flatten_feature = B_feature.flatten(start_dim=1)
                    C_flatten_feature = C_feature.flatten(start_dim=1)
                    ABC_feature = torch.cat([A_flatten_feature, B_flatten_feature, C_flatten_feature], dim=1).detach()
                    ABC_feature_s = torch.chunk(ABC_feature, ABC_feature.size(0), dim=0)
                    for abc_f in ABC_feature_s:
                        feature_record.append(abc_f.squeeze(0).cpu().numpy())
            self.engine.logger.info(f'Finish the video: {video_name}')
        self.engine.logger.info(f'Finish extracting features, num samples: {len(feature_record)}')

        device = torch.device('cuda:0')
        cluster_input = torch.from_numpy(np.array(feature_record))
        timer = mmcv.Timer()
        cluster_centers = cluster_input.new_zeros(size=[self.engine.config.TRAIN.cluster.k, 3072])
        # a single k-means run; increase the range (and average the centers) for multiple restarts
        for _ in range(1):
            cluster_ids_x, cluster_center = kmeans(X=cluster_input, num_clusters=self.engine.config.TRAIN.cluster.k, distance='euclidean', device=device)
            cluster_centers += cluster_center
        pseudo_labels = kmeans_predict(cluster_input, cluster_centers, 'euclidean', device=device).detach().cpu().numpy()
        print(f'The cluster time is: {timer.since_start() / 60} min')

        pseudo_dataset = os.path.join(self.engine.config.TRAIN.pusedo_data_path, 'pusedo')
        if not os.path.exists(pseudo_dataset):
            os.mkdir(pseudo_dataset)
        # cache the features and pseudo labels for inspection / reuse
        np.savez_compressed(os.path.join(pseudo_dataset, f'{self.engine.config.DATASET.name}_dummy.npz'), data=cluster_input, label=pseudo_labels)
        print(f'The save time is {timer.since_last_check() / 60} min')

        # fit the one-vs-rest classifier on the pseudo labels produced by k-means
        self.engine.ovr_model = self.engine.ovr_model.fit(cluster_input, pseudo_labels)
        print(f'The train ovr: {timer.since_last_check() / 60} min')
        joblib.dump(self.engine.ovr_model, self.engine.ovr_model_path)
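# A minimal, self-contained sketch of the cluster-then-classify step above,
# assuming `kmeans_pytorch` and scikit-learn; `features` and `k` are
# illustrative placeholders, not names from this trainer:
#
#   import torch
#   from kmeans_pytorch import kmeans, kmeans_predict
#   from sklearn.multiclass import OneVsRestClassifier
#   from sklearn.svm import LinearSVC
#
#   features = torch.randn(1000, 3072)  # object-level ABC features
#   k = 10
#   _, centers = kmeans(X=features, num_clusters=k, distance='euclidean')
#   pseudo = kmeans_predict(features, centers, 'euclidean').cpu().numpy()
#   ovr = OneVsRestClassifier(LinearSVC()).fit(features.numpy(), pseudo)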
def train(self, current_step):
    # PyTorch tensor layout: [N, C, D, H, W]
    # initialize
    start = time.time()
    self.set_requires_grad(self.A, True)
    self.set_requires_grad(self.B, True)
    self.set_requires_grad(self.C, True)
    self.set_requires_grad(self.Detector, False)
    self.A.train()
    self.B.train()
    self.C.train()
    self.Detector.eval()
    writer = self.kwargs['writer_dict']['writer']
    global_steps = self.kwargs['writer_dict']['global_steps_{}'.format(self.kwargs['model_type'])]

    # get the data
    data, anno, meta = next(self._train_loader_iter)  # the core call of the dataloader
    self.data_time.update(time.time() - start)

    # split along D to get each frame; in this method D = 3 and does not change
    future = data[:, :, -1, :, :].cuda()  # t+1 frame
    current = data[:, :, 1, :, :].cuda()  # t frame
    past = data[:, :, 0, :, :].cuda()     # t-1 frame
    bboxs = get_batch_dets(self.Detector, current)

    # this method trains the model on the detected objects instead of whole frames
    for index, bbox in enumerate(bboxs):
        if bbox.numel() == 0:
            bbox = bbox.new_zeros([1, 4])
        # crop the objects
        input_currentObject_B, _ = multi_obj_grid_crop(current[index], bbox)
        future_object, _ = multi_obj_grid_crop(future[index], bbox)
        current2future = torch.stack([input_currentObject_B, future_object], dim=1)
        past_object, _ = multi_obj_grid_crop(past[index], bbox)
        past2current = torch.stack([past_object, input_currentObject_B], dim=1)
        # temporal gradients for the two motion branches
        _, _, input_objectGradient_A = frame_gradient(current2future)
        input_objectGradient_A = input_objectGradient_A.sum(1)
        _, _, input_objectGradient_C = frame_gradient(past2current)
        input_objectGradient_C = input_objectGradient_C.sum(1)

        # ================= Core training step: Start =================
        _, output_recGradient_A, original_A = self.A(input_objectGradient_A)
        _, output_recObject_B, original_B = self.B(input_currentObject_B)
        _, output_recGradient_C, original_C = self.C(input_objectGradient_C)
        # reconstruction losses of the three auto-encoders, combined with per-branch weights
        loss_A = self.ALoss(output_recGradient_A, original_A)
        loss_B = self.BLoss(output_recObject_B, original_B)
        loss_C = self.CLoss(output_recGradient_C, original_C)
        loss_all = self.loss_lamada['ALoss'] * loss_A + self.loss_lamada['BLoss'] * loss_B + self.loss_lamada['CLoss'] * loss_C
        self.optimizer_ABC.zero_grad()
        loss_all.backward()
        self.optimizer_ABC.step()
        # record
        self.loss_meter_ABC.update(loss_all.detach())
        if self.config.TRAIN.general.scheduler.use:
            self.optimizer_ABC_scheduler.step()
        # ================= End =================

        self.batch_time.update(time.time() - start)
        if current_step % self.steps.param['log'] == 0:
            msg = make_info_message(current_step, self.steps.param['max'], self.kwargs['model_type'], self.batch_time, self.config.TRAIN.batch_size, self.data_time, [self.loss_meter_ABC])
            self.logger.info(msg)
        writer.add_scalar('Train_loss_ABC', self.loss_meter_ABC.val, global_steps)
        if current_step % self.steps.param['vis'] == 0:
            vis_objects = OrderedDict({
                'train_input_objectGradient_A': input_objectGradient_A.detach(),
                'train_input_currentObject_B': input_currentObject_B.detach(),
                'train_input_objectGradient_C': input_objectGradient_C.detach(),
                'train_output_recGradient_A': output_recGradient_A.detach(),
                'train_output_recObject_B': output_recObject_B.detach(),
                'train_output_recGradient_C': output_recGradient_C.detach()
            })
            tensorboard_vis_images(vis_objects, writer, global_steps, self.normalize.param['train'])
        global_steps += 1
        # reset start
        start = time.time()

    # keep the latest state for checkpointing
    self.saved_stuff['step'] = global_steps
    self.saved_stuff['loss'] = self.loss_meter_ABC.val
    self.saved_stuff['A'] = self.A
    self.saved_stuff['B'] = self.B
    self.saved_stuff['C'] = self.C
    self.saved_stuff['optimizer_ABC'] = self.optimizer_ABC
    self.kwargs['writer_dict']['global_steps_{}'.format(self.kwargs['model_type'])] = global_steps
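# `set_requires_grad` above toggles whether gradients flow through a sub-model
# (the Detector stays frozen while A/B/C train). A minimal sketch of the usual
# implementation of such a helper (an assumption, not necessarily this repo's
# exact code):
#
#   def set_requires_grad(net, requires_grad):
#       # freeze / unfreeze every parameter of the network
#       for param in net.parameters():
#           param.requires_grad = requires_grad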
def evaluate(self, current_step):
    '''
    Evaluate the results of the model.
    The metric may change (e.g. accuracy, mAP, ...), or other methods
    provided by the official evaluation code can be called here.
    '''
    self.engine.set_all(False)  # freeze all sub-models
    frame_num = self.engine.config.DATASET.test_clip_length
    tb_writer = self.engine.kwargs['writer_dict']['writer']
    global_steps = self.engine.kwargs['writer_dict']['global_steps_{}'.format(self.engine.kwargs['model_type'])]
    score_records = []
    total = 0
    # pick one random video / frame for visualization
    random_video_sn = torch.randint(0, len(self.engine.test_dataset_keys), (1,))
    # load the trained OVR model once, instead of once per frame
    self.engine.ovr_model = joblib.load(self.engine.ovr_model_path)
    for sn, video_name in enumerate(self.engine.test_dataset_keys):
        dataset = self.engine.test_dataset_dict[video_name]
        len_dataset = dataset.pics_len
        test_iters = len_dataset - frame_num + 1
        test_counter = 0
        data_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=1)
        scores = np.empty(shape=(len_dataset,), dtype=np.float32)
        random_frame_sn = torch.randint(0, len_dataset, (1,))
        for frame_sn, (test_input, anno, meta) in enumerate(data_loader):
            feature_record_object = []
            future = test_input[:, :, 2, :, :].cuda()   # t+1 frame
            current = test_input[:, :, 1, :, :].cuda()  # t frame
            past = test_input[:, :, 0, :, :].cuda()     # t-1 frame
            bboxs = get_batch_dets(self.engine.Detector, current)
            for index, bbox in enumerate(bboxs):
                if bbox.numel() == 0:
                    bbox = bbox.new_zeros([1, 4])
                current_object, _ = multi_obj_grid_crop(current[index], bbox)
                future_object, _ = multi_obj_grid_crop(future[index], bbox)
                future2current = torch.stack([future_object, current_object], dim=1)
                past_object, _ = multi_obj_grid_crop(past[index], bbox)
                current2past = torch.stack([current_object, past_object], dim=1)
                _, _, A_input = frame_gradient(future2current)
                A_input = A_input.sum(1)
                _, _, C_input = frame_gradient(current2past)
                C_input = C_input.sum(1)
                A_feature, temp_a, _ = self.engine.A(A_input)
                B_feature, temp_b, _ = self.engine.B(current_object)
                C_feature, temp_c, _ = self.engine.C(C_input)
                # visualize one randomly chosen frame per evaluation
                if sn == random_video_sn and frame_sn == random_frame_sn:
                    vis_objects = OrderedDict({
                        'eval_oc_input_a': A_input.detach(),
                        'eval_oc_output_a': temp_a.detach(),
                        'eval_oc_input_b': current_object.detach(),
                        'eval_oc_output_b': temp_b.detach(),
                        'eval_oc_input_c': C_input.detach(),
                        'eval_oc_output_c': temp_c.detach(),
                    })
                    tensorboard_vis_images(vis_objects, tb_writer, global_steps, normalize=self.engine.normalize.param['val'])
                A_flatten_feature = A_feature.flatten(start_dim=1)
                B_flatten_feature = B_feature.flatten(start_dim=1)
                C_flatten_feature = C_feature.flatten(start_dim=1)
                ABC_feature = torch.cat([A_flatten_feature, B_flatten_feature, C_flatten_feature], dim=1).detach()
                ABC_feature_s = torch.chunk(ABC_feature, ABC_feature.size(0), dim=0)
                for abc_f in ABC_feature_s:
                    feature_record_object.append(abc_f.squeeze(0).cpu().numpy())
            predict_input = np.array(feature_record_object)
            g_i = self.engine.ovr_model.decision_function(predict_input)  # the SVM score of each object in one frame
            frame_score = oc_score(g_i)
            scores[test_counter + frame_num - 1] = frame_score
            test_counter += 1
            total += 1
            if test_counter >= test_iters:
                # the first frames have no complete clip; pad them with the first valid score
                scores[:frame_num - 1] = scores[frame_num - 1]
                score_records.append(scores)
                print(f'finish test video set {video_name}')
                break

    self.engine.pkl_path = save_score_results(self.engine.config, self.engine.logger, verbose=self.engine.verbose, config_name=self.engine.config_name, current_step=current_step, time_stamp=self.engine.kwargs['time_stamp'], score=score_records)
    results = self.engine.evaluate_function(self.engine.pkl_path, self.engine.logger, self.engine.config)
    self.engine.logger.info(results)
    tb_writer.add_text('AUC of ROC curve', f'AUC is {results.auc:.5f}', global_steps)
    return results.auc
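# `evaluate_function` reduces the saved per-video score records to the ROC
# AUC; the gist, assuming frame-level ground-truth labels `gt` aligned with
# the concatenated scores (illustrative only, not the repo's exact routine):
#
#   import numpy as np
#   from sklearn.metrics import roc_auc_score
#
#   scores = np.concatenate(score_records)  # one score per frame
#   auc = roc_auc_score(gt, scores)         # gt: 1 = anomalous frame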