def __init__(self):
    self.OUT_DIR = RESULTS_DIR + '/mask-rcnn-50-gray500-02'
    self.OVERLAYS_DIR = self.OUT_DIR + '/evaluate/overlays'
    self.STATS_DIR = self.OUT_DIR + '/evaluate/stats'
    self.logger = Logger()

    ## setup ---------------------------
    os.makedirs(self.OVERLAYS_DIR, exist_ok=True)
    os.makedirs(self.STATS_DIR, exist_ok=True)
    os.makedirs(self.OUT_DIR + '/evaluate/npys', exist_ok=True)
    os.makedirs(self.OUT_DIR + '/checkpoint', exist_ok=True)
    os.makedirs(self.OUT_DIR + '/backup', exist_ok=True)

    logger = self.logger
    logger.open(self.OUT_DIR + '/log.evaluate.txt', mode='a')
    logger.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    logger.write('** some experiment setting **\n')
    logger.write('\tSEED = %u\n' % SEED)
    logger.write('\tPROJECT_PATH = %s\n' % PROJECT_PATH)
    logger.write('\tOUT_DIR = %s\n' % self.OUT_DIR)
    logger.write('\n')

    ## dataset ----------------------------------------
    logger.write('** dataset setting **\n')
    self.test_dataset = ScienceDataset(
        'train1_ids_gray2_500', mode='train',
        # 'valid1_ids_gray2_43', mode='train',
        # 'debug1_ids_gray2_10', mode='train',
        transform=self._eval_augment)
    self.test_loader = DataLoader(
        self.test_dataset,
        sampler=SequentialSampler(self.test_dataset),
        batch_size=1, drop_last=False, num_workers=4,
        pin_memory=True, collate_fn=self._eval_collate)
    logger.write('\ttest_dataset.split = %s\n' % (self.test_dataset.split))
    logger.write('\tlen(self.test_dataset) = %d\n' % (len(self.test_dataset)))
    logger.write('\n')
def run_train():
    out_dir = RESULTS_DIR
    initial_checkpoint = RESULTS_DIR / '00072200_model.pth'
    pretrain_file = None  # imagenet pretrain

    ## setup -----------------
    (out_dir / 'checkpoint').mkdir(exist_ok=True)
    (out_dir / 'train').mkdir(exist_ok=True)
    (out_dir / 'backup').mkdir(exist_ok=True)
    backup_project_as_zip(
        PROJECT_PATH,
        str(out_dir / 'backup' / ('code.train.%s.zip' % IDENTIFIER)))

    log = Logger()
    log.open(str(out_dir / 'log.train.txt'), mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    log.write('** some experiment setting **\n')
    log.write('\tSEED = %u\n' % SEED)
    log.write('\tPROJECT_PATH = %s\n' % PROJECT_PATH)
    log.write('\tout_dir = %s\n' % out_dir)
    log.write('\n')

    ## net ----------------------
    log.write('** net setting **\n')
    cfg = Configuration()
    net = MaskRcnnNet(cfg).cuda()

    if initial_checkpoint is not None:
        log.write('\tinitial_checkpoint = %s\n' % initial_checkpoint)
        net.load_state_dict(
            torch.load(initial_checkpoint,
                       map_location=lambda storage, loc: storage))
    elif pretrain_file is not None:
        log.write('\tpretrained_file = %s\n' % pretrain_file)
        # load_pretrain_file(net, pretrain_file)

    log.write('%s\n\n' % (type(net)))
    log.write('\n')

    ## optimiser ----------------------------------
    iter_accum = 1
    batch_size = 4  ## NUM_CUDA_DEVICES*512  # 256//iter_accum  # 512  # 2*288//iter_accum
    num_iters = 1000 * 1000
    iter_smooth = 20
    iter_log = 50
    iter_valid = 100
    iter_save = [0, num_iters - 1] + list(range(0, num_iters, 100))  # 1*1000

    LR = None
    # LR = StepLR([ (0, 0.01), (200, 0.001), (300, -1)])
    optimizer = SGD(filter(lambda p: p.requires_grad, net.parameters()),
                    lr=0.001 / iter_accum, momentum=0.9, weight_decay=0.0001)

    start_iter = 0
    start_epoch = 0.
    if initial_checkpoint is not None:
        checkpoint = torch.load(
            str(initial_checkpoint).replace('_model.pth', '_optimizer.pth'))
        start_iter = checkpoint['iter']
        start_epoch = checkpoint['epoch']
        # optimizer.load_state_dict(checkpoint['optimizer'])

    ## dataset ----------------------------------------
    log.write('** dataset setting **\n')

    train_dataset = ScienceDataset(
        # 'train1_ids_gray_only1_500', mode='train',
        'valid1_ids_gray_only1_43', mode='train',
        transform=train_augment)
    train_loader = DataLoader(
        train_dataset,
        sampler=RandomSampler(train_dataset),
        # sampler=ConstantSampler(train_dataset, list(range(16))),
        batch_size=batch_size, drop_last=True, num_workers=4,
        pin_memory=True, collate_fn=train_collate)

    valid_dataset = ScienceDataset(
        'valid1_ids_gray_only1_43', mode='train',
        # 'debug1_ids_gray_only1_10', mode='train',
        transform=valid_augment)
    valid_loader = DataLoader(
        valid_dataset,
        sampler=SequentialSampler(valid_dataset),
        batch_size=batch_size, drop_last=False, num_workers=4,
        pin_memory=True, collate_fn=train_collate)

    log.write('\ttrain_dataset.split = %s\n' % (train_dataset.split))
    log.write('\tvalid_dataset.split = %s\n' % (valid_dataset.split))
    log.write('\tlen(train_dataset) = %d\n' % (len(train_dataset)))
    log.write('\tlen(valid_dataset) = %d\n' % (len(valid_dataset)))
    log.write('\tlen(train_loader) = %d\n' % (len(train_loader)))
    log.write('\tlen(valid_loader) = %d\n' % (len(valid_loader)))
    log.write('\tbatch_size = %d\n' % (batch_size))
    log.write('\titer_accum = %d\n' % (iter_accum))
    log.write('\tbatch_size*iter_accum = %d\n' % (batch_size * iter_accum))
    log.write('\n')
    # log.write(inspect.getsource(train_augment)+'\n')
    # log.write(inspect.getsource(valid_augment)+'\n')
    # log.write('\n')

    if 0:  # <debug>
        for inputs, truth_boxes, truth_labels, truth_instances, indices in valid_loader:
            batch_size, C, H, W = inputs.size()
            print(batch_size)

            images = inputs.cpu().numpy()
            for b in range(batch_size):
                image = (images[b].transpose((1, 2, 0)) * 255)
                image = np.clip(image.astype(np.float32) * 3, 0, 255)
                image1 = image.copy()

                truth_box = truth_boxes[b]
                truth_label = truth_labels[b]
                truth_instance = truth_instances[b]
                if truth_box is not None:
                    for box, label, instance in zip(truth_box, truth_label, truth_instance):
                        x0, y0, x1, y1 = box.astype(np.int32)
                        cv2.rectangle(image, (x0, y0), (x1, y1), (0, 0, 255), 1)
                        print(label)

                        thresh = instance > 0.5
                        contour = thresh_to_inner_contour(thresh)
                        contour = contour.astype(np.float32) * 0.5
                        image1 = contour[:, :, np.newaxis] * np.array((0, 255, 0)) \
                                 + (1 - contour[:, :, np.newaxis]) * image1
                    print('')

                image_show('image', image)
                image_show('image1', image1)
                cv2.waitKey(0)

    ## start training here! ##############################################
    log.write('** start training here! **\n')
    log.write(' optimizer=%s\n' % str(optimizer))
    log.write(' momentum=%f\n' % optimizer.param_groups[0]['momentum'])
    log.write(' LR=%s\n\n' % str(LR))
    log.write(' images_per_epoch = %d\n\n' % len(train_dataset))
    log.write(' rate    iter   epoch  num | valid_loss | train_loss | batch_loss | time \n')
    log.write('------------------------------------------------------------------------------------------------------------------------------------------------------------------\n')

    train_loss = np.zeros(6, np.float32)
    train_acc = 0.0
    valid_loss = np.zeros(6, np.float32)
    valid_acc = 0.0
    batch_loss = np.zeros(6, np.float32)
    batch_acc = 0.0
    rate = 0

    start = time.time()
    j = 0
    i = 0
    while i < num_iters:  # loop over the dataset multiple times
        sum_train_loss = np.zeros(6, np.float32)
        sum_train_acc = 0.0
        sum = 0

        net.set_mode('train')
        optimizer.zero_grad()
        for inputs, truth_boxes, truth_labels, truth_instances, indices in train_loader:
            batch_size = len(indices)
            i = j // iter_accum + start_iter
            epoch = (i - start_iter) * batch_size * iter_accum / len(train_dataset) + start_epoch
            num_products = epoch * len(train_dataset)

            if i % iter_valid == 0:
                net.set_mode('valid')
                valid_loss, valid_acc = evaluate(net, valid_loader)
                net.set_mode('train')

                print('\r', end='', flush=True)
                log.write(
                    '%0.4f %5.1f k %6.2f %4.1f m | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %s\n' % (
                        rate, i / 1000, epoch, num_products / 1000000,
                        valid_loss[0], valid_loss[1], valid_loss[2], valid_loss[3], valid_loss[4], valid_loss[5],  # valid_acc,
                        train_loss[0], train_loss[1], train_loss[2], train_loss[3], train_loss[4], train_loss[5],  # train_acc,
                        batch_loss[0], batch_loss[1], batch_loss[2], batch_loss[3], batch_loss[4], batch_loss[5],  # batch_acc,
                        time_to_str((time.time() - start) / 60)))
                time.sleep(0.01)

            # if 1:
            if i in iter_save:
                torch.save(net.state_dict(),
                           str(out_dir / 'checkpoint' / ('%08d_model.pth' % (i))))
                torch.save({
                    'optimizer': optimizer.state_dict(),
                    'iter': i,
                    'epoch': epoch,
                }, str(out_dir / 'checkpoint' / ('%08d_optimizer.pth' % (i))))

            # learning rate scheduler -------------
            if LR is not None:
                lr = LR.get_rate(i)
                if lr < 0:
                    break
                adjust_learning_rate(optimizer, lr / iter_accum)
            rate = get_learning_rate(optimizer)[0] * iter_accum

            # one iteration update -------------
            inputs = Variable(inputs).cuda()
            net(inputs, truth_boxes, truth_labels, truth_instances)
            loss = net.loss(inputs, truth_boxes, truth_labels, truth_instances)

            if 1:  # <debug>
                debug_and_draw(net, inputs, truth_boxes, truth_labels,
                               truth_instances, mode='test')

            # masks = (probs > 0.5).float()
            # acc = dice_loss(masks, labels)
            # accumulated update
            loss.backward()
            if j % iter_accum == 0:
                # torch.nn.utils.clip_grad_norm(net.parameters(), 1)
                optimizer.step()
                optimizer.zero_grad()

            # print statistics ------------
            batch_acc = 0  # acc[0][0]
            batch_loss = np.array((
                loss.cpu().data.numpy()[0],
                net.rpn_cls_loss.cpu().data.numpy()[0],
                net.rpn_reg_loss.cpu().data.numpy()[0],
                net.rcnn_cls_loss.cpu().data.numpy()[0],
                net.rcnn_reg_loss.cpu().data.numpy()[0],
                net.mask_cls_loss.cpu().data.numpy()[0],
            ))
            sum_train_loss += batch_loss
            sum_train_acc += batch_acc
            sum += 1
            if i % iter_smooth == 0:
                train_loss = sum_train_loss / sum
                train_acc = sum_train_acc / sum
                sum_train_loss = np.zeros(6, np.float32)
                sum_train_acc = 0.
                sum = 0

            print(
                '\r%0.4f %5.1f k %6.2f %4.1f m | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %s %d,%d,%s' % (
                    rate, i / 1000, epoch, num_products / 1000000,
                    valid_loss[0], valid_loss[1], valid_loss[2], valid_loss[3], valid_loss[4], valid_loss[5],  # valid_acc,
                    train_loss[0], train_loss[1], train_loss[2], train_loss[3], train_loss[4], train_loss[5],  # train_acc,
                    batch_loss[0], batch_loss[1], batch_loss[2], batch_loss[3], batch_loss[4], batch_loss[5],  # batch_acc,
                    time_to_str((time.time() - start) / 60), i, j, str(inputs.size())),
                end='', flush=True)
            j = j + 1

        pass  # -- end of one data loader --
    pass  # -- end of all iterations --

    if 1:  # save last
        torch.save(net.state_dict(),
                   str(out_dir / 'checkpoint' / ('%d_model.pth' % (i))))
        torch.save({
            'optimizer': optimizer.state_dict(),
            'iter': i,
            'epoch': epoch,
        }, str(out_dir / 'checkpoint' / ('%d_optimizer.pth' % (i))))

    log.write('\n')
def run_submit(evaluate_mode):
    c = config['submit']
    n_worker = c.getint('n_worker')
    data_src = json.loads(c.get('data_src'))
    data_major = json.loads(c.get('data_major'))
    data_sub = json.loads(c.get('data_sub'))
    cc = config['maskrcnn']
    class_map = json.loads(cc.get('classes_map'))

    # generate metafiles such as /npys and /overlays
    out_dir = TASK_OUTDIR
    submit_dir = get_submit_dir(evaluate_mode)
    # initial_checkpoint = PREDICT_CP_FILE

    os.makedirs(submit_dir + '/overlays', exist_ok=True)
    os.makedirs(submit_dir + '/npys', exist_ok=True)
    # os.makedirs(out_dir + '/checkpoint', exist_ok=True)
    # os.makedirs(out_dir + '/backup', exist_ok=True)
    # backup_project_as_zip(PROJECT_PATH, out_dir + '/backup/code.%s.zip' % IDENTIFIER)

    log = Logger()
    log.open(out_dir + '/log.evaluate.txt', mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    log.write('** some experiment setting **\n')
    log.write('\tSEED = %u\n' % SEED)
    log.write('\tPROJECT_PATH = %s\n' % PROJECT_PATH)
    log.write('\tout_dir = %s\n' % out_dir)
    log.write('\n')

    ## net ------------------------------
    cfg = Configuration()
    net = MaskRcnnNet(cfg).cuda()
    epoch = load_ckpt(out_dir, net)
    if epoch == 0:
        print("Aborted: checkpoint not found!")
        return
    '''
    if initial_checkpoint is not None:
        log.write('\tinitial_checkpoint = %s\n' % initial_checkpoint)
        net.load_state_dict(torch.load(initial_checkpoint, map_location=lambda storage, loc: storage))
    '''
    # print(torch_summarize(net))
    log.write('%s\n\n' % (type(net)))
    log.write('\n')

    ## dataset ----------------------------------------
    log.write('** dataset setting **\n')

    if evaluate_mode == 'test':
        # output_csv_path = DATA_DIR + '/split/test.csv'
        # output_csv_path = '../bowl_classifier/stage2_test.csv'
        output_csv_path = config['param'].get('CSV_PATH')
        print('output_csv_path ==> ' + output_csv_path)
        print(config['param'].get('category'))
        print(config['param'].get('sub_category'))

    test_csv = pd.read_csv(output_csv_path)
    if config['param'].get('category') != 'None':
        test_csv = test_csv[test_csv['major_category'] == config['param'].get('category')]
    if config['param'].get('sub_category') != 'None':
        test_csv = test_csv[test_csv['sub_category'] == config['param'].get('sub_category')]
    if (config['param'].get('category') != 'None') & (config['param'].get('sub_category') != 'None'):
        print('[compAI error] filtering by both major category and sub category is not supported')
    # test_csv = test_csv[test_csv['major_category']=='Histology']
    print(output_csv_path)
    print(test_csv.head())

    test_dataset = ScienceDataset(
        test_csv, mode='test',
        # 'train1_ids_gray_only1_500', mode='test',
        # 'valid1_ids_gray_only1_43', mode='test',
        # 'debug1_ids_gray_only_10', mode='test',
        # 'test1_ids_gray2_53', mode='test',
        transform=augment.submit_augment)
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=1, drop_last=False, num_workers=4,
        pin_memory=True, collate_fn=submit_collate)

    log.write('\ttest_dataset.split = %s\n' % (output_csv_path))
    log.write('\tlen(test_dataset) = %d\n' % (len(test_dataset)))
    log.write('\n')

    ## start evaluation here! ##############################################
    log.write('** start evaluation here! **\n')
    start = timer()

    pred_masks = []
    true_masks = []
    IoU = []
    label_counts = []
    predicts_counts = []
    predict_image_labels = []
    total_prediction = []
    confidence = []

    test_num = len(test_loader.dataset)
    for i, (inputs, images, indices, ids) in enumerate(test_loader, 0):
        # print('\rpredicting: %10d/%d (%0.0f %%) %0.2f min' % (i, test_num - 1, 100 * i / (test_num - 1),
        #       (timer() - start) / 60), end='', flush=True)
        print('\rpredicting: %10d/%d (%0.0f %%) %0.2f min' % (i + 1, test_num, 100 * i / (test_num),
              (timer() - start) / 60), end='', flush=True)
        time.sleep(0.01)

        net.set_mode('test')
        with torch.no_grad():
            inputs = Variable(inputs).cuda()
            net(inputs)
            revert(net, images)  # unpad, undo test-time augment etc.

        ## save results ---------------------------------------
        batch_size = len(indices)
        # note: the current version only supports batch_size==1 for variable size input;
        # to use batch_size > 1, the code for net.windows etc. needs to be fixed
        assert (batch_size == 1)
        batch_size, C, H, W = inputs.size()
        inputs = inputs.data.cpu().numpy()

        window = net.rpn_window
        rpn_probs_flat = net.rpn_probs_flat.data.cpu().numpy()
        rpn_logits_flat = net.rpn_logits_flat.data.cpu().numpy()
        rpn_deltas_flat = net.rpn_deltas_flat.data.cpu().numpy()
        detections = net.detections
        rpn_proposals = net.rpn_proposals
        # print('detections shape', detections.shape)
        masks = net.masks
        keeps = net.keeps
        category_labels = net.category_labels
        label_sorteds = net.label_sorted
        # print('masks shape', len(masks))
        # print('batch_size', batch_size)

        for b in range(batch_size):
            # image0 = (inputs[b].transpose((1, 2, 0)) * 255).astype(np.uint8)
            image = images[b]
            height, width = image.shape[:2]
            # print('hw', height, width)
            mask = masks[b]
            keep = keeps[b]
            category_label = category_labels[b]
            # label_sorted = np.asarray(list(label_sorted[b]))
            label_sorted = label_sorteds[b]
            # print('sum_label', sum_label)
            sum_label = 0
            for k in range(int(len(label_sorted) / 2)):  # renamed from i to avoid shadowing the loader index
                sum_label = sum_label + label_sorted[k * 2 + 1]
            # category_label = []
            # print('category_label', category_label)
            if category_label == []:
                category_image = 'NAN'
                nms_label_count = 0
            else:
                category_image = [key for key, value in class_map.items() if value == category_label][0]
                nms_label_count = label_sorted[1]

            pred_masks.append(mask)
            predict_image_labels.append(category_image)
            if evaluate_mode == 'train':
                IoU_one, label_counts_one, predicts_counts_one = evaluate_IoU(mask, true_mask)
                IoU.append(IoU_one)
                label_counts.append(label_counts_one)
                predicts_counts.append(predicts_counts_one)
            confidence_one = round(nms_label_count / (sum_label + 0.0000001), 4)
            confidence.append(confidence_one)

            prob = rpn_probs_flat[b]
            delta = rpn_deltas_flat[b]
            # detection = detections[b]
            image_rcnn_detection_nms = draw_rcnn_detection_nms(image, detections, threshold=0.1)
            # image_rpn_proposal_before_nms = draw_rpn_proposal_before_nms(image, prob, delta, window, 0.995)
            image_rpn_detection_nms = draw_rcnn_detection_nms(image, rpn_proposals, threshold=0.1)
            contour_overlay = multi_mask_to_contour_overlay(cfg, mask, detections, keep, image, color=[0, 255, 0])
            color_overlay = multi_mask_to_color_overlay(mask, color='summer')
            if evaluate_mode == 'train':
                color_overlay_true = multi_mask_to_color_overlay(true_mask, image, color='summer')
            color_overlay = multi_mask_to_color_overlay(mask, color='summer')
            color1_overlay = multi_mask_to_contour_overlay(cfg, mask, detections, keep, color_overlay, color=[255, 255, 255])

            image_rcnn_detection_nms = image_rcnn_detection_nms[:height, :width]
            # image_rpn_proposal_before_nms = image_rpn_proposal_before_nms[:height, :width]
            if evaluate_mode == 'train':
                # all = np.hstack((image, contour_overlay, image_rpn_detection_nms, image_rcnn_detection_nms,
                #                  image_rpn_detection_nms, color1_overlay, color_overlay_true))
                all = np.hstack((image, image_rpn_detection_nms, image_rcnn_detection_nms,
                                 image_rpn_detection_nms, contour_overlay, color_overlay_true))
            else:
                all = np.hstack((image, color1_overlay, image_rpn_detection_nms,
                                 image_rcnn_detection_nms, contour_overlay))

            # --------------------------------------------
            id = test_dataset.ids[indices[b]]
            name = id.split('/')[-1]
            # draw_shadow_text(overlay_mask, 'mask', (5, 15), 0.5, (255, 255, 255), 1)

            np.save(submit_dir + '/npys/%s.npy' % (name), mask)
            # cv2.imwrite(out_dir + '/submit/npys/%s.png' % (name), color_overlay)

            # always save overlay images
            cv2.imwrite(submit_dir + '/overlays/%s.png' % (name), all)

            # psd
            os.makedirs(submit_dir + '/psds/%s' % name, exist_ok=True)
            cv2.imwrite(submit_dir + '/psds/%s/%s.png' % (name, name), image)
            cv2.imwrite(submit_dir + '/psds/%s/%s.mask.png' % (name, name), color_overlay)
            cv2.imwrite(submit_dir + '/psds/%s/%s.contour.png' % (name, name), contour_overlay)

            # image_show('all', all)
            # image_show('image', image)
            # image_show('multi_mask_overlay', multi_mask_overlay)
            # image_show('contour_overlay', contour_overlay)
            # cv2.waitKey(1)

    assert (test_num == len(test_loader.sampler))
    log.write('initial_checkpoint = %s\n' % (Path(ckpt_path(out_dir)).name))
    log.write('test_num = %d\n' % (test_num))
    log.write('\n')

    if evaluate_mode == 'train':
        ids = test_csv['image_id']
        label_column = config['train'].get('label_column')
        major_category = test_csv['major_category']
        sub_category = test_csv['sub_category']
        # answer = []
        answer = predict_image_labels == major_category
        print('answer', answer)
        print('predict_image_labels', predict_image_labels)
        df_predict = pd.DataFrame({
            'image_id': ids,
            'pred_mask': pred_masks,
            'true_mask': true_masks,
            'major_category': major_category,
            'sub_category': sub_category,
            'IoU': IoU,
            'label_counts': label_counts,
            'predicts_counts': predicts_counts,
            'predict_category': predict_image_labels,
            'yes_or_no': answer,
            'confidence': confidence})
        # df_predict = df_predict.assign(label_counts=0)
        # df_predict = df_predict.assign(predicts_counts=0)
        # df_predict = df_predict.assign(ap=0)
        # for i in range(df_predict.shape[0]):
        #     df_predict.loc[i, ['ap', 'label_counts', 'predicts_counts']] = evaluate_water(
        #         df_predict.loc[:, 'pred_mask'].values.tolist()[i],
        #         df_predict.loc[:, 'true_mask'].values.tolist()[i])

        IoU_mean = df_predict['IoU']
        IoU_his = df_predict.loc[df_predict['major_category'] == 'Histology', ['IoU']]
        IoU_flo = df_predict.loc[df_predict['major_category'] == 'Fluorescence', ['IoU']]
        IoU_bri = df_predict.loc[df_predict['major_category'] == 'Brightfield', ['IoU']]
        # print('Major Category IoU:\n')
        # print('IoU(%d):' % len(IoU_mean), IoU_mean.mean())
        # print('IoU_Histology(%d):' % len(IoU_his), IoU_his.mean().values[0])
        # print('IoU_Fluorescence(%d):' % len(IoU_flo), IoU_flo.mean().values[0])
        # print('IoU_Brightfield(%d):' % len(IoU_bri), IoU_bri.mean().values[0])
        log.write('Major Category IoU:\n')
        log.write('IoU(%d):%s\n' % (len(IoU_mean), IoU_mean.mean()))
        log.write('IoU_Histology(%d):%s\n' % (len(IoU_his), IoU_his.mean().values[0]))
        log.write('IoU_Fluorescence(%d):%s\n' % (len(IoU_flo), IoU_flo.mean().values[0]))
        log.write('IoU_Brightfield(%d):%s\n' % (len(IoU_bri), IoU_bri.mean().values[0]))
        log.write('\n')

        IoU_he = df_predict.loc[df_predict['sub_category'] == 'HE', ['IoU']]
        IoU_flo_sub = df_predict.loc[df_predict['sub_category'] == 'Fluorescence', ['IoU']]
        IoU_bri_sub = df_predict.loc[df_predict['sub_category'] == 'Brightfield', ['IoU']]
        IoU_clo_sub = df_predict.loc[df_predict['sub_category'] == 'Cloud', ['IoU']]
        IoU_dro_sub = df_predict.loc[df_predict['sub_category'] == 'Drosophilidae', ['IoU']]
        IoU_ihc_sub = df_predict.loc[df_predict['sub_category'] == 'IHC', ['IoU']]
        # print('Sub Category IoU:\n')
        log.write('Sub Category IoU:\n')
        # print('IoU_he(%d):' % len(IoU_he), IoU_he.mean().values[0])
        # print('IoU_Fluorescence(%d):' % len(IoU_flo_sub), IoU_flo_sub.mean().values[0])
        # print('IoU_Brightfield(%d):' % len(IoU_bri_sub), IoU_bri_sub.mean().values[0])
        # print('IoU_Cloud(%d):' % len(IoU_clo_sub), IoU_clo_sub.mean().values[0])
        # print('IoU_Drosophilidae(%d):' % len(IoU_dro_sub), IoU_dro_sub.mean().values[0])
        # print('IoU_IHC(%d):' % len(IoU_ihc_sub), IoU_ihc_sub.mean().values[0])
        log.write('IoU_he(%d):%s\n' % (len(IoU_he), IoU_he.mean().values[0]))
        log.write('IoU_Fluorescence(%d):%s\n' % (len(IoU_flo_sub), IoU_flo_sub.mean().values[0]))
        log.write('IoU_Brightfield(%d):%s\n' % (len(IoU_bri_sub), IoU_bri_sub.mean().values[0]))
        log.write('IoU_Cloud(%d):%s\n' % (len(IoU_clo_sub), IoU_clo_sub.mean().values[0]))
        log.write('IoU_Drosophilidae(%d):%s\n' % (len(IoU_dro_sub), IoU_dro_sub.mean().values[0]))
        log.write('IoU_IHC(%d):%s\n' % (len(IoU_ihc_sub), IoU_ihc_sub.mean().values[0]))
        log.write('\n')

        df_predict = df_predict.drop(['pred_mask', 'true_mask'], axis=1)
        # print(df_predict)
        prediction_csv_file = submit_dir + '/prediction.csv'
        print('prediction_csv_file ==> ' + prediction_csv_file)
        df_predict.to_csv(prediction_csv_file)
    else:
        ids = test_csv['image_id']
        df_predict = pd.DataFrame({
            'image_id': ids,
            'predict_image_labels': predict_image_labels,
            'confidence': confidence})
        prediction_csv_file = submit_dir + '/prediction.csv'
        df_predict.to_csv(prediction_csv_file)
def run_predict():
    cfg = Configuration()
    f_eval = TrainFolder(os.path.join(cfg.result_dir, cfg.model_name))
    out_dir = f_eval.folder_name
    initial_checkpoint = os.path.join(f_eval.checkpoint_dir, cfg.valid_checkpoint)

    # augment -----------------------------------------------------------------------
    split = cfg.valid_split  # 'valid_black_white_44'  # 'test_black_white_53'

    ## start experiments here! ###########################################
    log = Logger()
    log.open(out_dir + '/log.evaluate.txt', mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    log.write('** some experiment setting **\n')
    log.write('\tSEED = %u\n' % SEED)
    log.write('\tPROJECT_PATH = %s\n' % PROJECT_PATH)
    log.write('\tout_dir = %s\n' % out_dir)
    log.write('\n')

    ## net ------------------------------
    cfg.rcnn_test_nms_pre_score_threshold = 0.5
    cfg.mask_test_nms_pre_score_threshold = cfg.rcnn_test_nms_pre_score_threshold

    net = MaskRcnnNet(cfg).cuda()
    if initial_checkpoint is not None:
        log.write('\tinitial_checkpoint = %s\n' % initial_checkpoint)
        net.load_state_dict(
            torch.load(initial_checkpoint,
                       map_location=lambda storage, loc: storage))
    log.write('%s\n\n' % (type(net)))
    log.write('\n')

    ## dataset ----------------------------------------
    log.write('** dataset setting **\n')
    ids = read_list_from_file(os.path.join(cfg.split_dir, split), comment='#')
    log.write('\tsplit = %s\n' % (split))
    log.write('\tlen(ids) = %d\n' % (len(ids)))
    log.write('\n')

    for tag_name, do_test_augment, undo_test_augment, params in cfg.test_augments:
        ## setup --------------------------
        tag = 'box_%s' % tag_name
        os.makedirs(os.path.join(out_dir, 'predict', tag, 'overlays'), exist_ok=True)
        os.makedirs(os.path.join(out_dir, 'predict', tag, 'predicts'), exist_ok=True)
        os.makedirs(os.path.join(out_dir, 'predict', tag, 'rcnn_proposals'), exist_ok=True)
        os.makedirs(os.path.join(out_dir, 'predict', tag, 'detections'), exist_ok=True)
        os.makedirs(os.path.join(out_dir, 'predict', tag, 'masks'), exist_ok=True)
        os.makedirs(os.path.join(out_dir, 'predict', tag, 'instances'), exist_ok=True)

        log.write('** start evaluation here @%s! **\n' % tag)
        for i in range(len(ids)):
            folder, name = ids[i].split('/')[-2:]
            if os.path.isfile(os.path.join(out_dir, 'predict', tag, 'detections', '%s.npy' % name)):
                print('skip %03d %s' % (i, name))
                continue
            print('%03d %s' % (i, name))

            image = cv2.imread(
                os.path.join(cfg.data_dir, folder, 'images', '%s.png' % name),
                cv2.IMREAD_COLOR)

            ## augment --------------------------------------
            augment_image = do_test_augment(image, proposal=None, **params)

            net.set_mode('test')
            with torch.no_grad():
                input = torch.from_numpy(augment_image.transpose((2, 0, 1))).float().div(255).unsqueeze(0)
                input = Variable(input).cuda()
                net.forward(input)

            rcnn_proposal, detection, mask, instance = undo_test_augment(net, image, **params)

            ## save results ---------------------------------------
            # np.save(os.path.join(out_dir, 'predict', tag, 'rcnn_proposals', '%s.npy' % name), rcnn_proposal)
            # np.save(os.path.join(out_dir, 'predict', tag, 'masks', '%s.npy' % name), mask)
            np.save(os.path.join(out_dir, 'predict', tag, 'detections', '%s.npy' % name), detection)
            # np.save(os.path.join(out_dir, 'predict', tag, 'instances', '%s.npy' % name), instance)

            if 0:
                threshold = cfg.rcnn_test_nms_pre_score_threshold
                all2 = draw_predict_mask(threshold, image, mask, detection)
                ## save
                # cv2.imwrite(os.path.join(out_dir, 'predict', tag, 'predicts', '%s.png' % name), all2)

            if 1:
                color_overlay = multi_mask_to_color_overlay(mask)
                color1_overlay = multi_mask_to_contour_overlay(mask, color_overlay)
                contour_overlay = multi_mask_to_contour_overlay(mask, image, [0, 255, 0])

                mask_score = instance.sum(0)
                # mask_score = cv2.cvtColor((np.clip(mask_score, 0, 1) * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR)
                mask_score = cv2.cvtColor(
                    (mask_score / mask_score.max() * 255).astype(np.uint8),
                    cv2.COLOR_GRAY2BGR)
                all = np.hstack((image, contour_overlay, color1_overlay, mask_score)).astype(np.uint8)
                # image_show('overlays', all)

                # psd
                # os.makedirs(os.path.join(out_dir, 'predict', 'overlays'), exist_ok=True)
                # cv2.imwrite(os.path.join(out_dir, 'predict', tag, 'overlays', '%s.png' % name), all)
                # os.makedirs(os.path.join(out_dir, 'predict', tag, 'overlays', name), exist_ok=True)
                # cv2.imwrite(os.path.join(out_dir, 'predict', tag, 'overlays', name, "%s.png" % name), image)
                # cv2.imwrite(os.path.join(out_dir, 'predict', tag, 'overlays', name, "%s.mask.png" % name), color_overlay)
                # cv2.imwrite(os.path.join(out_dir, 'predict', tag, 'overlays', name, "%s.contour.png" % name), contour_overlay)

        # assert(test_num == len(test_loader.sampler))
        log.write('-------------\n')
        log.write('initial_checkpoint = %s\n' % (initial_checkpoint))
        log.write('tag=%s\n' % tag)
        log.write('\n')
def main():
    parser = argparse.ArgumentParser(description='Script to run segmentation models')
    parser.add_argument('--batch_size', help='desired batch size for training',
                        action='store', type=int, dest='batch_size', default=1)
    parser.add_argument('--net', help='models to train on',
                        action='store', dest='models', default='mask_rcnn')
    parser.add_argument('--learning_rate', help='starting learning rate',
                        action='store', type=float, dest='learning_rate', default=0.001)
    parser.add_argument('--optimizer', help='adam or sgd optimizer',
                        action='store', dest='optimizer', default='sgd')
    parser.add_argument('--random_seed', help='seed for random initialization',
                        action='store', type=int, dest='seed', default=100)
    parser.add_argument('--load_model', help='load models from file',
                        action='store_true', default=False)
    parser.add_argument('--predict', help='only predict',
                        action='store_true', default=False)
    parser.add_argument('--print_every', help='print loss every print_every steps',
                        action='store', type=int, default=10)
    parser.add_argument('--save_model_every', help='save models every save_model_every steps',
                        action='store', type=int, default=100)
    parser.add_argument('--input_width', help='input image width to a net',
                        action='store', type=int, default=128)
    parser.add_argument('--input_height', help='input image height to a net',
                        action='store', type=int, default=128)
    parser.add_argument('--pretrained', help='load pretrained models when doing transfer learning',
                        action='store_true', default=True)
    parser.add_argument('--num_iters', help='total number of iterations for training',
                        action='store', type=int, default=100000)
    parser.add_argument('--is_validation', help='whether or not to calculate validation when training',
                        action='store_true', default=False)
    parser.add_argument('--iter_valid', help='calculate validation loss every iter_valid steps',
                        action='store', type=int, default=100)
    parser.add_argument('--num_workers', help='number of workers for loading dataset',
                        action='store', type=int, default=4)
    parser.add_argument('--train_split', help='the train dataset split',
                        action='store', default='ids_train')
    parser.add_argument('--valid_split', help='the valid dataset split',
                        action='store', default='ids_valid')
    parser.add_argument('--visualize_split', help='the visualize dataset split',
                        action='store', default='ids_visualize')
    parser.add_argument('--iter_accum', help='iter_accum',
                        action='store', type=int, default=1)
    parser.add_argument('--result_dir', help='result dir for saving logs and data',
                        action='store', default='../results')
    parser.add_argument('--data_dir', help='the root dir to store data',
                        action='store', default='../data/2018-4-12_dataset')
    parser.add_argument('--initial_checkpoint', help='checkpoint to load the model from',
                        action='store', default=None)
    parser.add_argument('--image_folder_train', help='the folder containing images for training',
                        action='store', default='stage1')
    parser.add_argument('--image_folder_valid', help='the folder containing images for validation',
                        action='store', default='stage1')
    parser.add_argument('--image_folder_visualize', help='the folder containing images for visualization',
                        action='store', default='visualize')
    parser.add_argument('--image_folder_test', help='the folder containing images for testing',
                        action='store', default='stage1_test')
    parser.add_argument('--masks_folder_train', help='the folder containing masks for training',
                        action='store', default='stage1_masks')
    parser.add_argument('--masks_folder_valid', help='the folder containing masks for validation',
                        action='store', default='stage1_masks')
    parser.add_argument('--masks_folder_visualize', help='the folder containing masks for visualization',
                        action='store', default='visualize_masks')
    parser.add_argument('--color_scheme', help="the color scheme for imread, must be 'color' or 'gray'",
                        action='store', default='gray')
    parser.add_argument('--masknet', help='mask net', action='store', default='4conv')
    parser.add_argument('--feature_channels', help='feature channels',
                        action='store', type=int, default=128)
    parser.add_argument('--train_box_only', help='train_box_only',
                        action='store_true', default=False)
    parser.add_argument('--run', help='exit debug mode', action='store_true', default=False)

    args = parser.parse_args()

    debug = False
    # debug
    if not args.run:
        args.batch_size = 1
        args.print_every = 1
        args.learning_rate = 0.002
        args.iter_valid = 1
        args.is_validation = False
        args.train_split = 'ids_train'
        args.input_width = 256
        args.input_height = 256
        args.iter_accum = 1
        args.seed = 0
        args.num_workers = 1
        args.save_model_every = 1000
        debug = True

    os.makedirs(args.result_dir, exist_ok=True)
    print('data_dir', args.data_dir)
    print('result_dir', args.result_dir)

    if args.seed:
        torch.manual_seed(args.seed)
        random.seed(args.seed)
        np.random.seed(args.seed)

    log = Logger()
    log.open(args.result_dir + '/log.train.txt', mode='a')

    if args.color_scheme == 'color':
        color_scheme = cv2.IMREAD_COLOR
        image_channel = 3
    elif args.color_scheme == 'gray':
        color_scheme = cv2.IMREAD_GRAYSCALE
        image_channel = 1
    else:
        raise NotImplementedError

    # net ----------------------
    log.write('** net setting **\n')
    cfg = Configuration()
    net = MaskNet(cfg, image_channel, args.masknet, args.feature_channels, args.train_box_only)
    net = net.cuda() if USE_CUDA else net

    log.write('** dataset setting **\n')
    WIDTH, HEIGHT = args.input_width, args.input_height

    aug_image_only = iaa.Sequential([
        # iaa.GaussianBlur((0, 0.25)),
        iaa.AdditiveGaussianNoise(scale=(5, 15)),
        iaa.AverageBlur(k=(1, 2)),
        # iaa.FrequencyNoiseAlpha()
    ])
    aug_both = iaa.Sequential([
        iaa.Affine(shear=(-30, 30), order=0, mode='reflect'),
        # iaa.PiecewiseAffine(scale=(0.1, 0.1), order=0, mode='symmetric')
        # iaa.Affine(translate_px={"x": (-40, 40)})
        # iaa.PerspectiveTransform(scale=0.1)
    ])

    def train_augment(image, multi_mask, meta, index):
        H, W = image.shape[0], image.shape[1]
        if HEIGHT > H or WIDTH > W:
            scale = max((HEIGHT + 1) / H, (WIDTH + 1) / W)
            image, multi_mask = fix_resize_transform2(image, multi_mask,
                                                      int(scale * H), int(scale * W))

        image = linear_normalize_intensity_augment(image)
        image, multi_mask = random_shift_scale_rotate_transform2(
            image, multi_mask,
            shift_limit=[0, 0],
            scale_limit=[1 / 1.5, 1.5],
            rotate_limit=[-45, 45],
            borderMode=cv2.BORDER_REFLECT_101,  # borderMode=cv2.BORDER_CONSTANT
            u=0.5)
        image, multi_mask = random_crop_transform2(image, multi_mask, WIDTH, HEIGHT, u=0.5)
        image, multi_mask = random_horizontal_flip_transform2(image, multi_mask, 0.5)
        image, multi_mask = random_vertical_flip_transform2(image, multi_mask, 0.5)
        image, multi_mask = random_rotate90_transform2(image, multi_mask, 0.5)

        image = image.reshape(image.shape[0], image.shape[1], -1)
        aug_both_det = aug_both.to_deterministic()
        image = aug_both_det.augment_image(image)
        multi_mask = aug_both_det.augment_image(multi_mask)
        image = aug_image_only.augment_image(image)
        multi_mask = relabel_multi_mask(multi_mask)

        input = torch.from_numpy(image.transpose((2, 0, 1))).float().div(255)
        box, label, instance = multi_mask_to_annotation(multi_mask)
        return input, box, label, instance, meta, index
    def valid_augment(image, multi_mask, meta, index):
        H, W = image.shape[0], image.shape[1]
        if HEIGHT > H or WIDTH > W:
            scale = max((HEIGHT + 1) / H, (WIDTH + 1) / W)
            image, multi_mask = fix_resize_transform2(image, multi_mask,
                                                      int(scale * H), int(scale * W))
        image, multi_mask = fix_crop_transform2(image, multi_mask, -1, -1, WIDTH, HEIGHT)

        # ---------------------------------------
        H, W = image.shape[0], image.shape[1]
        input = torch.from_numpy(image.reshape(H, W, -1).transpose((2, 0, 1))).float().div(255)
        box, label, instance = multi_mask_to_annotation(multi_mask)
        return input, box, label, instance, meta, index

    def train_collate(batch):
        batch_size = len(batch)
        inputs = torch.stack([batch[b][0] for b in range(batch_size)], 0)
        boxes = [batch[b][1] for b in range(batch_size)]
        labels = [batch[b][2] for b in range(batch_size)]
        instances = [batch[b][3] for b in range(batch_size)]
        metas = [batch[b][4] for b in range(batch_size)]
        indices = [batch[b][5] for b in range(batch_size)]
        return [inputs, boxes, labels, instances, metas, indices]

    train_dataset = ScienceDataset(data_dir=args.data_dir,
                                   image_set=args.train_split,
                                   image_folder=args.image_folder_train,
                                   masks_folder=args.masks_folder_train,
                                   color_scheme=color_scheme,
                                   transform=train_augment,
                                   mode='train')
    train_loader = DataLoader(train_dataset,
                              sampler=RandomSampler(train_dataset),
                              batch_size=args.batch_size, drop_last=True,
                              num_workers=args.num_workers, pin_memory=True,
                              collate_fn=train_collate)

    valid_dataset = ScienceDataset(data_dir=args.data_dir,
                                   image_set=args.valid_split,
                                   image_folder=args.image_folder_valid,
                                   masks_folder=args.masks_folder_valid,
                                   color_scheme=color_scheme,
                                   transform=valid_augment,
                                   mode='valid')
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=1, drop_last=False,
                              num_workers=args.num_workers, pin_memory=True,
                              collate_fn=train_collate)

    visualize_dataset = ScienceDataset(data_dir=args.data_dir,
                                       image_set=args.visualize_split,
                                       image_folder=args.image_folder_visualize,
                                       masks_folder=args.masks_folder_visualize,
                                       color_scheme=cv2.IMREAD_GRAYSCALE,
                                       transform=valid_augment,
                                       mode='valid')
    visualize_loader = DataLoader(visualize_dataset,
                                  sampler=SequentialSampler(visualize_dataset),
                                  batch_size=1, drop_last=False,
                                  num_workers=args.num_workers, pin_memory=True,
                                  collate_fn=train_collate)

    log.write('\tWIDTH, HEIGHT = %d, %d\n' % (WIDTH, HEIGHT))
    log.write('\ttrain_dataset.split = %s\n' % train_dataset.image_set)
    log.write('\tvalid_dataset.split = %s\n' % valid_dataset.image_set)
    log.write('\tlen(train_dataset) = %d\n' % (len(train_dataset)))
    log.write('\tlen(valid_dataset) = %d\n' % (len(valid_dataset)))
    log.write('\tlen(train_loader) = %d\n' % (len(train_loader)))
    log.write('\tlen(valid_loader) = %d\n' % (len(valid_loader)))
    log.write('\tbatch_size = %d\n' % args.batch_size)
    log.write('\titer_accum = %d\n' % args.iter_accum)
    log.write('\tbatch_size*iter_accum = %d\n' % (args.batch_size * args.iter_accum))
    log.write('\n')

    LR = None
    # LR = StepLR([ (0, 0.01), (200, 0.001), (300, -1)])
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                              lr=args.learning_rate / args.iter_accum,
                              momentum=0.9, weight_decay=0.0001)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                               lr=args.learning_rate / args.iter_accum,
                               weight_decay=0.0001)
    else:
        raise NotImplementedError

    trainer = Trainer(net=net,
                      train_loader=train_loader,
                      valid_loader=valid_loader,
                      visualize_loader=visualize_loader,
                      optimizer=optimizer,
                      learning_rate=args.learning_rate,
                      LR=LR,
                      logger=log,
                      iter_accum=args.iter_accum,
                      num_iters=args.num_iters,
                      iter_smooth=args.print_every,
                      iter_log=args.print_every,
                      iter_valid=args.iter_valid,
                      images_per_epoch=len(train_dataset),
                      initial_checkpoint=args.initial_checkpoint,
                      pretrain_file=None,
                      debug=debug,
                      is_validation=args.is_validation,
                      out_dir=args.result_dir)
    trainer.run_train()
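
# Hypothetical entry point (not shown in the original source), assuming this module is
# executed directly as a script: it simply dispatches to main() so the argparse flags
# above take effect, e.g.
#   python train.py --run --batch_size 4 --optimizer adam
# The script filename here is only an illustrative assumption.
if __name__ == '__main__':
    main()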
def run_train():
    out_dir = RESULTS_DIR + '/mask-rcnn-50-gray500-02'
    initial_checkpoint = RESULTS_DIR + '/mask-rcnn-50-gray500-02/checkpoint/best_model.pth'
    pretrain_file = RESULTS_DIR + '/mask-rcnn-50-gray500-02/checkpoint/best_model.pth'  # None  # RESULTS_DIR + '/mask-single-shot-dummy-1a/checkpoint/00028000_model.pth'
    skip = ['crop', 'mask']

    ## setup -----------------
    os.makedirs(out_dir + '/checkpoint', exist_ok=True)
    os.makedirs(out_dir + '/train', exist_ok=True)

    log = Logger()
    log.open(out_dir + '/log.train.txt', mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    log.write('** some experiment setting **\n')
    log.write('\tSEED = %u\n' % SEED)
    log.write('\tPROJECT_PATH = %s\n' % PROJECT_PATH)
    log.write('\tout_dir = %s\n' % out_dir)
    log.write('\n')

    ## net ----------------------
    log.write('** net setting **\n')
    cfg = Configuration()
    net = MaskRcnnNet(cfg).cuda()

    if initial_checkpoint is not None:
        log.write('\tinitial_checkpoint = %s\n' % initial_checkpoint)
        net.load_state_dict(
            torch.load(initial_checkpoint,
                       map_location=lambda storage, loc: storage))
        # with open(out_dir + '/checkpoint/configuration.pkl', 'rb') as pickle_file:
        #     cfg = pickle.load(pickle_file)

    if pretrain_file is not None:
        log.write('\tpretrain_file = %s\n' % pretrain_file)
        net.load_pretrain(pretrain_file, skip)

    log.write('%s\n\n' % (type(net)))
    log.write('%s\n' % (net.version))
    log.write('\n')

    ## optimiser ----------------------------------
    iter_accum = 1
    batch_size = 8
    num_iters = 1000 * 1000
    iter_smooth = 20
    iter_log = 50
    iter_valid = 100
    iter_save = [0, num_iters - 1] + list(range(0, num_iters, 500))

    LR = None
    # LR = StepLR([ (0, 0.01), (200, 0.001), (300, -1)])
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                          lr=0.01 / iter_accum, momentum=0.9, weight_decay=0.0001)

    start_iter = 0
    start_epoch = 0.

    ## dataset ----------------------------------------
    log.write('** dataset setting **\n')

    train_dataset = ScienceDataset(
        'train1_ids_gray2_500', mode='train',
        # 'debug1_ids_gray_only_10', mode='train',
        # 'disk0_ids_dummy_9', mode='train',  # 12
        # 'train1_ids_purple_only1_101', mode='train',  # 12
        # 'merge1_1', mode='train',
        transform=train_augment)
    train_loader = DataLoader(train_dataset,
                              sampler=RandomSampler(train_dataset),
                              batch_size=batch_size, drop_last=True, num_workers=4,
                              pin_memory=True, collate_fn=train_collate)

    valid_dataset = ScienceDataset(
        'valid1_ids_gray2_43', mode='train',
        # 'debug1_ids_gray_only_10', mode='train',
        # 'disk0_ids_dummy_9', mode='train',
        # 'train1_ids_purple_only1_101', mode='train',  # 12
        # 'merge1_1', mode='train',
        transform=valid_augment)
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=batch_size, drop_last=False, num_workers=4,
                              pin_memory=True, collate_fn=train_collate)

    log.write('\tWIDTH, HEIGHT = %d, %d\n' % (WIDTH, HEIGHT))
    log.write('\ttrain_dataset.split = %s\n' % (train_dataset.split))
    log.write('\tvalid_dataset.split = %s\n' % (valid_dataset.split))
    log.write('\tlen(train_dataset) = %d\n' % (len(train_dataset)))
    log.write('\tlen(valid_dataset) = %d\n' % (len(valid_dataset)))
    log.write('\tlen(train_loader) = %d\n' % (len(train_loader)))
    log.write('\tlen(valid_loader) = %d\n' % (len(valid_loader)))
    log.write('\tbatch_size = %d\n' % (batch_size))
    log.write('\titer_accum = %d\n' % (iter_accum))
    log.write('\tbatch_size*iter_accum = %d\n' % (batch_size * iter_accum))
    log.write('\n')

    ## start training here! ##############################################
    log.write('** start training here! **\n')
    log.write(' optimizer=%s\n' % str(optimizer))
    log.write(' momentum=%f\n' % optimizer.param_groups[0]['momentum'])
    log.write(' LR=%s\n\n' % str(LR))
    log.write(' images_per_epoch = %d\n\n' % len(train_dataset))
    log.write(' rate  current_iter  epoch  num | valid_loss | train_loss | batch_loss | time \n')
    log.write('-------------------------------------------------------------------------------------------------------------------------------\n')

    train_loss = np.zeros(6, np.float32)
    train_acc = 0.0
    valid_loss = np.zeros(6, np.float32)
    batch_loss = np.zeros(6, np.float32)
    batch_acc = 0.0
    rate = 0

    start = timer()
    j = 0
    current_iter = 0
    last_saved_model_filepath = None
    while current_iter < num_iters:  # loop over the dataset multiple times
        sum_train_loss = np.zeros(6, np.float32)
        sum_train_acc = 0.0
        sum = 0

        net.set_mode('train')
        optimizer.zero_grad()
        for inputs, truth_boxes, truth_labels, truth_instances, metas, indices in train_loader:
            if all(len(b) == 0 for b in truth_boxes):
                continue
            batch_size = len(indices)
            current_iter = j // iter_accum + start_iter
            epoch = (current_iter - start_iter) * batch_size * iter_accum / len(train_dataset) + start_epoch
            num_products = epoch * len(train_dataset)

            if current_iter % iter_valid == 0:
                net.set_mode('valid')
                valid_loss = evaluate(net, valid_loader)
                net.set_mode('train')

                print('\r', end='', flush=True)
                log.write(
                    '%0.4f %5.1f k %6.1f %4.1f m | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %s\n' % (
                        rate, current_iter / 1000, epoch, num_products / 1000000,
                        valid_loss[0], valid_loss[1], valid_loss[2], valid_loss[3], valid_loss[4], valid_loss[5],  # valid_acc,
                        train_loss[0], train_loss[1], train_loss[2], train_loss[3], train_loss[4], train_loss[5],  # train_acc,
                        batch_loss[0], batch_loss[1], batch_loss[2], batch_loss[3], batch_loss[4], batch_loss[5],  # batch_acc,
                        time_to_str((timer() - start) / 60)))
                log_losses(train_loss=train_loss, valid_loss=valid_loss, step=current_iter)
                time.sleep(0.01)

            if current_iter in iter_save:
                torch.save(net.state_dict(),
                           out_dir + '/checkpoint/%08d_model.pth' % (current_iter))
                """
                torch.save({
                    'optimizer': optimizer.state_dict(),
                    'current_iter': current_iter,
                    'epoch': epoch,
                }, out_dir + '/checkpoint/%08d_optimizer.pth' % (current_iter))
                """
                with open(out_dir + '/checkpoint/configuration.pkl', 'wb') as pickle_file:
                    pickle.dump(cfg, pickle_file, pickle.HIGHEST_PROTOCOL)

            # learning rate scheduler -------------
            if LR is not None:
                lr = LR.get_rate(current_iter)
                if lr < 0:
                    break
                adjust_learning_rate(optimizer, lr / iter_accum)
            rate = get_learning_rate(optimizer) * iter_accum

            # one current_iter update -------------
            inputs = Variable(inputs).cuda()
            net(inputs, truth_boxes, truth_labels, truth_instances)
            loss = net.loss(inputs, truth_boxes, truth_labels, truth_instances)

            # accumulated update
            loss.backward()
            if j % iter_accum == 0:
                # torch.nn.utils.clip_grad_norm(net.parameters(), 1)
                optimizer.step()
                optimizer.zero_grad()

            # print statistics ------------
            batch_acc = 0  # acc[0][0]
            batch_loss = np.array((
                loss.cpu().data.numpy(),
                net.rpn_cls_loss.cpu().data.numpy(),
                net.rpn_reg_loss.cpu().data.numpy(),
                net.rcnn_cls_loss.cpu().data.numpy(),
                net.rcnn_reg_loss.cpu().data.numpy(),
                net.mask_cls_loss.cpu().data.numpy(),
            ))
            sum_train_loss += batch_loss
            sum_train_acc += batch_acc
            sum += 1
            if current_iter % iter_smooth == 0:
                train_loss = sum_train_loss / sum
                train_acc = sum_train_acc / sum
                sum_train_loss = np.zeros(6, np.float32)
                sum_train_acc = 0.
                sum = 0

            print(
                '\r%0.4f %5.1f k %6.1f %4.1f m | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %0.3f %0.2f %0.2f %0.2f %0.2f %0.2f | %s %d,%d,%s' % (
                    rate, current_iter / 1000, epoch, num_products / 1000000,
                    valid_loss[0], valid_loss[1], valid_loss[2], valid_loss[3], valid_loss[4], valid_loss[5],  # valid_acc,
                    train_loss[0], train_loss[1], train_loss[2], train_loss[3], train_loss[4], train_loss[5],  # train_acc,
                    batch_loss[0], batch_loss[1], batch_loss[2], batch_loss[3], batch_loss[4], batch_loss[5],  # batch_acc,
                    time_to_str((timer() - start) / 60), current_iter, j, ''),  # str(inputs.size())
                end='', flush=True)
            j = j + 1

        pass  # -- end of one data loader --
    pass  # -- end of all iterations --

    if 1:  # save last
        torch.save(net.state_dict(),
                   out_dir + '/checkpoint/%d_model.pth' % (current_iter))
        """
        torch.save({
            'optimizer': optimizer.state_dict(),
            'current_iter': current_iter,
            'epoch': epoch,
        }, out_dir + '/checkpoint/%d_optimizer.pth' % (current_iter))
        """
    log.write('\n')
def run_multi_masks_prediction():
    initial_checkpoint = RESULTS_DIR + '/mask-rcnn-50-gray500-02/checkpoint/best_model.pth'

    os.makedirs(OUT_DIR + '/submit/overlays', exist_ok=True)
    os.makedirs(OUT_DIR + '/submit/npys', exist_ok=True)

    log = Logger()
    log.open(OUT_DIR + '/log.evaluate.txt', mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    log.write('** some experiment setting **\n')
    log.write('\tSEED = %u\n' % SEED)
    log.write('\tPROJECT_PATH = %s\n' % PROJECT_PATH)
    log.write('\tout_dir = %s\n' % OUT_DIR)
    log.write('\n')

    cfg = Configuration()
    net = MaskRcnnNet(cfg).cuda()
    if initial_checkpoint is not None:
        log.write('\tinitial_checkpoint = %s\n' % initial_checkpoint)
        net.load_state_dict(
            torch.load(initial_checkpoint,
                       map_location=lambda storage, loc: storage))
    log.write('%s\n\n' % (type(net)))
    log.write('\n')

    log.write('** dataset setting **\n')
    test_dataset = ScienceDataset('test1_ids_gray_only_53', mode='test',
                                  transform=_submit_augment)
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=1, drop_last=False, num_workers=4,
        pin_memory=True, collate_fn=_submit_collate)
    log.write('\ttest_dataset.split = %s\n' % (test_dataset.split))
    log.write('\tlen(test_dataset) = %d\n' % (len(test_dataset)))
    log.write('\n')

    log.write('** start evaluation here! **\n')
    net.set_mode('test')
    for inputs, images, indices in tqdm(test_loader, 'Mask-RCNN predictions'):
        batch_size = inputs.size()[0]
        # NOTE: Current version support batch_size==1 for variable size input. To use
        # batch_size > 1, need to fix code for net.windows, etc.
        assert (batch_size == 1)

        with torch.no_grad():
            inputs = Variable(inputs).cuda()
            net(inputs)

        # Resize results to original images shapes.
        results = net.results
        _revert(results, images)

        for index_in_batch in range(batch_size):
            image = images[index_in_batch]
            index = indices[index_in_batch]
            mask = results[index_in_batch].multi_mask
            image_id = test_dataset.ids[index]
            save_prediction_info(image_id, image, mask)