def main(args, conf):
    ## DIY dataset
    tr_dataset = AudioDataset(conf, data)
    cv_dataset = AudioDataset(conf, data)

    ## DIY dataloader
    batch_size = conf.get("batch_size", 33)
    num_workers = conf.get("num_workers", 5)
    tr_loader = AudioDataLoader(tr_dataset, batch_size=batch_size, num_workers=num_workers)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=batch_size, num_workers=num_workers)
    loader_dict = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    ## DIY model
    model = SegNet(conf)
    print(model)
    model.cuda()
    optimizer = get_optim(model, conf)

    # solver
    solver = Solver(conf, loader_dict, model, optimizer)
    solver.train()
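# A minimal sketch of the `get_optim` helper assumed above; it is not shown in this
# snippet, so the optimizer choice and the "lr" / "weight_decay" config keys are assumptions.
import torch

def get_optim(model, conf):
    lr = conf.get("lr", 1e-3)
    weight_decay = conf.get("weight_decay", 0.0)
    return torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)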
def getModel(args):
    if args.arch == 'UNet':
        model = Unet(3, 1).to(device)
    if args.arch == 'resnet34_unet':
        model = resnet34_unet(1, pretrained=False).to(device)
    if args.arch == 'unet++':
        args.deepsupervision = True
        model = NestedUNet(args, 3, 1).to(device)
    if args.arch == 'Attention_UNet':
        model = AttU_Net(3, 1).to(device)
    if args.arch == 'segnet':
        model = SegNet(3, 1).to(device)
    if args.arch == 'r2unet':
        model = R2U_Net(3, 1).to(device)
    # if args.arch == 'fcn32s':
    #     model = get_fcn32s(1).to(device)
    if args.arch == 'myChannelUnet':
        model = myChannelUnet(3, 1).to(device)
    if args.arch == 'fcn8s':
        assert args.dataset != 'esophagus', (
            "fcn8s cannot be used with the esophagus dataset: its images are 80x80, and "
            "after five 2x downsamplings the feature map would be 2.5x2.5, which is not an "
            "integer resolution. Resize the dataset to a higher resolution before using FCN.")
        model = get_fcn8s(1).to(device)
    if args.arch == 'cenet':
        from cenet import CE_Net_
        model = CE_Net_().to(device)
    return model
def train_model(training_loader):
    global epoch_list
    global learning_rate_list

    if train_from_last_model:
        model = load_model()
        learning_rate_list, epoch_list = load_info()
        print("Load the existing model and continue training")
    else:
        model = SegNet(input_channel, output_channel).cuda()
        print("Train a new model")

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = torch.nn.CrossEntropyLoss().cuda()
    # criterion = nn.BCEWithLogitsLoss().cuda()

    for epoch in range(NUM_EPOCHS):
        t_start = time.time()
        count_batch = 0   # number of batches seen this epoch
        loss_sum = 0.0    # accumulated loss for this epoch
        epoch_list.append(epoch + 1)

        for i, batch in enumerate(tqdm(training_loader)):
            # load data
            input_tensor = Variable(batch['camera_5']).cuda()
            target_tensor = Variable(batch['fg_mask']).cuda()
            predicted_tensor, softmaxed_tensor = model(input_tensor)

            optimizer.zero_grad()
            loss = criterion(predicted_tensor, target_tensor)
            loss.backward()
            optimizer.step()

            # use .item() so the running sum does not keep the autograd graph alive
            loss_sum += loss.item()
            count_batch += 1
            torch.cuda.empty_cache()

        average_loss = loss_sum / count_batch
        learning_rate_list.append(float(average_loss))  # despite its name, this list stores the per-epoch loss
        tqdm.write('{} epoch: loss = {}'.format(epoch + 1, average_loss))
        plot_learning_curve(len(epoch_list))
        save_model(model)
        save_info()

    return model
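# Minimal sketches of the `save_info` / `load_info` helpers used above; they are not
# shown in this snippet, so the pickle file name, its location under `model_dir`, and
# the dict layout are assumptions.
import os
import pickle

def save_info():
    with open(os.path.join(model_dir, 'train_info.pkl'), 'wb') as f:
        pickle.dump({'loss': learning_rate_list, 'epoch': epoch_list}, f)

def load_info():
    with open(os.path.join(model_dir, 'train_info.pkl'), 'rb') as f:
        info = pickle.load(f)
    return info['loss'], info['epoch']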
def __init__(self, fusion, bottleneck, fusion_activ, segnet_models=None, num_classes=3, decoders="multi",
             pretrained_last_layer=False, late_dilation=1, branches=2, viz=False):
    super(FusionNet, self).__init__()
    self.fusion = False
    self.viz = viz
    fusion_module = {
        "ssma": SSMA,
        "custom": SSMACustom
    }

    if segnet_models is None:
        segnet_models = [SegNet(num_classes=num_classes) for i in range(branches)]
    else:
        branches = len(segnet_models)

    if len(segnet_models) > 1:
        self.encoder_mod1 = segnet_models[0].encoders
        # self.encoder_mod1.res_n50_enc.layer3[2].dropout = False
        self.encoder_mod2 = segnet_models[1].encoders
        # self.encoder_mod2.res_n50_enc.layer3[2].dropout = False
        # self.ssma_s1 = SSMA(24, 6)
        # self.ssma_s2 = SSMA(24, 6)
        if branches == 3:
            self.encoder_mod3 = segnet_models[2].encoders
        else:
            self.encoder_mod3 = None

        self.ssma_res = fusion_module[fusion](
            segnet_models[0].filter_config[-1], bottleneck=bottleneck, branches=branches,
            fusion_activ=fusion_activ)
        self.decoder_mod1 = segnet_models[0].decoders

        if decoders == "multi" or decoders == "late":
            self.decoder_mod2 = segnet_models[1].decoders
            if branches == 3:
                self.decoder_mod3 = segnet_models[2].decoders
            else:
                self.decoder_mod3 = None
            self.classifier = fusion_module[fusion](
                segnet_models[0].filter_config[0], bottleneck=bottleneck, out=num_classes,
                branches=branches, late_dilation=late_dilation, fusion_activ=fusion_activ)
            if fusion == "custom" and pretrained_last_layer:
                self.classifier.final_conv = segnet_models[0].classifier
        elif decoders == "single":
            self.decoder_mod2 = None
            self.classifier = segnet_models[0].classifier

        self.fusion = decoders
    else:
        self.encoder_mod1 = segnet_models[0].encoders
        self.decoder_mod1 = segnet_models[0].decoders
        self.classifier = segnet_models[0].classifier
def create():
    '''Creates the segmentation neural network'''
    return SegNet(SEGNET_IMAGE_DIMS, 2)
def __init__(self, conf, viz=False, save=False, test_set=None, test_checkpoint=None, test_max=None,
             model_only=False, num_classes=None, modalities=None, dataset_seq=None, nopredict=False, **kwargs):
    super().__init__()
    pl.seed_everything(RANDOM_SEED)
    self.save_hyperparameters(conf)

    if modalities is not None:
        self.hparams.modalities = modalities
    self.hparams.modalities = self.hparams.modalities.split(",")
    logger.warning(f"params {self.hparams}")

    init_channels = len(self.hparams.modalities) if self.hparams.init_channels is None else self.hparams.init_channels
    self.model = SegNet(
        num_classes=self.hparams.num_classes if num_classes is None else num_classes,
        n_init_features=init_channels,
        depthwise_conv=self.hparams.depthwise_conv
    )

    if not model_only:
        self.save = save
        logger.info(f"Save {self.save}")
        self.viz = viz
        self.hparams.resize = (480, 240)
        self.hparams.masking = True
        self.hparams.normalize = False
        self.hparams.lwmap_range = tuple([float(i) for i in self.hparams.lwmap_range.split(",")])
        self.test_checkpoint = test_checkpoint
        self.test_max = test_max
        self.test_set = test_set if test_set is not None else "test"
        self.dataset_seq = dataset_seq
        self.nopredict = nopredict

        self.datasets = {
            "freiburg": FreiburgDataLoader,
            "freiburgthermal": FreiburgThermalDataLoader,
            "cityscapes": CityscapesDataLoader,
            "kitti": KittiDataLoader,
            "own": OwnDataLoader,
            "thermalvoc": ThermalVOCDataLoader,
            "synthia": SynthiaDataLoader,
            "kaistped": KAISTPedestrianDataLoader,
            "kaistpedann": KAISTPedestrianAnnDataLoader,
            "multispectralseg": MIRMultispectral,
            "lostfound": LostFoundDataLoader,
            "freiburgraw": FreiburgForestRawDataLoader,
            "cityscapesraw": CityscapesRawDataLoader,
            "rugd": RUGDDataLoader,
            "wilddash": WildDashDataLoader
        }

        if self.hparams.orig_dataset is None and self.hparams.mode in ["affordances", "objects"]:
            self.hparams.orig_dataset = self.hparams.dataset

        if not (self.hparams.dataset == "combo"):
            self.orig_dataset = self.get_dataset(name=self.hparams.orig_dataset, set=self.test_set)
        else:
            if self.hparams.dataset_combo is None:
                self.hparams.dataset_combo = "freiburg,cityscapes,thermalvoc,synthia,kitti,multispectralseg,freiburgthermal,lostfound"
            self.hparams.dataset_combo = self.hparams.dataset_combo.split(",")
            self.orig_dataset = self.get_dataset_combo(set=self.test_set)

        if self.hparams.loss in ["sord", "compare"]:
            self.hparams.ranks = [int(r) for r in self.hparams.ranks.split(",")]
        else:
            self.hparams.ranks = [1, 2, 3]

        self.train_set, self.val_set, self.test_set = self.get_dataset_splits(normalize=self.hparams.normalize)
        self.hparams.train_set, self.hparams.val_set, self.hparams.test_set = \
            len(self.train_set.dataset), len(self.val_set.dataset), len(self.test_set.dataset)
        self.update_settings()
def load_model():
    segnet_save_path = os.path.join(model_dir, 'segnet_model.pth')
    # model = torch.load(segnet_save_path)
    model = SegNet(input_channel, output_channel).cuda()
    model.load_state_dict(torch.load(segnet_save_path))
    return model
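# A matching `save_model` helper (called at the end of each epoch in train_model above)
# is not shown here; this is a minimal sketch assuming only the state dict is persisted.
def save_model(model):
    segnet_save_path = os.path.join(model_dir, 'segnet_model.pth')
    torch.save(model.state_dict(), segnet_save_path)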
class ImageInfo():
    def __init__(self):
        self.bridge = CvBridge()
        self.img_height = 0
        self.img_width = 0
        self.blueColor = (255, 0, 0)
        self.thickness = 2
        self.fontScale = 0.7
        self.font = cv2.FONT_HERSHEY_SIMPLEX
        self.get_pixel_depth = rospy.ServiceProxy('get_depth', GetDepth)
        self.fx = 1.93                 # focal length of Intel D435i camera in mm
        self.mm_pix = 0.003            # width of a pixel is .003 mm
        self.img_center = (640, 360)   # center of the image plane is (640, 360)
        self.factor_scale = 1.509      # ratio between color matrix dims and depth matrix dims
        self.segNet = SegNet()
        with open('./ssd_coco_labels.txt', 'r') as f:
            self.classes = f.read().splitlines()

    def deproject_cam_coords(self, obj_center, depth):
        u_pixel = obj_center[0]
        v_pixel = obj_center[1]
        x_plane = self.mm_pix * (u_pixel - self.img_center[0])
        y_plane = self.mm_pix * (self.img_center[1] - v_pixel)

        # calc y coord
        y_coord = (depth * y_plane) / math.sqrt(y_plane**2 + (x_plane**2 + self.fx**2))

        # calc x coord
        if (y_coord == 0):
            x_coord = depth * (x_plane / math.sqrt(x_plane**2 + self.fx**2))
        else:
            x_coord = y_coord * (x_plane / y_plane)

        # calc z coord
        if (x_coord == 0 and y_coord == 0):
            z_coord = depth
        elif (x_coord == 0):
            z_coord = math.sqrt(depth**2 - y_coord**2)
        elif (y_coord == 0):
            z_coord = math.sqrt(depth**2 - x_coord**2)
        else:
            z_coord = x_coord * (self.fx / x_plane)

        arr = [x_coord, y_coord, z_coord]
        print(arr)
        # return [x_coord, y_coord, z_coord]

    def getDepthFromNeighborPixels(self, center):
        radius = 5
        depthsArr = []

        # center query
        center_x_scaled = int(center[0] / self.factor_scale)
        center_y_scaled = int(center[1] / self.factor_scale)
        center_req = GetDepthRequest(center_x_scaled, center_y_scaled)
        center_depth = self.get_pixel_depth(center_req)  # in mm
        depthsArr.append(center_depth.depth)

        # north point query
        if (center[1] >= radius):
            north_x_scaled = int(center[0] / self.factor_scale)
            north_y_scaled = int((center[1] - radius) / self.factor_scale)
            north_req = GetDepthRequest(north_x_scaled, north_y_scaled)
            north_depth = self.get_pixel_depth(north_req)  # in mm
            depthsArr.append(north_depth.depth)

        # east point query
        if (center[0] + radius < 1280):
            east_x_scaled = int((center[0] + radius) / self.factor_scale)
            east_y_scaled = int(center[1] / self.factor_scale)
            east_req = GetDepthRequest(east_x_scaled, east_y_scaled)
            east_depth = self.get_pixel_depth(east_req)  # in mm
            depthsArr.append(east_depth.depth)

        # south point query
        if (center[1] + radius < 720):
            south_x_scaled = int(center[0] / self.factor_scale)
            south_y_scaled = int((center[1] + radius) / self.factor_scale)
            south_req = GetDepthRequest(south_x_scaled, south_y_scaled)
            south_depth = self.get_pixel_depth(south_req)  # in mm
            depthsArr.append(south_depth.depth)

        # west point query
        if (center[0] >= radius):
            west_x_scaled = int((center[0] - radius) / self.factor_scale)
            west_y_scaled = int(center[1] / self.factor_scale)
            west_req = GetDepthRequest(west_x_scaled, west_y_scaled)
            west_depth = self.get_pixel_depth(west_req)  # in mm
            depthsArr.append(west_depth.depth)

        depthsArr = np.array(depthsArr)
        return np.median(depthsArr)

    def drawBoundingBox(self, img, detection):
        # unpack detection object to get info about bounding box
        classID = detection.ClassID
        conf = detection.Confidence
        center = detection.Center
        width = int(detection.Width)
        height = int(detection.Height)

        # get top left coords and box label
        topLeft_x = int(center[0] - (width / 2))
        topLeft_y = int(center[1] - (height / 2))
        box_label = '{}:{}'.format(self.classes[classID], round(conf, 2))

        # only display bounding box, depths, and 3D coords if object is PERSON
        if (classID == 62):
            # Query segnet class for centroid.
            crop = img[topLeft_y:topLeft_y + height, topLeft_x:topLeft_x + width, :]
            mask, crop_centroid = self.segNet.getCentroid(crop, classID)
            # add coords to top left of bounding box
            img_centroid = (topLeft_x + crop_centroid[0], topLeft_y + crop_centroid[1])

            # Get depth of centroid by sampling neighboring pixels and deproject into 3D camera coords
            depth = self.getDepthFromNeighborPixels(img_centroid)
            # self.deproject_cam_coords(img_centroid, depth)
            depthLabel = '{}m'.format(depth / 1000.0)

            # Render important information.
            cv2.rectangle(img, (topLeft_x, topLeft_y), (topLeft_x + width, topLeft_y + height),
                          self.blueColor, self.thickness)  # draw bounding box
            cv2.putText(img, box_label, (topLeft_x, topLeft_y + 20), self.font, self.fontScale,
                        self.blueColor, self.thickness)  # draw class: conf
            cv2.putText(img, depthLabel, (topLeft_x, topLeft_y + 40), self.font, self.fontScale,
                        self.blueColor, self.thickness)  # draw depth in meters
            cv2.circle(img, (int(img_centroid[0]), int(img_centroid[1])), 5, (0, 255, 0), -1)  # draw centroid within bounding box

        return img

    def callback(self, msg):
        height = msg.height
        width = msg.width
        np_image_bgr = self.bridge.imgmsg_to_cv2(msg)
        np_image_rgb = cv2.cvtColor(np_image_bgr, cv2.COLOR_BGR2RGB)

        cuda_img = jetson.utils.cudaFromNumpy(np_image_rgb)
        detections = net.Detect(cuda_img)
        jetson.utils.cudaDeviceSynchronize()

        for detection in detections:
            if (detection.ClassID != 62):
                continue
            np_image_rgb = self.drawBoundingBox(np_image_rgb, detection)

        cv2.imshow('CV2 Capture', np_image_rgb)
        keyPress = cv2.waitKey(1)
def main(_run, _config, world_size, rank, init_method, datadir, outdir_suffix, batch_size,
         num_workers, outdir, lr, wd, num_epochs, nsamples):
    cudnn.benchmark = True
    device = torch.device('cuda:0')  # device is set by CUDA_VISIBLE_DEVICES
    torch.cuda.set_device(device)

    # rank 0 creates experiment observer
    is_master = rank == 0

    # rank joins process group
    print('rank', rank, 'init_method', init_method)
    dist.init_process_group('nccl', rank=rank, world_size=world_size, init_method=init_method)

    # actual training stuff
    train = make_loader(
        pt.join(datadir, '') if datadir else None,
        batch_size,
        device,
        world_size,
        rank,
        num_workers,
        # this is the parameter based on which augmentation is applied to the data
        gpu_augmentation=False,
        image_rng=None,
        nsamples=nsamples)

    print('\n experiment name ', exp)

    # outdir stuff
    if outdir is None:
        outdir = pt.join(ROOT, '../exp/', outdir_suffix)

    model = Net(batch_size=batch_size)
    model = model.to(device)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device])

    optimizer, policy = make_policy(num_epochs, model, lr, 0.9, wd)

    # loss for autoencoder
    loss = L1Loss(output_key='output', target_key='target_image').to(device)

    trainer = Trainer(model, optimizer, loss, rank, MSELossMetric(), policy, None, train, None,
                      outdir,
                      snapshot_interval=4 if is_master else None,
                      quiet=True if not is_master else False)

    print('\n Number of epochs are: ', num_epochs)
    start = datetime.now()
    with train:
        trainer.train(num_epochs, start_epoch=0)

    print("Training complete in: " + str(datetime.now() - start))
def transfer_pretrained_weighted():
    model = SegNet()
    # Map VGG16-BN parameter names to the corresponding SegNet encoder parameters.
    corresp_name = {
        'features.0.weight': 'vgg16_block1.0.weight',
        'features.0.bias': 'vgg16_block1.0.bias',
        'features.1.weight': 'vgg16_block1.1.weight',
        'features.1.bias': 'vgg16_block1.1.bias',
        'features.1.running_mean': 'vgg16_block1.1.running_mean',
        'features.1.running_var': 'vgg16_block1.1.running_var',
        'features.3.weight': 'vgg16_block1.3.weight',
        'features.3.bias': 'vgg16_block1.3.bias',
        'features.4.weight': 'vgg16_block1.4.weight',
        'features.4.bias': 'vgg16_block1.4.bias',
        'features.4.running_mean': 'vgg16_block1.4.running_mean',
        'features.4.running_var': 'vgg16_block1.4.running_var',
        'features.7.weight': 'vgg16_block2.0.weight',
        'features.7.bias': 'vgg16_block2.0.bias',
        'features.8.weight': 'vgg16_block2.1.weight',
        'features.8.bias': 'vgg16_block2.1.bias',
        'features.8.running_mean': 'vgg16_block2.1.running_mean',
        'features.8.running_var': 'vgg16_block2.1.running_var',
        'features.10.weight': 'vgg16_block2.3.weight',
        'features.10.bias': 'vgg16_block2.3.bias',
        'features.11.weight': 'vgg16_block2.4.weight',
        'features.11.bias': 'vgg16_block2.4.bias',
        'features.11.running_mean': 'vgg16_block2.4.running_mean',
        'features.11.running_var': 'vgg16_block2.4.running_var',
        'features.14.weight': 'vgg16_block3.0.weight',
        'features.14.bias': 'vgg16_block3.0.bias',
        'features.15.weight': 'vgg16_block3.1.weight',
        'features.15.bias': 'vgg16_block3.1.bias',
        'features.15.running_mean': 'vgg16_block3.1.running_mean',
        'features.15.running_var': 'vgg16_block3.1.running_var',
        'features.17.weight': 'vgg16_block3.3.weight',
        'features.17.bias': 'vgg16_block3.3.bias',
        'features.18.weight': 'vgg16_block3.4.weight',
        'features.18.bias': 'vgg16_block3.4.bias',
        'features.18.running_mean': 'vgg16_block3.4.running_mean',
        'features.18.running_var': 'vgg16_block3.4.running_var',
        'features.20.weight': 'vgg16_block3.6.weight',
        'features.20.bias': 'vgg16_block3.6.bias',
        'features.21.weight': 'vgg16_block3.7.weight',
        'features.21.bias': 'vgg16_block3.7.bias',
        'features.21.running_mean': 'vgg16_block3.7.running_mean',
        'features.21.running_var': 'vgg16_block3.7.running_var',
        'features.24.weight': 'vgg16_block4.0.weight',
        'features.24.bias': 'vgg16_block4.0.bias',
        'features.25.weight': 'vgg16_block4.1.weight',
        'features.25.bias': 'vgg16_block4.1.bias',
        'features.25.running_mean': 'vgg16_block4.1.running_mean',
        'features.25.running_var': 'vgg16_block4.1.running_var',
        'features.27.weight': 'vgg16_block4.3.weight',
        'features.27.bias': 'vgg16_block4.3.bias',
        'features.28.weight': 'vgg16_block4.4.weight',
        'features.28.bias': 'vgg16_block4.4.bias',
        'features.28.running_mean': 'vgg16_block4.4.running_mean',
        'features.28.running_var': 'vgg16_block4.4.running_var',
        'features.30.weight': 'vgg16_block4.6.weight',
        'features.30.bias': 'vgg16_block4.6.bias',
        'features.31.weight': 'vgg16_block4.7.weight',
        'features.31.bias': 'vgg16_block4.7.bias',
        'features.31.running_mean': 'vgg16_block4.7.running_mean',
        'features.31.running_var': 'vgg16_block4.7.running_var',
        'features.34.weight': 'vgg16_block5.0.weight',
        'features.34.bias': 'vgg16_block5.0.bias',
        'features.35.weight': 'vgg16_block5.1.weight',
        'features.35.bias': 'vgg16_block5.1.bias',
        'features.35.running_mean': 'vgg16_block5.1.running_mean',
        'features.35.running_var': 'vgg16_block5.1.running_var',
        'features.37.weight': 'vgg16_block5.3.weight',
        'features.37.bias': 'vgg16_block5.3.bias',
        'features.38.weight': 'vgg16_block5.4.weight',
        'features.38.bias': 'vgg16_block5.4.bias',
        'features.38.running_mean': 'vgg16_block5.4.running_mean',
        'features.38.running_var': 'vgg16_block5.4.running_var',
        'features.40.weight': 'vgg16_block5.6.weight',
        'features.40.bias': 'vgg16_block5.6.bias',
        'features.41.weight': 'vgg16_block5.7.weight',
        'features.41.bias': 'vgg16_block5.7.bias',
        'features.41.running_mean': 'vgg16_block5.7.running_mean',
        'features.41.running_var': 'vgg16_block5.7.running_var',
    }

    s_dict = model.state_dict()
    # You have to download the pretrained VGG16-BN weights (vgg16_bn-6c64b313.pth) first.
    pretrained_dict = torch.load('vgg16_bn-6c64b313.pth')
    for name in pretrained_dict:
        if name not in corresp_name:
            continue
        s_dict[corresp_name[name]] = pretrained_dict[name]
    model.load_state_dict(s_dict)
    torch.save(model.state_dict(), 'transfer-vgg16-for11classes.pth')
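# Usage sketch: run the transfer once, then initialise a fresh SegNet from the resulting
# checkpoint before fine-tuning (the file name is taken from the call above).
transfer_pretrained_weighted()
model = SegNet()
model.load_state_dict(torch.load('transfer-vgg16-for11classes.pth'))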
        # print(i1.shape, i2.shape, i_12.shape)
        i_12_w = self.link(i_12)
        b, c, h, w = i_12_w.shape
        i_12_w = i_12_w.view(b, self.branches, int(c / self.branches), h, w)
        # print(i_12_w.shape)
        i_12_w = self.sm(i_12_w)
        # print(i_12_w.shape)
        # x_12 = torch.sum(i_12_w, dim=1)
        x_12 = torch.unbind(i_12_w, dim=1)
        # print(i1.shape, x_12[0].shape)
        # print(i1.long()[0][0][0][:5], i2.long()[0][0][0][:5])
        fused = m_lst[0] * x_12[0]
        for f in range(1, self.branches):
            fused += (m_lst[f] * x_12[f])
        # print(fused.long()[0][0][0][:5])
        if self.final:
            fused = self.final_conv(fused)
        return fused


if __name__ == "__main__":
    segnet = SegNet(num_classes=3)
    encoder = segnet.encoders
    # print(encoder)
    fusion = FusionNet(
        encoders=[encoder],
        decoder=segnet.decoders,
        classifier=segnet.classifier)
    print(fusion)
    print(SSMA(features=512, bottleneck=3))
    train_dataset,
    batch_size=hparams.batch_size,
    shuffle=True,
    num_workers=hparams.num_workers,
    pin_memory=True,
    drop_last=True,
)

test_loader = DataLoader(
    test_dataset,
    batch_size=hparams.batch_size,
    shuffle=False,
    num_workers=hparams.num_workers,
    pin_memory=True,
)

model = SegNet(11, 3, drop_rate=0.2)
writer.add_graph(model, torch.zeros(1, 3, 360, 480))
model = model.to(device)

cls_w = class_weights(train_dataset).astype(np.float32)
cls_w = torch.from_numpy(cls_w[:-1])
criterion = nn.CrossEntropyLoss(reduction="mean", ignore_index=11, weight=cls_w).to(device)

# optimizer = optim.Adam(
optimizer = optim.SGD(
    model.parameters(),
    lr=float(hparams.init_lr),
    weight_decay=float(hparams.weight_decay),
    momentum=0.9,
)
def train(train_run=(True, True), restore=False, epoch=0):
    disc_train_run, segm_train_run = train_run
    train_run = disc_train_run or segm_train_run
    if not train_run:
        FLAGS.batch_size = 1
        FLAGS.num_epochs = 1

    timestr = time.strftime("TRAIN/%d_%b_%Y_%H_%M", time.localtime()) if train_run else time.strftime(
        "TEST/%d_%b_%Y_%H_%M", time.localtime())
    timestr = timestr + "_EPOCH_%d" % epoch

    # Location to log to.
    split = 'TRAIN' if train_run else 'TEST'
    filestr = FLAGS.run_dir + "tensorlogs/" + timestr + '/'
    ft.directoryFixer(filestr + 'patches/')
    print("Running from: " + filestr)

    tf.reset_default_graph()
    discriminator = DiscNet(disc_train_run, restore, timestr, split)
    tf.reset_default_graph()
    segmenter = SegNet(segm_train_run, restore, timestr)

    # Starts the input generator
    print("\rSTARTING INPUT GENERATION THREADS...")
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=discriminator.sess, coord=coord)
    print("STARTING TRAINING...")

    step = 0
    full_pats = 0
    epoch = 0
    disc_losses = []
    segm_losses = []
    sum_cmat = np.zeros((FLAGS.num_classes, FLAGS.num_classes))
    game_1 = []
    game_2 = []
    game_3 = []
    game_4 = []
    ssim = []
    psnr = []
    rers = []
    maes = []
    mses = []

    try:
        while not coord.should_stop():
            # Run the network and write summaries
            start = timeit.default_timer()
            try:
                orig_img, orig_lab, ids, img_pat, lab_pat, disc_log, count, disc_loss = discriminator.run()
                disc_losses.append(disc_loss)
            except IndexError:
                break  # Epoch done.

            if FLAGS.cell_selection or (FLAGS.positive_training and train_run):
                img_pat = np.reshape(img_pat, (-1, FLAGS.patch_size, FLAGS.patch_size, 3))
                lab_pat = np.reshape(lab_pat, (-1, FLAGS.patch_size, FLAGS.patch_size, 1))
                disc_log = np.squeeze(disc_log)
            else:
                img_pat = np.reshape(orig_img, (-1, FLAGS.imgH, FLAGS.imgW, 3))
                lab_pat = np.reshape(orig_lab, (-1, FLAGS.imgH, FLAGS.imgW, 1))
                disc_log = np.ones((FLAGS.batch_size))

            # Do some processing, create new array with only patches we need.
            new_imgs = []
            new_labs = []
            for x in range(len(disc_log)):
                if (disc_log[x] == 1):
                    new_imgs.append(img_pat[x])
                    new_labs.append(lab_pat[x])
            new_imgs = np.array(new_imgs)
            new_labs = np.array(new_labs)

            imgs_labs_losses = []
            if (np.sum(disc_log) > 0):
                seg_log, seg_loss, per_pat_loss, cmat = segmenter.run(new_imgs, new_labs, count)
                segm_losses.append(seg_loss)
                sum_cmat += cmat

                # Train on the worst 1/2 images twice.
                if train_run:
                    im_loss = zip(new_imgs, new_labs, per_pat_loss)
                    [imgs_labs_losses.append(im_lab_loss) for im_lab_loss in im_loss]

                # Do some more processing, weave the resultant patches into an array
                # of the resultant logit map
                y = 0
                stop = timeit.default_timer()
                print('\rTime: ', stop - start, end='')
                if not train_run:
                    full_pats = np.zeros(shape=lab_pat.shape, dtype=np.float32)
                    for x in range(len(disc_log)):
                        if (disc_log[x] == 1):
                            full_pats[x] = seg_log[y]
                            y += 1
                    orig_img = np.squeeze(orig_img).astype('uint8')
                    orig_lab = np.squeeze(orig_lab)
                    result = np.squeeze(patches_to_image(full_pats))

                    with h5py.File(filestr + '%s_log.png' % ids, 'w') as hf:
                        hf['density'] = np.squeeze(orig_lab)
                    with h5py.File(filestr + '%s_ann.png' % ids, 'w') as hf:
                        hf['density'] = np.squeeze(result)

                    # Before we colorize the result, we want to run the GAME metrics
                    game_1.append(util.cpu_GAME(orig_lab, result, 1))
                    game_2.append(util.cpu_GAME(orig_lab, result, 2))
                    game_3.append(util.cpu_GAME(orig_lab, result, 3))
                    game_4.append(util.cpu_GAME(orig_lab, result, 4))
                    ssim.append(util.cpu_ssim(orig_lab, result))
                    psnr.append(util.cpu_psnr(orig_lab, result))
                    rers.append(np.abs(np.sum(orig_lab) - np.sum(result)) / np.sum(orig_lab))
                    maes.append(np.abs(np.sum(orig_lab) - np.sum(result)))
                    mses.append((np.sum(orig_lab) - np.sum(result))**2)

                    result = util.cpu_colorize(result)
                    orig_lab = util.cpu_colorize(np.squeeze(orig_lab))
                    img = Image.fromarray(result)
                    img.save(filestr + '%d_log.png' % step)
                    img = Image.fromarray(orig_img)
                    img.save(filestr + '%d_img.png' % step)
                    img = Image.fromarray(orig_lab)
                    img.save(filestr + '%d_lab.png' % step)
                    for x in range(new_imgs.shape[0]):
                        img = Image.fromarray(np.squeeze(new_imgs[x]).astype(np.uint8))
                        img.save(filestr + 'patches/' + '%d_%d_img_pat.png' % (step, x))
                        img = Image.fromarray(np.squeeze(util.cpu_colorize(new_labs[x])))
                        img.save(filestr + 'patches/' + '%d_%d_lab_pat.png' % (step, x))
                        img = Image.fromarray(np.squeeze(util.cpu_colorize(seg_log[x])))
                        img.save(filestr + 'patches/' + '%d_%d_log_pat.png' % (step, x))

            step += 1

            # Train on bad patches over again.
            if segm_train_run:
                imgs_labs_losses.sort(key=lambda tup: tup[2])
                iterval = 0
                while len(imgs_labs_losses) > 1:
                    iterval += 1
                    imgs_labs_losses = imgs_labs_losses[len(imgs_labs_losses) // 2:]
                    generator.perturb(imgs_labs_losses)
                    new_imgs = []
                    new_labs = []
                    [(new_imgs.append(new_img), new_labs.append(new_lab))
                     for new_img, new_lab, _ in imgs_labs_losses]
                    # Run through 10 patches at a time as a batch.
                    for x in range(len(new_imgs) // 10 + 1):
                        print('\rTRAINING ON HARD EXAMPLES %d/%d ITER %d' %
                              (x, len(new_imgs) // 10 + 1, iterval), end='')
                        _ = segmenter.run(new_imgs[x * 10:(x + 1) * 10],
                                          new_labs[x * 10:(x + 1) * 10], 0, False)
                    print('\rDONE TRAINING HARD EXAMPLES')
    except KeyboardInterrupt:
        pass
    finally:
        if train_run:
            discriminator.save()
            segmenter.save()
        else:
            print("GAME 1,2,3,4")
            print("Game 1, ", np.mean(game_1))
            print("Game 2, ", np.mean(game_2))
            print("Game 3, ", np.mean(game_3))
            print("Game 4, ", np.mean(game_4))
            print("SSIM, ", np.mean(ssim))
            print("PSNR, ", np.mean(psnr))
            print("Rel_Err, ", np.mean(rers))
            print("MAE, ", np.mean(maes))
            print("MSE, ", np.mean(mses))
        coord.request_stop()
        coord.join(threads)
        np.save(filestr + "data.dat", sum_cmat)
        discriminator.close()
        segmenter.close()
    return np.mean(disc_losses), np.mean(segm_losses)
def main():
    global args, best_prec1
    args = parser.parse_args()

    # Check if the save directory exists or not
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    model = SegNet(3, 3)
    # model.features = torch.nn.DataParallel(model.features)
    if use_gpu:
        model.cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # data_transforms = {
    #     'train': transforms.Compose([
    #         transforms.Scale(256),
    #         transforms.RandomSizedCrop(224),
    #         transforms.RandomHorizontalFlip(),
    #         transforms.ToTensor(),
    #         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    #     ]),
    #     'val': transforms.Compose([
    #         transforms.Scale(256),
    #         transforms.CenterCrop(224),
    #         transforms.RandomHorizontalFlip(),
    #         transforms.ToTensor(),
    #         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    #     ]),
    # }

    data_transforms = {
        'train': transforms.Compose([
            transforms.Scale((224, 224)),
            transforms.ToTensor(),
        ]),
        'val': transforms.Compose([
            transforms.Scale((224, 224)),
            transforms.ToTensor(),
        ]),
    }

    data_dir = '/media/salman/DATA/NUST/MS RIME/Thesis/MICCAI Dataset/miccai_all_images'

    image_datasets = {
        x: miccaiDataset(os.path.join(data_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }

    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=args.workers)
        for x in ['train', 'val']
    }

    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

    # Define loss function (criterion) and optimizer
    criterion = nn.MSELoss().cuda()
    if args.half:
        model.half()
        criterion.half()

    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    if args.evaluate:
        validate(dataloaders['val'], model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # adjust_learning_rate(optimizer, epoch)

        # Train for one epoch
        train(dataloaders['train'], model, criterion, optimizer, epoch)

        # Evaluate on validation set
        prec1 = validate(dataloaders['val'], model, criterion)
        prec1 = prec1.cpu().data.numpy()

        # Remember best prec1 and save checkpoint (lower is better, since this tracks a loss-like metric)
        print(prec1)
        print(best_prec1)
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                # 'optimizer': optimizer.state_dict(),
            },
            is_best,
            filename=os.path.join(args.save_dir, 'checkpoint_{}.tar'.format(epoch)))
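# Minimal sketch of the `save_checkpoint` helper called above; it is not part of the
# snippet, so the "model_best.tar" copy name is an assumption.
import os
import shutil
import torch

def save_checkpoint(state, is_best, filename='checkpoint.tar'):
    """Save the checkpoint dict; additionally copy it when it is the best seen so far."""
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, os.path.join(os.path.dirname(filename) or '.', 'model_best.tar'))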
import torch
from torch import nn
from torch.utils.data import DataLoader

from camvid_dataset import CamVidDataset
from segnet import SegNet
import time
import matplotlib.pyplot as plt
import argparse

parse = argparse.ArgumentParser()
parse.add_argument('--mode', choices=['train', 'val', 'test'])
parse.add_argument('--batch_size', '-b', type=int, default=16)
parse.add_argument('--resume', type=bool, default=False)
parse.add_argument('--epochs', type=int, default=33)
args = parse.parse_args()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = SegNet()
if args.mode == 'train':
    model.load_state_dict(torch.load('transfer-vgg16-for11classes.pth'))
else:
    model.load_state_dict(torch.load('segnet_weight_11classes.pth'))

if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
model.to(device)


def train(epochs):
    model.train()
    train_dataset = CamVidDataset(phase='train')
    train_loader = DataLoader(train_dataset,
def handler(context):
    # Dataset
    dataset_alias = context.datasets
    train_dataset_id = dataset_alias['train']
    val_dataset_id = dataset_alias['val']

    trainset = SegmentationDatasetFromAPI(train_dataset_id, transform=SegNetAugmentation(MEANS))
    valset = SegmentationDatasetFromAPI(val_dataset_id, transform=SegNetAugmentation(MEANS, False))

    class_weight = calc_weight(
        SegmentationDatasetFromAPI(train_dataset_id, transform=SegNetAugmentation(MEANS, False)))
    class_weight = class_weight.to(device)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCHSIZE, shuffle=True, num_workers=0)
    valloader = torch.utils.data.DataLoader(valset, batch_size=BATCHSIZE, shuffle=False, num_workers=0)

    # Model
    net = SegNet(3, n_class=len(camvid_label_names))
    net = net.to(device)

    # Optimizer
    # criterion = PixelwiseSoftmaxClassifier(weight=class_weight)
    criterion = torch.nn.CrossEntropyLoss(weight=class_weight, ignore_index=-1)
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

    statistics = Statistics(epochs)

    for epoch in range(epochs):
        scheduler.step()

        train_loss, train_acc = train(net, optimizer, trainloader, criterion, epoch)
        test_loss, test_acc = test(net, valloader, criterion, epoch)

        # Reporting
        print('[{:d}] main/loss: {:.3f} main/acc: {:.3f}, main/validation/loss: {:.3f}, main/validation/acc: {:.3f}'
              .format(epoch + 1, train_loss, train_acc, test_loss, test_acc))

        statistics(epoch + 1, train_loss, train_acc, test_loss, test_acc)

        writer.add_scalar('main/loss', train_loss, epoch + 1)
        writer.add_scalar('main/acc', train_acc, epoch + 1)
        writer.add_scalar('main/validation/loss', test_loss, epoch + 1)
        writer.add_scalar('main/validation/acc', test_acc, epoch + 1)

    torch.save(net.state_dict(), os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.pth'))
def train(train_run=(True, True), restore=False, epoch=0):
    disc_train_run, segm_train_run = train_run
    train_run = disc_train_run or segm_train_run
    if not train_run:
        FLAGS.batch_size = 1
        FLAGS.num_epochs = 1

    timestr = time.strftime("TRAIN/%d_%b_%Y_%H_%M", time.localtime()) if train_run else time.strftime(
        "TEST/%d_%b_%Y_%H_%M", time.localtime())
    timestr = timestr + "_EPOCH_%d" % epoch

    # Location to log to.
    split = 'TRAIN' if train_run else 'TEST'
    # We just run from the saved model directory for demoing.
    FLAGS.run_dir = FLAGS.mod_dir
    filestr = FLAGS.run_dir + "tensorlogs/" + timestr + '/'
    ft.directoryFixer(filestr + 'patches/')
    print("Running from: " + filestr)

    tf.reset_default_graph()
    discriminator = DiscNet(disc_train_run, restore, timestr, split)
    tf.reset_default_graph()
    segmenter = SegNet(segm_train_run, restore, timestr)

    # Starts the input generator
    print("\rSTARTING INPUT GENERATION THREADS...")
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=discriminator.sess, coord=coord)
    print("STARTING TRAINING...")

    step = 0
    full_pats = 0
    epoch = 0
    disc_losses = []
    segm_losses = []

    try:
        while not coord.should_stop():
            # Run the network and write summaries
            try:
                orig_img, orig_lab, img_pat, lab_pat, disc_log, count, disc_loss = discriminator.run()
                disc_losses.append(disc_loss)
            except IndexError:
                break  # Epoch done.

            img_pat = np.reshape(img_pat, (-1, FLAGS.patch_size, FLAGS.patch_size, 3))
            lab_pat = np.reshape(lab_pat, (-1, FLAGS.patch_size, FLAGS.patch_size, 1))
            disc_log = np.squeeze(disc_log)

            # Do some processing, create new array with only patches we need.
            new_imgs = []
            new_labs = []
            for x in range(len(disc_log)):
                if (disc_log[x] == 1):
                    new_imgs.append(img_pat[x])
                    new_labs.append(lab_pat[x])
            new_imgs = np.array(new_imgs)
            new_labs = np.array(new_labs)

            imgs_labs_losses = []
            if (np.sum(disc_log) > 0):
                seg_log, seg_loss, per_pat_loss = segmenter.run(new_imgs, new_labs, count)
                segm_losses.append(seg_loss)

                # Train on the worst 1/2 images twice.
                if train_run:
                    im_loss = zip(new_imgs, new_labs, per_pat_loss)
                    [imgs_labs_losses.append(im_lab_loss) for im_lab_loss in im_loss]

                # Do some more processing, weave the resultant patches into an array
                # of the resultant logit map
                y = 0
                if not train_run:
                    full_pats = np.zeros(shape=lab_pat.shape)
                    for x in range(len(disc_log)):
                        if (disc_log[x] == 1):
                            full_pats[x] = seg_log[y]
                            y += 1
                    orig_img = np.squeeze(orig_img).astype('uint8')
                    orig_lab = util.cpu_colorize(np.squeeze(orig_lab))

                    # Go from patches to full image logit map.
                    result = patches_to_image(full_pats)
                    img = Image.fromarray(util.cpu_colorize(result))
                    img.save(filestr + '%d_log.png' % step)
                    img = Image.fromarray(orig_img)
                    img.save(filestr + '%d_img.png' % step)
                    img = Image.fromarray(orig_lab)
                    img.save(filestr + '%d_lab.png' % step)
                    for x in range(new_imgs.shape[0]):
                        img = Image.fromarray(np.squeeze(new_imgs[x]).astype(np.uint8))
                        img.save(filestr + 'patches/' + '%d_%d_img_pat.png' % (step, x))
                        img = Image.fromarray(np.squeeze(util.cpu_colorize(new_labs[x])))
                        img.save(filestr + 'patches/' + '%d_%d_lab_pat.png' % (step, x))
                        img = Image.fromarray(np.squeeze(util.cpu_colorize(seg_log[x])))
                        img.save(filestr + 'patches/' + '%d_%d_log_pat.png' % (step, x))

            step += 1

            # Train on bad patches over again.
            if segm_train_run:
                imgs_labs_losses.sort(key=lambda tup: tup[2])
                iterval = 0
                while len(imgs_labs_losses) > 1:
                    iterval += 1
                    imgs_labs_losses = imgs_labs_losses[len(imgs_labs_losses) // 2:]
                    generator.perturb(imgs_labs_losses)
                    new_imgs = []
                    new_labs = []
                    [(new_imgs.append(new_img), new_labs.append(new_lab))
                     for new_img, new_lab, _ in imgs_labs_losses]
                    # Run through 10 patches at a time as a batch.
                    for x in range(len(new_imgs) // 10 + 1):
                        print('\rTRAINING ON HARD EXAMPLES %d/%d ITER %d' %
                              (x, len(new_imgs) // 10 + 1, iterval), end='')
                        _ = segmenter.run(new_imgs[x * 10:(x + 1) * 10],
                                          new_labs[x * 10:(x + 1) * 10], 0, False)
                    print('\rDONE TRAINING HARD EXAMPLES')
    except KeyboardInterrupt:
        pass
    finally:
        if train_run:
            discriminator.save()
            segmenter.save()
        coord.request_stop()
        coord.join(threads)
        discriminator.close()
        segmenter.close()
    return np.mean(disc_losses), np.mean(segm_losses)
def main(_run, _config, world_size, rank, init_method, datadir, batch_size, num_workers,
         outdir_suffix, outdir, lr, wd, warmup, num_epochs, nsamples):
    cudnn.benchmark = True
    device = torch.device('cuda:0')  # device is set by CUDA_VISIBLE_DEVICES
    torch.cuda.set_device(device)

    # rank 0 creates experiment observer
    is_master = rank == 0

    # rank joins process group
    print('rank', rank, 'init_method', init_method)
    dist.init_process_group('nccl', rank=rank, world_size=world_size, init_method=init_method)

    # actual training stuff
    train = make_loader(
        pt.join(datadir, '') if datadir else None,
        batch_size,
        device,
        world_size,
        rank,
        num_workers,
        # this is the parameter based on which augmentation is applied to the data
        gpu_augmentation=False,
        image_rng=None,
        nsamples=nsamples)

    # lr is scaled linearly to original batch size of 256
    # world_batch_size = world_size * batch_size
    # k = world_batch_size / 256
    # lr = k * lr

    # outdir stuff
    if outdir is None:
        outdir = pt.join(ROOT, '../exp/', outdir_suffix)

    model = Net(num_classes=500, batch_size=batch_size)
    print('\n network parameters ', len(list(model.parameters())))
    model = model.to(device)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device])
    # model = Unpacker(model)

    optimizer, policy = make_policy(num_epochs, model, lr, 0.9, wd)

    # loss for autoencoder
    loss = L1Loss(output_key='output', target_key='target_image').to(device)
    # this loss is for the classifier
    classifier_loss = CrossEntropyLoss(output_key='probs', target_key='label').to(device)

    trainer = Trainer(model, optimizer, loss, classifier_loss, rank,
                      AccuracyMetric(output_key='softmax_output', target_key='label'),
                      policy, None, train, None, outdir,
                      snapshot_interval=4 if is_master else None,
                      quiet=True if not is_master else False)

    print('\n Number of epochs are: ', num_epochs)
    start = datetime.now()
    with train:
        trainer.train(num_epochs, start_epoch=0)

    print("Training complete in: " + str(datetime.now() - start))
def main(_run, _config, world_size, rank, init_method, datadir, batch_size, val_batch_size,
         num_workers, outdir, outdir_prefix, lr, wd, bn_momentum, bn_correct, warmup,
         num_epochs, resume, finetune, size, nsamples):
    cudnn.benchmark = True
    device = torch.device('cuda:0')  # device is set by CUDA_VISIBLE_DEVICES
    torch.cuda.set_device(device)

    # rank 0 creates experiment observer
    is_master = rank == 0

    # rank joins process group
    print('rank', rank, 'init_method', init_method)
    dist.init_process_group('nccl', rank=rank, world_size=world_size, init_method=init_method)

    # actual training stuff
    train = make_loader(
        pt.join(datadir, '') if datadir else None,
        batch_size,
        device,
        world_size,
        rank,
        num_workers,
        size,
        # this is the parameter based on which augmentation is applied to the data
        gpu_augmentation=False,
        image_rng=None,
        nsamples=nsamples)

    # lr is scaled linearly to the original batch size of 256
    world_batch_size = world_size * batch_size
    k = world_batch_size / 256
    lr = k * lr

    # outdir stuff
    if outdir is None:
        outdir = pt.join(outdir_prefix, '%dgpu' % (world_size, ))

    model = Net(num_classes=1000, batch_size=batch_size)
    model = model.to(device)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device])
    # model = Unpacker(model)

    optimizer, policy = make_policy(num_epochs, model, lr, 0.9)
    print('\n policy defined')

    # loss for autoencoder
    loss = L1Loss(output_key='output', target_key='target_image').to(device)
    # this loss is for the classifier
    classifier_loss = CrossEntropyLoss(output_key='probs', target_key='label').to(device)

    trainer = Trainer(model, optimizer, loss, None, policy, None, train, None, outdir,
                      snapshot_interval=5 if is_master else None,
                      quiet=rank != 0)

    print('\n trainer has been initialized')
    start = datetime.now()
    with train:
        trainer.train(num_epochs, start_epoch=0)

    print("Training complete in: " + str(datetime.now() - start))
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--cuda', default=False, action="store_true")
parser.add_argument('--force', default=False, action="store_true")
parser.add_argument('--threads', default=os.cpu_count(), type=int)
args = parser.parse_args()
logger.debug(args)

num_classes = 3

logger.info(f"... setting up models ...")
create_folder("results/benchmark")

model_dict = {
    "model_segnet_1": SegNet(num_classes=num_classes, n_init_features=1),
    "model_segnet_2": SegNet(num_classes=num_classes, n_init_features=2),
    "model_segnet_3": SegNet(num_classes=num_classes, n_init_features=3),
    # "mid_ssma_2": FusionNet(fusion="ssma", bottleneck=16, fusion_activ="sigmoid", num_classes=num_classes, decoders="single", branches=2),
    "mid_custom_2": FusionNet(fusion="custom", bottleneck=16, fusion_activ="softmax", num_classes=num_classes, decoders="single", branches=2),
    # "dual_ssma_2": FusionNet(fusion="ssma", bottleneck=16, fusion_activ="sigmoid", num_classes=num_classes, decoders="multi", branches=2),