def train(network, optimizer, dataloader, device):
    alpha = 0.6  # weight of the BCE term relative to the IoU term
    network.train()
    loss_per_epoch, loss_bce_per_epoch, loss_iou_per_epoch = 0, 0, 0
    process = tqdm(enumerate(dataloader))
    for i, data in process:
        optimizer.zero_grad()
        img, gt = data
        img = img.to(device)
        gt = gt.to(device)
        pred = network(img)
        bceloss = criterion(pred, gt)
        iouloss = iou_loss(pred, gt)
        loss = alpha * bceloss + (1 - alpha) * iouloss
        loss.backward()
        optimizer.step()
        # .item() already returns a detached Python float, so no explicit
        # detach/cpu round-trip is needed for the running sums.
        loss_per_epoch += loss.item()
        loss_bce_per_epoch += bceloss.item()
        loss_iou_per_epoch += iouloss.item()
        # process.set_description('BCELoss:{},IoULoss:{}'.format(
        #     bceloss.item(), iouloss.item()))
    loss_dict = dict(loss=loss_per_epoch / len(dataloader),
                     loss_bce=loss_bce_per_epoch / len(dataloader),
                     loss_iou=loss_iou_per_epoch / len(dataloader))
    return loss_dict
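# `criterion` and `iou_loss` are module-level globals not shown in the
# snippet above; for a binary segmentation setup, `criterion` is typically
# nn.BCELoss(). A minimal sketch of a soft (differentiable) IoU loss,
# assuming `pred` already holds sigmoid probabilities, could look like this:
import torch

def iou_loss(pred, gt, eps=1e-6):
    # Flatten everything except the batch dimension.
    pred = pred.view(pred.size(0), -1)
    gt = gt.view(gt.size(0), -1)
    inter = (pred * gt).sum(dim=1)
    union = pred.sum(dim=1) + gt.sum(dim=1) - inter
    # 1 - IoU, so that better overlap yields a lower loss.
    return (1 - (inter + eps) / (union + eps)).mean()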
def __getitem__(self, idx):
    labels = {}
    line = self.dataset[idx]
    strs = line.strip().split()
    _img_data = Image.open(os.path.join(cfg.IMG_BASE_DIR, strs[0]))
    img_data = cfg.TRANSFORM(_img_data)
    # Single labeled box per line: class id followed by cx, cy, w, h.
    # (The line was already split into `strs`, so it is not re-split here.)
    box = np.array([float(x) for x in strs[1:6]])
    cls, cx, cy, w, h = box
    for feature_size, anchors in cfg.ANCHOR_GROUPS.items():
        labels[feature_size] = np.zeros(
            shape=(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM))
        # modf splits into the offset within the cell and the cell index.
        cx_offset, cx_index = math.modf(cx * feature_size / cfg.IMAGE_WIDTH)
        cy_offset, cy_index = math.modf(cy * feature_size / cfg.IMAGE_HEIGHT)
        for i, anchor in enumerate(anchors):
            iou = utils.iou_loss(w, h, anchor)
            p_w, p_h = w / anchor[0], h / anchor[1]
            labels[feature_size][int(cy_index), int(cx_index), i] = np.array([
                iou, cx_offset, cy_offset,
                np.log(p_w), np.log(p_h),
                *utils.one_hot(cfg.CLASS_NUM, int(cls))
            ])
    return (torch.Tensor(labels[13]), torch.Tensor(labels[26]),
            torch.Tensor(labels[52]), img_data)
def __getitem__(self, x):
    labels = {}
    line = self.dataset[x]
    strs = line.strip().split()
    _img_data = Image.open(os.path.join(cfg.IMG_BASE_DIR, strs[0]))
    img_data = cfg.TRANSFORM(_img_data)
    _boxes = np.array([float(x) for x in strs[1:]])  # take the label values after the filename
    boxes = np.split(_boxes, len(_boxes) // 5)  # split the labels into one 5-tuple per object
    for feature_size, anchors in cfg.ANCHOR_GROUPS.items():  # iterate over each feature size and its anchors
        labels[feature_size] = np.zeros(
            shape=(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM))  # define the shape of the label tensor
        for box in boxes:  # iterate over each labeled object
            cx, cy, w, h, cls = box
            cx_offset, cx_index = math.modf(cx * feature_size / cfg.IMAGE_WIDTH)   # x offset within the cell and cell index
            cy_offset, cy_index = math.modf(cy * feature_size / cfg.IMAGE_HEIGHT)  # y offset within the cell and cell index
            for i, anchor in enumerate(anchors):  # iterate over the anchors
                iou = utils.iou_loss(w, h, anchor)  # api: uses iou_loss as the confidence target
                p_w, p_h = w / anchor[0], h / anchor[1]
                labels[feature_size][int(cy_index), int(cx_index), i] = np.array([
                    iou, cx_offset, cy_offset,
                    np.log(p_w), np.log(p_h),
                    *utils.one_hot(cfg.CLASS_NUM, int(cls))
                ])
                # stored values: log(w / anchor_w), log(h / anchor_h);
                # label shape: (cy_index, cx_index, anchor, iou + box + category)
    return (torch.Tensor(labels[13]), torch.Tensor(labels[26]),
            torch.Tensor(labels[52]), img_data)
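# In the two __getitem__ variants above, `utils.iou_loss(w, h, anchor)`
# produces the confidence target: how well the anchor's shape matches the
# ground-truth box when both are treated as concentric. A sketch of that
# helper and of `utils.one_hot`, under exactly that assumption (these are
# not shown in the original snippets):
import numpy as np

def iou_loss(w, h, anchor):
    # Width/height-only IoU: the boxes are compared as if sharing a center.
    inter = min(w, anchor[0]) * min(h, anchor[1])
    union = w * h + anchor[0] * anchor[1] - inter
    return inter / union

def one_hot(num_classes, cls):
    vec = np.zeros(num_classes)
    vec[cls] = 1.0
    return vec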
def set_up(self):
    # Scope names are made unique here; the original reused 'conv1' and
    # 'deconv1_1', which collides under tf.variable_scope in TF1.
    with tf.variable_scope('conv1'):
        network = conv2d(self.input, [7, 7], 32, scope='conv1_1')
        network = conv2d(network, [3, 3], 32, scope='conv1_2')
        network = max_pool(network, 'pool1')  # downsample
    with tf.variable_scope('conv2'):
        network = conv2d(network, [3, 3], 64, scope='conv2_1')
        network = conv2d(network, [3, 3], 64, scope='conv2_2')
        network = max_pool(network, 'pool2')  # downsample
    with tf.variable_scope('conv3'):
        network = conv2d(network, [3, 3], 128, scope='conv3_1')
        network = conv2d(network, [3, 3], 128, scope='conv3_2')
    with tf.variable_scope('deconv1'):
        network = deconv2d(network, [3, 3], 64, scope='deconv1_1')  # upsample
        network = deconv2d(network, [3, 3], 64, stride=1, scope='deconv1_2')
    with tf.variable_scope('deconv2'):
        network = deconv2d(network, [3, 3], 32, scope='deconv2_1')  # upsample
        network = deconv2d(network, [3, 3], 32, stride=1, scope='deconv2_2')
    with tf.variable_scope('out_class'):
        logits = conv2d(network, [3, 3], 2, bn=False, relu=False, scope='logits')
    self.pred_prob = tf.nn.softmax(logits, name='predictions')[:, :, :, 1]
    self.pred = tf.argmax(logits, 3)
    self.loss = iou_loss(self.pred_prob, self.label)
    self.train_score = iou_loss(tf.cast(self.pred, tf.float32), self.label)
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate, epsilon=1e-4).minimize(self.loss)
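# In set_up(), `iou_loss` doubles as the training objective on the softmax
# foreground probability and as a validation score on the hard argmax mask.
# It is not defined in the snippet; a soft-IoU sketch in the same TF1 style,
# assuming `label` is a {0,1} float map, might be:
import tensorflow as tf

def iou_loss(pred_prob, label, eps=1e-6):
    inter = tf.reduce_sum(pred_prob * label)
    union = tf.reduce_sum(pred_prob) + tf.reduce_sum(label) - inter
    return 1.0 - (inter + eps) / (union + eps)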
def eval(network, dataloader, device):
    network.eval()
    loss_per_epoch = 0
    with torch.no_grad():  # no gradients are needed during validation
        for i, data in tqdm(enumerate(dataloader)):
            img, gt = data
            img = img.to(device)
            gt = gt.to(device)
            pred = network(img)
            # loss = criterion(pred, gt)
            loss = iou_loss(pred, gt)
            loss_per_epoch += loss.item()
    loss_dict = dict(loss=loss_per_epoch / len(dataloader))
    return loss_dict
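# A hypothetical epoch driver tying train() and eval() together; `net`,
# `optimizer`, `train_loader`, `val_loader`, and `EPOCHS` are assumed names
# that do not appear in the snippets above:
for epoch in range(EPOCHS):
    train_stats = train(net, optimizer, train_loader, device)
    val_stats = eval(net, val_loader, device)
    print('epoch {}: train loss {:.4f} | val loss {:.4f}'.format(
        epoch, train_stats['loss'], val_stats['loss']))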
def main():
    log = get_logger(LOG_DIR)
    transform_val = Compose([
        Scale((224, 224)),
        ToTensor(),
        Normalize(mean=[0.45, 0.45, 0.45], std=[0.225, 0.225, 0.225])
    ])
    valset = Data(DATA_DIR, training=False, transform=transform_val)
    val_loader = DataLoader(valset, batch_size=1, num_workers=0)
    log.info('Data loaded.')
    log.info('Val samples:{}'.format(len(val_loader)))

    # set device
    device = torch.device('cpu')
    # torch.manual_seed(SEED)
    log.info('Torch Device:{}'.format(device))

    # set model and load checkpoint
    net = FCNResNet()
    net.to(device)
    # net.init_weights()  # pretrained resnet weights
    net.load_state_dict(torch.load('./checkpoints/06.22.23.14.41_ep36_val.pt'),
                        strict=False)
    net.eval()  # inference mode: freeze batchnorm statistics and dropout
    log.info('Model loaded.')

    with torch.no_grad():
        for i, data in tqdm(enumerate(val_loader)):
            img, gt = data
            img = img.to(device)
            gt = gt.to(device)
            pred = net(img)
            iou = iou_loss(pred, gt)
            pred[pred < 0.5] = 0  # suppress low-confidence pixels
            print(iou.item())
            pred = pred.numpy().squeeze(0).squeeze(0)
            pred = Image.fromarray((pred * 255).astype(np.uint8))
            pred.save(f'{OUTPUT_DIR}/{i}.png')