def predict(task):
    net = gluon.model_zoo.vision.get_model(model_name)
    with net.name_scope():
        net.output = nn.Dense(task_num_class)
    net.load_params('../../data/%s_%s.params' % (task, model_name),
                    ctx=mx.gpu(1))
    logging.info('Training Finished. Starting Prediction.\n')
    f_out = open('../../data/submission/%s_%s.csv' % (task, model_name), 'w')
    with open('../../data/z_rank/Tests/question.csv', 'r') as f_in:
        lines = f_in.readlines()
    tokens = [l.rstrip().split(',') for l in lines]
    task_tokens = [t for t in tokens if t[1] == task]
    n = len(task_tokens)
    cnt = 0
    for path, task, _ in task_tokens:
        img_path = os.path.join('../../data/z_rank', path)
        with open(img_path, 'rb') as f:
            img = image.imdecode(f.read())
        data = transform_predict(img)
        out = net(data.as_in_context(mx.gpu(1)))
        out = nd.SoftmaxActivation(out).mean(axis=0)

        pred_out = ';'.join(["%.8f" % (o) for o in out.asnumpy().tolist()])
        line_out = ','.join([path, task, pred_out])
        f_out.write(line_out + '\n')
        cnt += 1
        progressbar(cnt, n)
    f_out.close()
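
Most of the classification snippets on this page share the same pattern: the network is fed a batch of crops of a single image, nd.SoftmaxActivation turns the logits into per-crop probabilities, and .mean(axis=0) averages them into one prediction. A minimal, self-contained sketch with dummy logits (the 10-crop, 5-class shapes are only assumptions for illustration):

from mxnet import nd

# dummy logits for 10 crops of one image and a hypothetical 5-class task
crop_logits = nd.random.normal(shape=(10, 5))
probs = nd.SoftmaxActivation(crop_logits)  # (10, 5): one probability row per crop
avg_probs = probs.mean(axis=0)             # (5,): averaged prediction for the image
print(';'.join('%.8f' % p for p in avg_probs.asnumpy().tolist()))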
Example #2
def predict(task):
    logging.info('Training Finished. Starting Prediction.\n')
    f_out = open('submission/%s.csv'%(task), 'w')  # write the test results to this file

    # load the test-set images and write the network predictions to the file
    with open('data/rank/Tests/question.csv', 'r') as f_in:
        lines = f_in.readlines()
    tokens = [l.rstrip().split(',') for l in lines]
    task_tokens = [t for t in tokens if t[1] == task]
    n = len(task_tokens)
    cnt = 0
    for path, task, _ in task_tokens:
        img_path = os.path.join('data/rank', path)
        with open(img_path, 'rb') as f:
            img = image.imdecode(f.read())
        data = transform_predict(img)
        out = net(data.as_in_context(mx.gpu(0)))
        out = nd.SoftmaxActivation(out).mean(axis=0)

        pred_out = ';'.join(["%.8f"%(o) for o in out.asnumpy().tolist()])
        line_out = ','.join([path, task, pred_out])
        f_out.write(line_out + '\n')
        cnt += 1
        progressbar(cnt, n)
    f_out.close()
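
progressbar() is not defined in these snippets; a minimal console version, assuming it receives the current count and the total, might look like this:

import sys

def progressbar(cnt, total, width=40):
    # hypothetical helper: draw a simple in-place console progress bar
    filled = int(width * cnt / total)
    sys.stdout.write('\r[%s%s] %d/%d' % ('#' * filled, '.' * (width - filled), cnt, total))
    sys.stdout.flush()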
def validate(net, val_data, ctx):
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    AP = 0.
    AP_cnt = 0
    val_loss = 0
    all_softmax_output = []
    mAP_name = task+model_name+'.npy'
    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        #data = transform_predict(data, scale)
        outputs = [net(X) for X in data]    # feed in the images and get a 16x5 result
        metric.update(label, outputs)
        loss = [L(yhat, y) for yhat, y in zip(outputs, label)]  # 16 values, one loss per sample
        val_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)    # accumulate the mean loss
        #ap, cnt = calculate_ap(label, outputs)
        # softmax_output and label are lists, but softmax_output[0] and label[0] are NDArrays (MXNet NDArrays, not NumPy arrays)
        softmax_output = [nd.SoftmaxActivation(output) for output in outputs]  # apply softmax to each output
        sm_out_label = zip(softmax_output[0].asnumpy(), label[0].asnumpy())
        all_softmax_output += sm_out_label
        np.save(mAP_name, all_softmax_output)
        #AP += ap
        #AP_cnt += cnt
    this_AP = cal_mAP(mAP_name)  # compute the current AP
    _, val_acc = metric.get()
    return this_AP, val_acc, val_loss / len(val_data)
Example #4
    def predict_cropped_images(self, dataset_path, model_path, task, gpus, network='densenet201', loss_type='sfe'):

        # with Path(dataset_path, 'Annotations/%s.csv' % task).open('r') as f:
        #     self.task_tokens = [l.rstrip().split(',') for l in f.readlines()]
        # self.task_tokens = [t for t in tokens if t[1] == task]

        results_path = self.output_submission_path.joinpath('%s.csv'%(task))
        f_out = results_path.open('w+')
        ctx = self.get_ctx()[0]

        net = get_symbol(network, task_class_num_list[task], ctx)
        net.load_params(model_path, ctx=ctx)
        logging.info("load model from %s" % model_path)

        for index, task_token in enumerate(self.task_tokens):
            img_path, raw_task = task_token[:2]
            assert raw_task == task, "task not match"
            with Path(dataset_path, img_path).open('rb') as f:
                raw_img = f.read()
            img = image.imdecode(raw_img)
            data = utils.transform_cropped_img(img)
            out = net(data.as_in_context(ctx))
            out = nd.SoftmaxActivation(out).mean(axis=0)
            pred_out = ';'.join(["%.8f"%(o) for o in out.asnumpy().tolist()])
            line_out = ','.join([img_path, task, pred_out])
            f_out.write(line_out + '\n')
            utils.progressbar(index, len(self.task_tokens))
        f_out.close()
        logging.info("end predicting for %s, results saved at %s" % (task, results_path))
def predict(task):
    logging.info('Training Finished. Starting Prediction.\n')
    f_out = open('submission/%s.csv' % (task), 'w')
    with open('data2/week-rank/Tests/question.csv', 'r') as f_in:
        lines = f_in.readlines()
    tokens = [l.rstrip().split(',') for l in lines]
    task_tokens = [t for t in tokens if t[1] == task]
    n = len(task_tokens)
    cnt = 0
    for path, task, _ in task_tokens:
        img_path = os.path.join('data2/week-rank', path)
        with open(img_path, 'rb') as f:
            img = image.imdecode(f.read())
        out_all = np.zeros([task_list[task]])
        ###### Test-time augmentation (multi-scale test) ######
        for scale in input_scale:
            data = transform_predict(img, scale)
            with ag.predict_mode():
                out = net(data.as_in_context(mx.gpu(0)))  # ten random crops per image, so this holds 10 per-crop outputs
                out = nd.SoftmaxActivation(out).mean(axis=0)  # softmax, then average over the ten crops
                out_all += out.asnumpy()
        out = out_all / len(input_scale)

        pred_out = ';'.join(["%.8f" % (o) for o in out.tolist()])
        line_out = ','.join([path, task, pred_out])
        f_out.write(line_out + '\n')
        cnt += 1
        #progressbar(cnt, n)
    f_out.close()
Example #6
def predict(task, saved_path):
    logging.info('Training Finished. Starting Prediction.\n')
    rank_root = '/data/fashion/data/attribute/datasets_david/rank'
    f_out = open('submission/%s.csv' % (task), 'w+')
    with open(rank_root + '/Tests/question.csv', 'r') as f_in:
        lines = f_in.readlines()
    tokens = [l.rstrip().split(',') for l in lines]
    task_tokens = [t for t in tokens if t[1] == task]
    n = len(task_tokens)
    cnt = 0

    predictor_net = build_model()
    predictor_ctx = mx.gpu(num_gpus[0]) if len(num_gpus) > 0 else mx.cpu()
    predictor_net.load_params(saved_path, ctx=predictor_ctx)
    logging.info("load model from %s" % saved_path)

    for path, task, _ in task_tokens:
        img_path = os.path.join(rank_root, path)
        with open(img_path, 'rb') as f:
            img = image.imdecode(f.read())
        data = transform_predict(img)
        out = predictor_net(data.as_in_context(predictor_ctx))
        out = nd.SoftmaxActivation(out).mean(axis=0)

        pred_out = ';'.join(["%.8f" % (o) for o in out.asnumpy().tolist()])
        line_out = ','.join([path, task, pred_out])
        f_out.write(line_out + '\n')
        cnt += 1
        progressbar(cnt, n)
    f_out.close()
def predict(x, net, ctx):
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(
        cls_preds.transpose((0, 2, 1)), mode='channel')

    return MultiBoxDetection(cls_probs, box_preds, anchors, force_suppress=True, clip=False)
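
MultiBoxDetection returns one row per candidate box; as the detect_image example below shows, each row is [class_id, score, xmin, ymin, xmax, ymax] with coordinates relative to the image size and class_id set to -1 for suppressed boxes. A small sketch for filtering that output (the threshold value is arbitrary):

def filter_detections(output, thresh=0.5):
    # keep rows with a valid class id and a score above the threshold;
    # each row is [class_id, score, xmin, ymin, xmax, ymax] in relative coordinates
    dets = output.asnumpy()[0]
    return [det for det in dets if int(det[0]) >= 0 and det[1] >= thresh]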
Example #8
def detect_image(img_file):
    if not os.path.exists(img_file):
        print('can not find image: ', img_file)
    img = Image.open(img_file)
    img = ImageOps.fit(img, [data_shape, data_shape], Image.ANTIALIAS)
    print(img)
    origin_img = np.array(img)
    img = origin_img - np.array([123, 117, 104])
    # organize as [batch-channel-height-width]
    img = np.transpose(img, (2, 0, 1))
    img = img[np.newaxis, :]
    # convert to ndarray
    img = nd.array(img)
    print('input image shape: ', img.shape)

    net = ToySSD(num_class)
    ctx = mx.cpu()
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
    net.collect_params().reset_ctx(ctx)
    params = 'ssd_pretrained.params'
    net.load_params(params, ctx=ctx)

    anchors, cls_preds, box_preds = net(img.as_in_context(ctx))
    print('anchors', anchors)
    print('class predictions', cls_preds)
    print('box delta predictions', box_preds)

    # convert predictions to probabilities using softmax
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    # apply shifts to anchor boxes, non-maximum suppression, etc.
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=True, clip=False)
    output = output.asnumpy()
    print(output)
    print(output.shape)
    pens = dict()

    plt.imshow(origin_img)

    thresh = 0.69
    for det in output[0]:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
        if score < thresh:
            continue
        if cid not in pens:
            pens[cid] = (random.random(), random.random(), random.random())
        scales = [origin_img.shape[1], origin_img.shape[0]] * 2
        xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=pens[cid], linewidth=3)
        plt.gca().add_patch(rect)
        text = class_names[cid]
        plt.gca().text(xmin, ymin - 2, '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12, color='white')
    plt.axis('off')
    plt.savefig('result.png', dpi=100)
    plt.show()
Example #9
def predict_bounding_boxes(net, image, bb):
    '''
    Given the outputs of the dataset (image and bounding box) and the network, 
    the predicted bounding boxes are provided.
    
    Parameters
    ----------
    net: SSD
    The trained SSD network.
    
    image: np.array
    A grayscale image of the handwriting passages.
    
    bb: [(x1, y1, x2, y2)]
    A tuple that contains the bounding box.
    
    Returns
    -------
    predicted_bb: [(x, y, w, h)]
    The predicted bounding boxes.
    
    actual_bb: [(x, y, w, h)]
    The actual bounding boxes.
    '''
    image, bb = transform(image, bb)

    image = image.as_in_context(ctx[0])
    image = image.expand_dims(axis=0)

    bb = bb.as_in_context(ctx[0])
    bb = bb.expand_dims(axis=0)

    default_anchors, class_predictions, box_predictions = net(image)
    box_target, box_mask, cls_target = net.training_targets(
        default_anchors, class_predictions, bb)
    cls_probs = nd.SoftmaxActivation(nd.transpose(class_predictions,
                                                  (0, 2, 1)),
                                     mode='channel')

    predicted_bb = MultiBoxDetection(
        *[cls_probs, box_predictions, default_anchors],
        force_suppress=True,
        clip=False)
    predicted_bb = box_nms(predicted_bb,
                           overlap_thresh=overlap_thres,
                           valid_thresh=min_c,
                           topk=topk)
    predicted_bb = predicted_bb.asnumpy()
    predicted_bb = predicted_bb[0, predicted_bb[0, :, 0] != -1]
    predicted_bb = predicted_bb[:, 2:]
    predicted_bb[:, 2] = predicted_bb[:, 2] - predicted_bb[:, 0]
    predicted_bb[:, 3] = predicted_bb[:, 3] - predicted_bb[:, 1]

    labeled_bb = bb[:, :, 1:].asnumpy()
    labeled_bb[:, :, 2] = labeled_bb[:, :, 2] - labeled_bb[:, :, 0]
    labeled_bb[:, :, 3] = labeled_bb[:, :, 3] - labeled_bb[:, :, 1]
    labeled_bb = labeled_bb[0]
    return predicted_bb, labeled_bb
Example #10
def inference(x, epochs=295):
    ctx = mx.cpu(1)
    net = ToySSD(1)
    start_time = time.time()
    net.load_params('models/ssd_%d.params' % epochs, ctx)
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=False, clip=False, nms_threshold=0.001)
    end_time = time.time()
    print(end_time-start_time)
    return output
Example #11
def forward(img_path, net):
    ctx = mx.gpu(1)
    img_original = cv2.imread(img_path)
    img = preprocess(img_original)
    anchors, cls_preds, box_preds = net(img.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)),
                                     mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors],
                               force_suppress=True,
                               clip=True,
                               nms_threshold=0.01)
    return img_original, output
Example #12
def inference(x, epochs=295):
    ctx = mx.cpu(1)
    net = ToySSD(1)
    net.load_params('models/ssd_%d.params' % epochs, ctx)
    print("load sucecuss")
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)),
                                     mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors],
                               force_suppress=True,
                               clip=False)
    return output
Example #13
def evaluate_metrics(metrics,
                     data_iterator,
                     net,
                     nb_batches=None,
                     ctx=mx.gpu(),
                     sparse_policy_label=False,
                     apply_select_policy_from_plane=True):
    """
    Runs inference of the network on a data_iterator object and evaluates the given metrics.
    The metric results are returned as a dictionary object.

    :param metrics: List of mxnet metrics which must have the
    names ['value_loss', 'policy_loss', 'value_acc_sign', 'policy_acc']
    :param data_iterator: Gluon data iterator object
    :param net: Gluon network handle
    :param nb_batches: Number of batches to evaluate (early stopping).
     If set to None all batches of the data_iterator will be evaluated
    :param ctx: MXNET data context
    :param sparse_policy_label: Should be set to true if the policy uses one-hot encoded targets
     (e.g. supervised learning)
    :param apply_select_policy_from_plane: If true, the given policy label is converted to a policy map index
    :return:
    """
    reset_metrics(metrics)
    for i, (data, value_label, policy_label) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        value_label = value_label.as_in_context(ctx)
        policy_label = policy_label.as_in_context(ctx)
        [value_out, policy_out] = net(data)
        value_out[0][0].wait_to_read()
        if apply_select_policy_from_plane:
            policy_out = policy_out[:, FLAT_PLANE_IDX]
        # update the metrics
        metrics["value_loss"].update(preds=value_out, labels=value_label)
        metrics["policy_loss"].update(preds=nd.SoftmaxActivation(policy_out),
                                      labels=policy_label)
        metrics["value_acc_sign"].update(preds=value_out, labels=value_label)
        metrics["policy_acc"].update(preds=nd.argmax(policy_out, axis=1),
                                     labels=policy_label if sparse_policy_label
                                     else nd.argmax(policy_label, axis=1))
        # stop after evaluating x batches (only recommended to use this for the train set evaluation)
        if nb_batches and i == nb_batches:
            break

    metric_values = {
        "loss":
        0.01 * metrics["value_loss"].get()[1] +
        0.99 * metrics["policy_loss"].get()[1]
    }

    for metric in metrics.values():
        metric_values[metric.get()[0]] = metric.get()[1]
    return metric_values
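
reset_metrics() is not shown here; assuming metrics is a dict of mxnet EvalMetric objects as used above, a minimal version could be:

def reset_metrics(metrics):
    # clear the internal state of every metric before a fresh evaluation pass
    for metric in metrics.values():
        metric.reset()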
Example #14
def classify_hotdog(net, fname):
    with open(fname, 'rb') as f:
        img = image.imdecode(f.read())
    data, _ = transform(img, -1, test_augs)
    plt.imshow(data.transpose((1, 2, 0)).asnumpy() / 255)
    data = data.expand_dims(axis=0)
    out = net(data.as_in_context(ctx[0]))
    out = nd.SoftmaxActivation(out)
    pred = int(nd.argmax(out, axis=1).asscalar())
    prob = out[0][pred].asscalar()
    label = train_imgs.synsets
    return 'With prob=%f, %s' % (prob, label[pred])
Example #15
def predict_cv(net, ctx, fname, label):
    img = cv2.imread(fname)
    img = cv2.resize(img, (image_size, image_size))
    data, _ = transform(nd.array(img), -1)
    plt.imshow(data.transpose((1, 2, 0)).asnumpy() / 255)
    data = data.expand_dims(axis=0)
    out = net(data.as_in_context(ctx))
    out = nd.SoftmaxActivation(out)
    pred = int(nd.argmax(out, axis=1).asscalar())
    prob = out[0][pred].asscalar()
    print(prob, pred)
    return 'confidence=%f, class %s' % (prob, label[str(pred)])
Example #16
def score_image(image_base64_string):
    with open('target_image.jpg', 'wb') as f:
        f.write(base64.b64decode(image_base64_string))
        f.close()
    with open('target_image.jpg', 'rb') as f:
        img = image.imdecode(f.read())
    data, _ = transform(img, -1, test_augs)
    data.transpose((1, 2, 0)).asnumpy()/255
    data = data.expand_dims(axis=0)
    net.forward(batch([data]), is_train=False)
    out = net.get_outputs()[0]
    out = nd.SoftmaxActivation(out)
    return int(out[0][1].asscalar() * 100)
def classify(fname):
    train_ds = vision.ImageFolderDataset('train',
                                         flag=1,
                                         transform=transform_train)
    with open(fname, 'rb') as f:
        img = image.imdecode(f.read())
    data = image.imresize(img.astype('float32') / 255, 32, 32)
    data = nd.transpose(data, (2, 0, 1))
    data = data.expand_dims(axis=0)
    net = get_net(mx.cpu(0))
    net.load_params('model.params', mx.cpu(0))
    out = net(data.as_in_context(mx.cpu(0)))
    out = nd.SoftmaxActivation(out)
    pred = int(nd.argmax(out, axis=1).asscalar())
    label = train_ds.synsets
    return label[pred]
Example #18
def evaluate_metrics(metrics,
                     data_iterator,
                     net,
                     nb_batches=None,
                     ctx=mx.gpu()):
    """
    Runs inference of the network on a data_iterator object and evaluates the given metrics.
    The metric results are returned as a dictionary object.

    :param metrics: List of mxnet metrics which must have the
    names ['value_loss', 'policy_loss', 'value_acc_sign', 'policy_acc']
    :param data_iterator: Gluon data iterator object
    :param net: Gluon network handle
    :param nb_batches: Number of batches to evaluate (early stopping).
     If set to None all batches of the data_iterator will be evaluated
    :param ctx: MXNET data context
    :return:
    """
    reset_metrics(metrics)
    for i, (data, value_label, policy_label) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        value_label = value_label.as_in_context(ctx)
        policy_label = policy_label.as_in_context(ctx)

        [value_out, policy_out] = net(data)

        # update the metrics
        metrics["value_loss"].update(preds=value_out, labels=value_label)
        metrics["policy_loss"].update(preds=nd.SoftmaxActivation(policy_out),
                                      labels=policy_label)
        metrics["value_acc_sign"].update(preds=value_out, labels=value_label)
        metrics["policy_acc"].update(preds=nd.argmax(policy_out, axis=1),
                                     labels=policy_label)

        # stop after evaluating x batches (only recommended to use this for the train set evaluation)
        if nb_batches is not None and i == nb_batches:
            break

    metric_values = {}
    metric_values["loss"] = (0.01 * metrics["value_loss"].get()[1] +
                             0.99 * metrics["policy_loss"].get()[1])

    for metric in metrics.values():
        metric_values[metric.get()[0]] = metric.get()[1]

    return metric_values
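
The docstring asks for metrics named 'value_loss', 'policy_loss', 'value_acc_sign' and 'policy_acc', and the body indexes metrics by those names. A plausible construction is sketched below; the concrete metric classes (MSE for the value head, CrossEntropy for the policy head, plain Accuracy for the rest) are assumptions, not taken from the original project:

import mxnet as mx

# hypothetical metrics dict matching the names expected by evaluate_metrics
metrics = {
    'value_loss': mx.metric.MSE(name='value_loss'),
    'policy_loss': mx.metric.CrossEntropy(name='policy_loss'),
    'value_acc_sign': mx.metric.Accuracy(name='value_acc_sign'),
    'policy_acc': mx.metric.Accuracy(name='policy_acc'),
}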
Example #19
def default_train_fn(epoch, num_epochs, net, batch, batch_size, criterion, trainer, batch_fn, ctx,
                     mixup=False, label_smoothing=False, distillation=False,
                     mixup_alpha=0.2, mixup_off_epoch=0, classes=1000,
                     dtype='float32', metric=None, teacher_prob=None):
    data, label = batch_fn(batch, ctx)
    if mixup:
        lam = np.random.beta(mixup_alpha, mixup_alpha)
        if epoch >= num_epochs - mixup_off_epoch:
            lam = 1
        data = [lam * X + (1 - lam) * X[::-1] for X in data]
        if label_smoothing:
            eta = 0.1
        else:
            eta = 0.0
        label = mixup_transform(label, classes, lam, eta)
    elif label_smoothing:
        hard_label = label
        label = smooth(label, classes)
    with mx.autograd.record():
        outputs = [net(X.astype(dtype, copy=False)) for X in data]
        if distillation:
            loss = [criterion(yhat.astype('float', copy=False),
                      y.astype('float', copy=False),
                      p.astype('float', copy=False)) for yhat, y, p in zip(outputs, label, teacher_prob(data))]
        else:
            loss = [criterion(yhat, y.astype(dtype, copy=False)) for yhat, y in zip(outputs, label)]
    for l in loss:
        l.backward()
    trainer.step(batch_size, ignore_stale_grad=True)

    if metric:
        if mixup:
            output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                              for out in outputs]
            metric.update(label, output_softmax)
        else:
            if label_smoothing:
                metric.update(hard_label, outputs)
            else:
                metric.update(label, outputs)
        return metric
    else:
        return
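
smooth() and mixup_transform() come from the surrounding training script and are not shown. A common label-smoothing helper along the lines of what smooth() presumably does (eta and the one-hot scheme are assumptions):

from mxnet import nd

def smooth(label, classes, eta=0.1):
    # assumed helper: turn integer labels into smoothed one-hot targets
    if isinstance(label, nd.NDArray):
        label = [label]
    return [l.one_hot(classes, on_value=1 - eta + eta / classes, off_value=eta / classes)
            for l in label]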
Example #20
def predict_mxnet(net, ctx, fname, label):
    '''
    Predict an image with MXNet.
    :param net: the trained model
    :param ctx: data context
    :param fname: path to the image
    :param label: label dictionary
    :return: predicted class and its probability
    '''
    with open(fname, 'rb') as f:
        img = image.imdecode(f.read())
        img = image.ForceResizeAug((image_size, image_size))(img)
    data, _ = transform_test(img, -1)
    data = data.expand_dims(axis=0)
    out = net(data.as_in_context(ctx))
    out = nd.SoftmaxActivation(out)
    pred = int(nd.argmax(out, axis=1).asscalar())
    prob = out[0][pred].asscalar()
    return 'confidence=%f, class %s' % (prob, label[str(pred)])
def predict(net, ctx, input_dir, threshold=0.9):
    '''
    Use the param-loaded CNN model to predict the classification probability of the input image.
    :param net: param loaded CNN net
    :param ctx: computing device
    :param input_dir: input image directory
    :param threshold: probability threshold
    :return None:
    '''
    movie_list = os.listdir(input_dir)  # movie list
    movie_list.sort()
    for movie in movie_list:
        image_list = os.listdir(os.path.join(input_dir, movie))  # image list
        for _image in image_list:
            image_file = os.path.join(input_dir, movie, _image)
            try:  # try to read and decode
                with open(image_file, 'rb') as f:
                    img = image.imdecode(f.read())
            except Exception as e:
                print('Failed to read image %s in movie %s' % (_image, movie))
                print('And the error is ', e)
                continue
            # predict
            data = transformPredict(img)
            data = data.as_in_context(ctx)
            out = net(data)
            out = nd.SoftmaxActivation(out).mean(axis=0)  # softmax process
            out = out.asnumpy().tolist()  # array to list
            # judge and delete
            if (out[2] > threshold) or (out[3] > threshold):
                os.remove(os.path.join(input_dir, movie, _image))
            # alternatively, just write the result to a file without deleting anything.
            out = [str(number) for number in out]
            string = '%s:%s' % (image_file, ','.join(out))
            writeResult(string + '\n')
            # you can also move these images to another directory

        print('Movie %s finished.' % movie)
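
transformPredict() and writeResult() are defined elsewhere in that script; writeResult() presumably just appends one line to a result file, roughly like this (the file name is an assumption):

def writeResult(string, result_file='predict_result.txt'):
    # hypothetical helper: append one prediction line to a result file
    with open(result_file, 'a') as f:
        f.write(string)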
Example #22
    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        if opt.resume_params == '':
            net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.no_wd:
            for _, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        trainer = gluon.Trainer(net.collect_params(), optimizer,
                                optimizer_params)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.label_smoothing or opt.mixup:
            L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss()

        best_val_score = 1

        first_fwd = True
        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    hard_label = label
                    label = smooth(label, classes)

                with ag.record():
                    outputs = [
                        net(X.astype(opt.dtype, copy=False)) for X in data
                    ]
                    loss = [
                        L(yhat, y.astype(opt.dtype, copy=False))
                        for yhat, y in zip(outputs, label)
                    ]
                for l in loss:
                    l.backward()

                if epoch == 0 and first_fwd and opt.mode == 'hybrid':
                    net.export("/tmp/net")
                    mx.visualization.print_summary(
                        mx.symbol.load('/tmp/net-symbol.json'),
                        shape={
                            'data':
                            (batch_size, 3, opt.input_size, opt.input_size)
                        })
                    first_fwd = False

                trainer._optimizer.lr_scheduler.update(i, epoch)
                lr_scheduler.update(i, epoch)
                trainer.step(batch_size)

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                    for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'
                        % (epoch, i, batch_size * opt.log_interval /
                           (time.time() - btic), train_metric_name,
                           train_metric_score, trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data)

            logger.info('[Epoch %d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f' %
                        (epoch, err_top1_val, err_top5_val))

            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters(
                    '%s/%.4f-imagenet-%s-%d-best.params' %
                    (save_dir, best_val_score, model_name, epoch))
                trainer.save_states(
                    '%s/%.4f-imagenet-%s-%d-best.states' %
                    (save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch +
                                                1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params' %
                                    (save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states' %
                                    (save_dir, model_name, epoch))

        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params' %
                                (save_dir, model_name, opt.num_epochs - 1))
            trainer.save_states('%s/imagenet-%s-%d.states' %
                                (save_dir, model_name, opt.num_epochs - 1))
Example #23
def train(epochs, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    net.initialize(mx.init.Xavier(), ctx=ctx)

    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size,
        shuffle=True,
        last_batch='discard',
        num_workers=num_workers)

    val_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers)

    trainer = gluon.Trainer(net.collect_params(), optimizer, {
        'learning_rate': opt.lr,
        'wd': opt.wd,
        'momentum': opt.momentum
    })
    metric = mx.metric.Accuracy()
    train_metric = mx.metric.RMSE()
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)
    train_history = TrainingHistory(['training-error', 'validation-error'])

    iteration = 0
    lr_decay_count = 0

    best_val_score = 0

    for epoch in range(epochs):
        tic = time.time()
        train_metric.reset()
        metric.reset()
        train_loss = 0
        num_batch = len(train_data)
        alpha = 1

        if epoch == lr_decay_epoch[lr_decay_count]:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
            lr_decay_count += 1

        for i, batch in enumerate(train_data):
            lam = np.random.beta(alpha, alpha)
            if epoch >= epochs - 20:
                lam = 1

            data_1 = gluon.utils.split_and_load(batch[0],
                                                ctx_list=ctx,
                                                batch_axis=0)
            label_1 = gluon.utils.split_and_load(batch[1],
                                                 ctx_list=ctx,
                                                 batch_axis=0)

            data = [lam * X + (1 - lam) * X[::-1] for X in data_1]
            label = []
            for Y in label_1:
                y1 = label_transform(Y, classes)
                y2 = label_transform(Y[::-1], classes)
                label.append(lam * y1 + (1 - lam) * y2)

            with ag.record():
                output = [net(X) for X in data]
                loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
            for l in loss:
                l.backward()
            trainer.step(batch_size)
            train_loss += sum([l.sum().asscalar() for l in loss])

            output_softmax = [nd.SoftmaxActivation(out) for out in output]
            train_metric.update(label, output_softmax)
            name, acc = train_metric.get()
            iteration += 1

        train_loss /= batch_size * num_batch
        name, acc = train_metric.get()
        name, val_acc = test(ctx, val_data)
        train_history.update([acc, 1 - val_acc])
        train_history.plot(save_path='%s/%s_history.png' %
                           (plot_name, model_name))

        if val_acc > best_val_score:
            best_val_score = val_acc
            net.save_parameters('%s/%.4f-cifar-%s-%d-best.params' %
                                (save_dir, best_val_score, model_name, epoch))

        name, val_acc = test(ctx, val_data)
        logging.info('[Epoch %d] train=%f val=%f loss=%f time: %f' %
                     (epoch, acc, val_acc, train_loss, time.time() - tic))

        if save_period and save_dir and (epoch + 1) % save_period == 0:
            net.save_parameters('%s/cifar10-%s-%d.params' %
                                (save_dir, model_name, epoch))

    if save_period and save_dir:
        net.save_parameters('%s/cifar10-%s-%d.params' %
                            (save_dir, model_name, epochs - 1))
Example #24
    return image

image = cv2.imread('img/pikachu.jpg')
x = preprocess(image)
print('x', x.shape)

# if pre-trained model is provided, we can load it
# net.load_params('ssd_%d.params' % epochs, ctx)
anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
print('anchors', anchors)
print('class predictions', cls_preds)
print('box delta predictions', box_preds)

from mxnet.contrib.ndarray import MultiBoxDetection
# convert predictions to probabilities using softmax
cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
# apply shifts to anchor boxes, non-maximum suppression, etc.
output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=True, clip=False)
print(output)


def display(img, out, thresh=0.5):
    import random
    import matplotlib as mpl
    mpl.rcParams['figure.figsize'] = (10,10)
    pens = dict()
    plt.clf()
    plt.imshow(img)
    for det in out:
        cid = int(det[0])
        if cid < 0:
Example #25
    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        if opt.resume_params == '':
            net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.summary:
            # net.summary(mx.nd.zeros((1, 3, opt.input_size, opt.input_size), ctx=ctx[0]))
            summary(net, mx.nd.zeros((1, 3, opt.input_size, opt.input_size), ctx=ctx[0]))
            sys.exit()

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(temperature=opt.temperature,
                                                                 hard_weight=opt.hard_weight,
                                                                 sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)

        best_val_score = 1

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam*X + (1-lam)*X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature) \
                                    for X in data]

                with ag.record():
                    outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    if distillation:
                        loss = [L(yhat.astype('float32', copy=False),
                                  y.astype('float32', copy=False),
                                  p.astype('float32', copy=False)) for yhat, y, p in zip(outputs, label, teacher_prob)]
                    else:
                        loss = [L(yhat, y.astype(opt.dtype, copy=False)) for yhat, y in zip(outputs, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                    for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if opt.log_interval and not (i+1)%opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'%(
                                epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                                train_metric_name, train_metric_score, trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i /(time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data)

            logger.info('[Epoch %d] training: %s=%f'%(epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f'%(epoch, throughput, time.time()-tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f'%(epoch, err_top1_val, err_top5_val))

            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params'%(save_dir, best_val_score, model_name, epoch))
                trainer.save_states('%s/%.4f-imagenet-%s-%d-best.states'%(save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states'%(save_dir, model_name, epoch))

        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))
            trainer.save_states('%s/imagenet-%s-%d.states'%(save_dir, model_name, opt.num_epochs-1))
Example #26
def run_epoch(e, network, dataloader, trainer, print_name, is_train,
              update_metric):
    '''
    Run one epoch to train or test the SSD network
    
    Parameters
    ----------
        
    e: int
        The epoch number

    network: gluon.nn.HybridSequential
        The SSD network

    dataloader: gluon.data.DataLoader
        The train or testing dataloader that is wrapped around the iam_dataset
    
    print_name: str
        Name to print for associating with the data; usually this will be "train" or "test"
    
    is_train: bool
        Boolean to indicate whether or not the CNN should be updated. is_train should only be set to true for the training data

    Returns
    -------

    total_loss: float
        The average loss over the epoch
    '''

    total_losses = [0 for ctx_i in ctx]
    for i, (X, Y) in enumerate(dataloader):
        X = gluon.utils.split_and_load(X, ctx)
        Y = gluon.utils.split_and_load(Y, ctx)

        with autograd.record(train_mode=is_train):
            losses = []
            for x, y in zip(X, Y):
                default_anchors, class_predictions, box_predictions = network(
                    x)
                box_target, box_mask, cls_target = network.training_targets(
                    default_anchors, class_predictions, y)
                # losses
                loss_class = cls_loss(class_predictions, cls_target)
                loss_box = box_loss(box_predictions, box_target, box_mask)
                # sum all losses
                loss = loss_class + loss_box
                losses.append(loss)

        if is_train:
            for loss in losses:
                loss.backward()
            step_size = 0
            for x in X:
                step_size += x.shape[0]
            trainer.step(step_size)

        for index, loss in enumerate(losses):
            total_losses[index] += loss.mean().asscalar()

        if update_metric:
            cls_metric.update([cls_target],
                              [nd.transpose(class_predictions, (0, 2, 1))])
            box_metric.update([box_target], [box_predictions * box_mask])

        if i == 0 and e % send_image_every_n == 0 and e > 0:
            cls_probs = nd.SoftmaxActivation(nd.transpose(
                class_predictions, (0, 2, 1)),
                                             mode='channel')
            output_image, number_of_bbs = generate_output_image(
                box_predictions, default_anchors, cls_probs, box_target,
                box_mask, cls_target, x, y)
            print("Number of predicted {} BBs = {}".format(
                print_name, number_of_bbs))

    total_loss = 0
    for loss in total_losses:
        total_loss += loss / (len(dataloader) * len(total_losses))

    return total_loss
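
cls_loss and box_loss are globals defined outside run_epoch. A typical SSD setup, sketched under the assumption of a softmax cross-entropy for the classes and a masked smooth-L1 for the box offsets (the original may use a different class loss, e.g. a focal loss):

from mxnet import gluon

cls_loss = gluon.loss.SoftmaxCrossEntropyLoss()

class SmoothL1Loss(gluon.loss.Loss):
    # masked smooth-L1 regression loss for the box offsets
    def __init__(self, batch_axis=0, **kwargs):
        super(SmoothL1Loss, self).__init__(None, batch_axis, **kwargs)

    def hybrid_forward(self, F, output, label, mask):
        loss = F.smooth_l1((output - label) * mask, scalar=1.0)
        return F.mean(loss, self._batch_axis, exclude=True)

box_loss = SmoothL1Loss()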
    def _train_loop(self, train_data, val_data):
        if self._cfg.train.no_wd:
            for k, v in self.net.collect_params(
                    '.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0
        if self._cfg.train.label_smoothing or self._cfg.train.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if self.distillation:
            L = loss.DistillationSoftmaxCrossEntropyLoss(
                temperature=self._cfg.train.temperature,
                hard_weight=self._cfg.train.hard_weight,
                sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(
                sparse_label=sparse_label_loss)

        if self._cfg.train.mixup:
            train_metric = mx.metric.RMSE()
        else:
            train_metric = mx.metric.Accuracy()
        if self._cfg.train.mode == 'hybrid':
            self.net.hybridize(static_alloc=True, static_shape=True)
            if self.distillation:
                self.teacher.hybridize(static_alloc=True, static_shape=True)

        self._logger.info('Start training from [Epoch %d]',
                          max(self._cfg.train.start_epoch, self.epoch))
        for self.epoch in range(max(self._cfg.train.start_epoch, self.epoch),
                                self._cfg.train.epochs):
            epoch = self.epoch
            if self._best_acc >= 1.0:
                self._logger.info(
                    '[Epoch {}] Early stopping as acc is reaching 1.0'.format(
                        epoch))
                break
            mx.nd.waitall()
            tic = time.time()
            btic = time.time()
            if self._cfg.train.use_rec:
                train_data.reset()
            train_metric.reset()

            # pylint: disable=undefined-loop-variable
            for i, batch in enumerate(train_data):
                data, label = self.batch_fn(batch, self.ctx)

                if self._cfg.train.mixup:
                    lam = np.random.beta(self._cfg.train.mixup_alpha,
                                         self._cfg.train.mixup_alpha)
                    if epoch >= self._cfg.train.epochs - self._cfg.train.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    if self._cfg.train.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif self._cfg.train.label_smoothing:
                    hard_label = label
                    label = smooth(label, self.num_class)

                if self.distillation:
                    teacher_prob = [nd.softmax(self.teacher(X.astype(self._cfg.train.dtype, copy=False)) \
                                    / self._cfg.train.temperature) for X in data]

                with ag.record():
                    outputs = [
                        self.net(X.astype(self._cfg.train.dtype, copy=False))
                        for X in data
                    ]
                    if self.distillation:
                        losses = [L(yhat.astype('float32', copy=False),
                                    y.astype('float32', copy=False),
                                    p.astype('float32', copy=False)) \
                                        for yhat, y, p in zip(outputs, label, teacher_prob)]
                    else:
                        losses = [
                            L(yhat, y.astype(self._cfg.train.dtype,
                                             copy=False))
                            for yhat, y in zip(outputs, label)
                        ]
                for l in losses:
                    l.backward()
                self.trainer.step(self.batch_size)

                if self._cfg.train.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                    for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if self._cfg.train.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if self._cfg.train.log_interval and not (
                        i + 1) % self._cfg.train.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    self._logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f',
                        epoch, i, self._cfg.train.batch_size *
                        self._cfg.train.log_interval / (time.time() - btic),
                        train_metric_name, train_metric_score,
                        self.trainer.learning_rate)
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(self.batch_size * i / (time.time() - tic))

            top1_val, top5_val = self._evaluate(val_data)

            self._logger.info('[Epoch %d] training: %s=%f', epoch,
                              train_metric_name, train_metric_score)
            self._logger.info(
                '[Epoch %d] speed: %d samples/sec\ttime cost: %f', epoch,
                throughput,
                time.time() - tic)
            self._logger.info('[Epoch %d] validation: top1=%f top5=%f', epoch,
                              top1_val, top5_val)

            if top1_val > self._best_acc:
                cp_name = os.path.join(self._logdir, 'best_checkpoint.pkl')
                self._logger.info(
                    '[Epoch %d] Current best top-1: %f vs previous %f, saved to %s',
                    self.epoch, top1_val, self._best_acc, cp_name)
                self.save(cp_name)
                self._best_acc = top1_val
            if self._reporter:
                self._reporter(epoch=epoch, acc_reward=top1_val)
            self._time_elapsed += time.time() - btic
        return {
            'train_acc': train_metric_score,
            'valid_acc': self._best_acc,
            'time': self._time_elapsed
        }
Example #28
    def train(epochs, ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.initialize(mx.init.Xavier(), ctx=ctx)

        if opt.summary:
            summary(net, mx.nd.zeros((1, 3, 32, 32), ctx=ctx[0]))
            sys.exit()

        if opt.dataset == 'cifar10':
            train_data = gluon.data.DataLoader(
                gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
                batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)
            val_data = gluon.data.DataLoader(
                gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
                batch_size=batch_size, shuffle=False, num_workers=num_workers)
        elif opt.dataset == 'cifar100':
            train_data = gluon.data.DataLoader(
                gluon.data.vision.CIFAR100(train=True).transform_first(transform_train),
                batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)
            val_data = gluon.data.DataLoader(
                gluon.data.vision.CIFAR100(train=False).transform_first(transform_test),
                batch_size=batch_size, shuffle=False, num_workers=num_workers)
        else:
            raise ValueError('Unknown Dataset')

        if opt.no_wd and opt.cosine:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True

        metric = mx.metric.Accuracy()
        train_metric = mx.metric.RMSE()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)
        train_history = TrainingHistory(['training-error', 'validation-error'])

        iteration = 0
        lr_decay_count = 0

        best_val_score = 0

        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)

            if not opt.cosine:
                if epoch == lr_decay_epoch[lr_decay_count]:
                    trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                    lr_decay_count += 1

            for i, batch in enumerate(train_data):
                data_1 = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
                label_1 = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if (epoch >= epochs - opt.mixup_off_epoch) or not opt.mixup:
                        lam = 1

                    data = [lam * X + (1 - lam) * X[::-1] for X in data_1]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label_1, classes, lam, eta)

                elif opt.label_smoothing:
                    hard_label = label_1
                    data = data_1
                    label = smooth(label_1, classes)
                else:
                    data, label = data_1, label_1

                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out) for out in output]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, output)
                    else:
                        train_metric.update(label, output)

                name, acc = train_metric.get()
                iteration += 1

            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, val_acc = test(ctx, val_data)
            train_history.update([acc, 1 - val_acc])
            train_history.plot(save_path='%s/%s_history.png' % (plot_name, model_name))

            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('%s/%.4f-%s-best.params' %
                                    (save_dir, best_val_score, model_name))

            name, val_acc = test(ctx, val_data)
            logging.info('[Epoch %d] train=%f val=%f loss=%f lr: %f time: %f' %
                         (epoch, acc, val_acc, train_loss, trainer.learning_rate,
                          time.time() - tic))

        host_name = socket.gethostname()
        with open(opt.dataset + '_' + host_name + '_GPU_' + opt.gpus + '_best_Acc.log', 'a') as f:
            f.write('best Acc: {:.4f}\n'.format(best_val_score))
        print("best_val_score: ", best_val_score)
Example #29
    def train(epochs, ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        if config.train_cfg.param_init:
            init_func = getattr(mx.init, config.train_cfg.init)
            net.initialize(init_func(), ctx=ctx, force_reinit=True)
        else:
            net.load_parameters(config.train_cfg.param_file, ctx=ctx)

        summary(net, stat_name, nd.uniform(
            shape=(1, 3, imgsize, imgsize), ctx=ctx[0]))
        # net = nn.HybridBlock()
        net.hybridize()

        root = config.dir_cfg.dataset
        train_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(
                root=root, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)

        val_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(
                root=root, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        trainer_arg = {'learning_rate': config.lr_cfg.lr,
                       'wd': config.lr_cfg.wd, 'lr_scheduler': lr_sch}
        extra_arg = eval(config.lr_cfg.extra_arg)
        trainer_arg.update(extra_arg)
        trainer = gluon.Trainer(net.collect_params(), optimizer, trainer_arg)
        if config.train_cfg.amp:
            amp.init_trainer(trainer)
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.RMSE()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss(
            sparse_label=False if config.data_cfg.mixup else True)
        train_history = TrainingHistory(['training-error', 'validation-error'])
        # acc_history = TrainingHistory(['training-acc', 'validation-acc'])
        loss_history = TrainingHistory(['training-loss', 'validation-loss'])

        iteration = 0

        best_val_score = 0

        # print('start training')
        sig_state.emit(1)
        sig_pgbar.emit(0)
        # signal.emit('Training')
        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1
            for i, batch in enumerate(train_data):
                if epoch == 0 and iteration == 1 and config.save_cfg.profiler:
                    profiler.set_state('run')
                    is_profiler_run = True
                if epoch == 0 and iteration == 1 and config.save_cfg.tensorboard:
                    sw.add_graph(net)
                lam = np.random.beta(alpha, alpha)
                if epoch >= epochs - 20 or not config.data_cfg.mixup:
                    lam = 1

                data_1 = gluon.utils.split_and_load(
                    batch[0], ctx_list=ctx, batch_axis=0)
                label_1 = gluon.utils.split_and_load(
                    batch[1], ctx_list=ctx, batch_axis=0)

                if not config.data_cfg.mixup:
                    data = data_1
                    label = label_1
                else:
                    data = [lam*X + (1-lam)*X[::-1] for X in data_1]
                    label = []
                    for Y in label_1:
                        y1 = label_transform(Y, classes)
                        y2 = label_transform(Y[::-1], classes)
                        label.append(lam*y1 + (1-lam)*y2)

                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                if config.train_cfg.amp:
                    with ag.record():
                        with amp.scale_loss(loss, trainer) as scaled_loss:
                            ag.backward(scaled_loss)
                            # scaled_loss.backward()
                else:
                    for l in loss:
                        l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])

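                # Soft (mixed) targets are tracked with RMSE on the softmax outputs, while
                # the Accuracy metric is always updated against the original hard labels.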
                output_softmax = [nd.SoftmaxActivation(out) for out in output]
                train_metric.update(label, output_softmax)
                metric.update(label_1, output_softmax)
                name, acc = train_metric.get()
                if config.save_cfg.tensorboard:
                    sw.add_scalar(tag='lr', value=trainer.learning_rate,
                                  global_step=iteration)
                if epoch == 0 and iteration == 1 and config.save_cfg.profiler:
                    nd.waitall()
                    profiler.set_state('stop')
                    profiler.dump()
                iteration += 1
                sig_pgbar.emit(iteration)
                if check_flag()[0]:
                    sig_state.emit(2)
                while check_flag()[0] or check_flag()[1]:
                    if check_flag()[1]:
                        print('stop')
                        return
                    else:
                        time.sleep(5)
                        print('pausing')

            epoch_time = time.time() - tic
            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            _, train_acc = metric.get()
            name, val_acc, val_loss = test(ctx, val_data)
            # if config.data_cfg.mixup:
            #     train_history.update([acc, 1-val_acc])
            #     plt.cla()
            #     train_history.plot(save_path='%s/%s_history.png' %
            #                        (plot_name, model_name))
            # else:
            train_history.update([1-train_acc, 1-val_acc])
            plt.cla()
            train_history.plot(save_path='%s/%s_history.png' %
                               (plot_name, model_name))

            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('%s/%.4f-cifar-%s-%d-best.params' %
                                    (save_dir, best_val_score, model_name, epoch))

            current_lr = trainer.learning_rate

            logging.info('[Epoch %d] loss=%f train_acc=%f train_RMSE=%f\n     val_acc=%f val_loss=%f lr=%f time: %f' %
                         (epoch, train_loss, train_acc, acc, val_acc, val_loss, current_lr, epoch_time))
            loss_history.update([train_loss, val_loss])
            plt.cla()
            loss_history.plot(save_path='%s/%s_loss.png' %
                              (plot_name, model_name), y_lim=(0, 2), legend_loc='best')
            if config.save_cfg.tensorboard:
                sw._add_scalars(tag='Acc',
                                scalar_dict={'train_acc': train_acc, 'test_acc': val_acc}, global_step=epoch)
                sw._add_scalars(tag='Loss',
                                scalar_dict={'train_loss': train_loss, 'test_loss': val_loss}, global_step=epoch)

            sig_table.emit([epoch, train_loss, train_acc,
                            val_loss, val_acc, current_lr, epoch_time])
            csv_writer.writerow([epoch, train_loss, train_acc,
                                 val_loss, val_acc, current_lr, epoch_time])
            csv_file.flush()

            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('%s/cifar10-%s-%d.params' %
                                    (save_dir, model_name, epoch))
        if save_period and save_dir:
            net.save_parameters('%s/cifar10-%s-%d.params' %
                                (save_dir, model_name, epochs-1))
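    # NOTE: `label_transform` is used above but not shown in this snippet. A minimal
    # sketch, assuming it simply one-hot encodes the integer labels (so mixup can
    # blend them into soft targets) in the style of the gluon-cv CIFAR example;
    # this is an illustrative helper, not necessarily the author's implementation:
    def label_transform(label, classes):
        ind = label.astype('int')
        res = nd.zeros((ind.shape[0], classes), ctx=label.context)
        res[nd.arange(ind.shape[0], ctx=label.context), ind] = 1
        return res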
Exemple #30
        def train_epoch(pool=None,
                        pool_lock=None,
                        shared_finished_flag=None,
                        use_pool=False):
            btic = time.time()
            for i, batch in enumerate(train_data):
                if i == num_batches:
                    if use_pool:
                        shared_finished_flag.value = True
                    return
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    hard_label = label
                    label = smooth(label, classes)

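                # Knowledge distillation: the frozen teacher's class probabilities, softened
                # by opt.temperature, become an additional target for the student loss below.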
                if distillation:
                    teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature) \
                                    for X in data]

                with ag.record():
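                    # ShuffleNas supernet with a candidate pool: an external sampler
                    # (presumably running in another process) fills `pool` with
                    # block/channel choices; each batch pops one candidate and trains
                    # only that sub-network.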
                    if model_name == 'ShuffleNas' and use_pool:
                        cand = None
                        while cand is None:
                            if len(pool) > 0:
                                with pool_lock:
                                    cand = pool.pop()
                                    if i % opt.log_interval == 0:
                                        logger.debug('[Trainer] ' + '-' * 40)
                                        logger.debug(
                                            "[Trainer] Time: {}".format(
                                                time.time()))
                                        logger.debug(
                                            "[Trainer] Block choice: {}".
                                            format(cand['block_list']))
                                        logger.debug(
                                            "[Trainer] Channel choice: {}".
                                            format(cand['channel_list']))
                                        logger.debug(
                                            "[Trainer] Flop: {}M, param: {}M".
                                            format(cand['flops'],
                                                   cand['model_size']))
                            else:
                                time.sleep(1)

                        full_channel_masks = [
                            cand['channel'].as_in_context(ctx_i)
                            for ctx_i in ctx
                        ]
                        outputs = [
                            net(X.astype(opt.dtype, copy=False), cand['block'],
                                channel_mask) for X, channel_mask in zip(
                                    data, full_channel_masks)
                        ]
                    elif model_name == 'ShuffleNas':
                        block_choices = net.random_block_choices(
                            select_predefined_block=False, dtype=opt.dtype)
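                        # Channel-selection warm-up: `epoch_after_cs` (epochs past
                        # epoch_start_cs) is passed so the random channel masks can
                        # gradually open up the channel search space as training progresses.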
                        if opt.cs_warm_up:
                            full_channel_mask, channel_choices = net.random_channel_mask(
                                select_all_channels=opt.use_all_channels,
                                epoch_after_cs=epoch - opt.epoch_start_cs,
                                dtype=opt.dtype,
                                ignore_first_two_cs=opt.ignore_first_two_cs)
                        else:
                            full_channel_mask, channel_choices = net.random_channel_mask(
                                select_all_channels=opt.use_all_channels,
                                dtype=opt.dtype,
                                ignore_first_two_cs=opt.ignore_first_two_cs)

                        full_channel_masks = [
                            full_channel_mask.as_in_context(ctx_i)
                            for ctx_i in ctx
                        ]
                        outputs = [
                            net(X.astype(opt.dtype, copy=False), block_choices,
                                channel_mask) for X, channel_mask in zip(
                                    data, full_channel_masks)
                        ]
                    else:
                        outputs = [
                            net(X.astype(opt.dtype, copy=False)) for X in data
                        ]

                    if distillation:
                        loss = [
                            L(yhat.astype('float32', copy=False),
                              y.astype('float32', copy=False),
                              p.astype('float32', copy=False))
                            for yhat, y, p in zip(outputs, label, teacher_prob)
                        ]
                    else:
                        loss = [
                            L(yhat, y.astype(opt.dtype, copy=False))
                            for yhat, y in zip(outputs, label)
                        ]
                for l in loss:
                    l.backward()
                trainer.step(batch_size, ignore_stale_grad=True)

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                    for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'
                        % (epoch, i, batch_size * opt.log_interval /
                           (time.time() - btic), train_metric_name,
                           train_metric_score, trainer.learning_rate))
                    btic = time.time()
            return
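        # NOTE: `mixup_transform` and `smooth` are used above but not shown in this
        # snippet. Minimal sketches in the style of the gluon-cv ImageNet script,
        # assuming one-hot targets with optional label smoothing (illustrative only,
        # not necessarily the author's implementation):
        def mixup_transform(label, classes, lam=1, eta=0.0):
            if isinstance(label, nd.NDArray):
                label = [label]
            res = []
            for l in label:
                y1 = l.one_hot(classes, on_value=1 - eta + eta / classes,
                               off_value=eta / classes)
                y2 = l[::-1].one_hot(classes, on_value=1 - eta + eta / classes,
                                     off_value=eta / classes)
                res.append(lam * y1 + (1 - lam) * y2)
            return res

        def smooth(label, classes, eta=0.1):
            if isinstance(label, nd.NDArray):
                label = [label]
            return [l.one_hot(classes, on_value=1 - eta + eta / classes,
                              off_value=eta / classes) for l in label]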