def eval2015(net,
             test_folder,
             result_folder,
             text_threshold=0.7,
             link_threshold=0.4,
             low_text=0.4):
    """Run detection on every image of a folder, save the results and score them.

    For each image returned by ``file_utils.get_files(test_folder)`` the
    network is run through ``test_net``.  The returned score map is written to
    ``<result_folder>/mask/res_<name>_mask.jpg`` and the polygons are handed to
    ``file_utils.saveResult15`` with ``<result_folder>/gt`` as the target
    directory.  Afterwards ``eval_2015`` is called on the ``gt`` folder and the
    elapsed wall-clock time is printed.

    Args:
        net: Network object forwarded to ``test_net``.
        test_folder: Folder scanned for input images.
        result_folder: Root folder for the ``gt`` and ``mask`` outputs.
        text_threshold: Forwarded unchanged to ``test_net``.
        link_threshold: Forwarded unchanged to ``test_net``.
        low_text: Forwarded unchanged to ``test_net``.
    """
    image_list, _, _ = file_utils.get_files(test_folder)
    t = time.time()
    res_gt_folder = os.path.join(result_folder, 'gt')
    res_mask_folder = os.path.join(result_folder, 'mask')

    # cv2.imwrite does not create missing directories, so make sure the mask
    # folder exists before the first write.
    os.makedirs(res_mask_folder, exist_ok=True)

    total = len(image_list)
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, total, image_path))
        image = imgproc.loadImage(image_path)

        # The trailing positional arguments are fixed evaluation settings
        # whose meaning is defined by test_net (not visible from here).
        _, polys, score_text = test_net(net, image, text_threshold,
                                        link_threshold, low_text, True,
                                        False, 2240, 1.5, False)

        # Save the score map.  The file name must be a *relative* component:
        # a leading "/" would make os.path.join drop res_mask_folder and
        # write to the filesystem root instead.
        filename, _ = os.path.splitext(os.path.basename(image_path))
        mask_file = os.path.join(res_mask_folder,
                                 "res_" + filename + '_mask.jpg')
        cv2.imwrite(mask_file, score_text)

        file_utils.saveResult15(image_path, polys, dirname=res_gt_folder)

    eval_2015(res_gt_folder)
    print("elapsed time : {}s".format(time.time() - t))
def main():
    """Load a saved encoder / relation-network pair and report test accuracy.

    Both networks are built from ``Net_factor``, moved to ``device``, restored
    from fixed weight files below ``weights/User_ours`` and evaluated with
    ``test_net`` on the ``signfi_150_user4`` dataset.
    """
    dataset = datasets.Datasets(datasets.Datasets_list.signfi_150_user4)

    # Build the two networks, then move each one to the target device.
    feature_encoder = CNNEncoder(Net_factor)
    relation_network = RelationNetwork(Net_factor)
    for module in (feature_encoder, relation_network):
        module.to(device)

    # Locations of the saved weights of both networks.
    weights_root = Path("weights/User_ours")
    relation_weights = weights_root / '06_15_12_09/relation_network_T!06_15_12_11_23_D!signfi_150_user123_5W1S_E!299_A!0.8830.pkl'
    encoder_weights = weights_root / '06_15_12_09/feature_encoder_T!06_15_12_11_23_D!signfi_150_user123_5W1S_E!299_A!0.8830.pkl'

    serialization.load_net(feature_encoder, encoder_weights)
    serialization.load_net(relation_network, relation_weights)

    # Evaluate with both networks switched to eval mode.
    logprint("DIY Testing...")
    feature_encoder.eval()
    relation_network.eval()
    accuracy, interval = test_net(feature_encoder,
                                  relation_network,
                                  5,
                                  dataset,
                                  class_range=(0, 150),
                                  train=False)
    logprint(f"test accuracy: {accuracy:.4f} h:{interval} ")
예제 #3
0
def test_tf_implementation(cache_file, weights_path, all_layer_outs=False):
    """Evaluate the weighted TF model on the cached test data.

    The model comes from ``get_weighted_tf_implementation(weights_path,
    all_layer_outs)`` and the roidb from ``get_test_data(cache_file)``; the
    value returned by ``test.test_net`` is passed straight back to the caller.
    """
    model = get_weighted_tf_implementation(weights_path, all_layer_outs)
    cached_roidb = get_test_data(cache_file)
    return test.test_net(model, cached_roidb)
예제 #4
0
def validate_net(model_path, prev_accs):
    print "Validating ..."

    # net.name = experiment_name
    caffe.set_mode_gpu()
    caffe.set_device(cfg.GPU_ID)
    net = caffe.Net(cfg.val_prototxt, model_path, caffe.TEST)
    net.name = os.path.splitext(os.path.basename(model_path))[0]

    acc = test_net(net, model_path.split('/')[-1], vis=False, prev_accs=prev_accs)
    return acc
예제 #5
0
    def train_model(self, max_iters):
        """Network training loop."""
        last_snapshot_iter = -1
        timer = Timer()
        model_paths = []
        while self._solver.iter < max_iters:
            # Make one SGD update
            timer.tic()
            self._solver.step(1)
            timer.toc()
            if self._solver.iter % (10 * self._solver_param.display) == 0:
                print 'speed: {:.3f}s / iter'.format(timer.average_time)
            if self._solver.iter % 10 == 0:
                print "Python: iter", self._solver.iter
            if self._solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self._solver.iter
                model_paths.append(self.snapshot())

            if self._solver.iter % cfg.TRAIN.TEST_ITERS == 0:
                test_net(self._solver.test_net, self._db, self._output_dir)

        if last_snapshot_iter != self._solver.iter:
            model_paths.append(self.snapshot())
        return model_paths
예제 #6
0
        print 'Iteration:', str(iteration)
        print 'Loss:', train_net.blobs['loss'].data
        sys.stdout.flush()

        #-- create snapshot and test
        if iteration % test_every_n_iters == 0:
            print 'Snapshoting and Testing'
            sys.stdout.flush()

            #-- snapshot
            snapshot_path = utils.get_snapshot_name(experiment, split_name,
                                                    iteration)
            snapshot(train_net, snapshot_path)

            #-- test
            net_results, position_results = test.test_net(
                test_model, snapshot_path, test_data, val_iters, position_maps)

            im_acc, price_acc, name_acc = net_results
            print 'NET: image accuracy:', im_acc
            print 'NET: price accuracy:', price_acc
            print 'NET: name accuracy:', name_acc

            p_im_acc, p_price_acc, p_name_acc = position_results
            print 'NET+POSITION: image accuracy:', p_im_acc
            print 'NET+POSITION: price accuracy:', p_price_acc
            print 'NET+POSITION: name accuracy:', p_name_acc

            sys.stdout.flush()

    ###--- FINAL SNAPSHOT
    snapshot_path = utils.get_snapshot_name(experiment, split_name,
예제 #7
0
        duration = t.toc(average=False)
        fps = step_cnt / duration
        log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch), lr: %.9f, momen: %.4f, wt_dec: %.6f' % (
            step, blobs['im_name'], train_loss / step_cnt, fps, 1. / fps, lr,
            momentum, weight_decay)
        log_print(log_text, color='green', attrs=['bold'])
        re_cnt = True

    #TODO: evaluate the model every N iterations (N defined in handout)

    if step % vis_interval == 0 and step > 0:
        net.eval()
        aps = test_net(name='WSDDN_test',
                       imdb=imdb_test,
                       net=net,
                       thresh=1e-4,
                       visualize=True,
                       logger=tboard_writer,
                       step=step)
        net.train()

    #TODO: Perform all visualizations here
    #You can define other interval variable if you want (this is just an
    #example)
    #The intervals for different things are defined in the handout
    tboard_writer.add_scalar('train/loss', loss.item(), step)
    if step == 0:
        visdom_loss = visdom_logger.line(X=np.array([step]),
                                         Y=np.array([loss.item()]),
                                         opts=dict(title='train/loss'))
    else:
예제 #8
0
파일: test_net.py 프로젝트: fireae/DDPN
        qdic = Dictionary(qdic_dir)
        qdic.load()
        vocab_size = qdic.size()
        test_model = models.net(opts.test_split, vocab_size, opts)
        test_net_path = osp.join(get_models_dir(), 'test.prototxt')
        with open(test_net_path, 'w') as f:
            f.write(str(test_model))
    else:
        test_net_path = opts.test_net

    caffe.set_mode_gpu()
    caffe.set_device(opts.gpu_id)
    net = caffe.Net(test_net_path, opts.pretrained_model, caffe.TEST)
    net.name = os.path.splitext(os.path.basename(opts.pretrained_model))[0]

    log_file = osp.join(
        cfg.LOG_DIR, '%s_%s_%s_accuracy.txt' %
        (cfg.IMDB_NAME, cfg.FEAT_TYPE, cfg.PROJ_NAME))
    if os.path.exists(log_file):
        os.remove(log_file)
    test_split = opts.test_split
    if type(test_split) is list:
        for split in test_split:
            accuracy = test_net(split, net, opts.batchsize, vis=opts.vis_pred)
            with open(log_file, 'a') as f:
                f.write('%s accuracy: %f\n' % (split, accuracy))
    else:
        accuracy = test_net(test_split, net, opts.batchsize, vis=opts.vis_pred)
        with open(log_file, 'a') as f:
            f.write('%s accuracy: %f\n' % (test_split, accuracy))
예제 #9
0
    if step % disp_interval == 0:
        duration = t.toc(average=False)
        fps = step_cnt / duration
        log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch), lr: %.9f, momen: %.4f, wt_dec: %.6f' % (
            step, blobs['im_name'], train_loss / step_cnt, fps, 1. / fps, lr,
            momentum, weight_decay)
        log_print(log_text, color='green', attrs=['bold'])
        re_cnt = True

    #TODO: evaluate the model every N iterations (N defined in handout)
    if (step) % eval_interval == 0:
        net.eval()
        aps = test_net(name=save_name,
                       net=net,
                       imdb=imdb_val,
                       thresh=thresh,
                       logger=writer,
                       visualize=visualize,
                       step=step)
        mAP = np.mean(aps)
        print("Average Precisions are: ", aps)
        if (step == last_eval_step):
            print("Final Step Result")
            print("All MAP's ", map_list)
            print("Final mAP is: ", mAP)
            print("Final class-wise AP is: ")
            for id, elt in enumerate(aps):
                class_name = imdb_val._classes[id]
                print(str(class_name) + "_AP: ", aps[id])

        if visualize:
예제 #10
0
            parser.print_help()
            parser.error('For testing, test_mode must be 0,1 or 2.')

    if not args.model:
        print('No model specified. using default: ', default_store_model)
        args.model = default_store_model

    if not args.gpu:
        args.gpu = '0'

    return args

if __name__ == '__main__':
    args = parse_arguments()

    # Restrict execution to the requested GPU(s).  args.gpu is a comma
    # separated string of GPU IDs, e.g. '0, 1, 2, 3'.
    gpu_id = args.gpu
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": gpu_id,
    })

    # All three configuration files live in the 'configure' directory.
    common_cfg_file, train_cfg_file, test_cfg_file = (
        os.path.join('configure', cfg_name)
        for cfg_name in ('common.json', 'train.json', 'test.json'))

    # Any phase other than 'train' runs the test entry point.
    if args.phase != 'train':
        test_net(common_cfg_file, test_cfg_file, args.test_mode, args.model)
    else:
        train_net(
            common_cfg_file, train_cfg_file, args.train_mode, args.model,
            args.epochs, args.retrain, args.initial_epoch)
    def for_resnet50():
        """Score one model against a series of saved weight files.

        A single model is built with ``new_model(in_shape, out_shape)``.  For
        every entry of the run table below, the matching weight file under
        ``config.SHALLOW_PATH`` is loaded into it (``by_name=True``), the model
        is evaluated with ``test_net`` on ``testset`` and the score is passed
        to ``write_net_score`` with the entry's label, ``testset_name`` and
        ``"test_results.csv"``.
        """
        LF = new_model(in_shape, out_shape)

        # Every weight file name shares this middle part; only the prefix and
        # the checkpoint tag in front of ".h5" differ between runs.
        feat_tail = "__feat_dbp3120_noflk_train_ds__resnet50__avg_pool.weights."

        # (file-name prefix, checkpoint tag, label reported with the score)
        runs = [
            # Disabled runs, kept for reference:
            # ("shallow_AB", "best", "AB best"),
            # ("shallow_AB", "last", "AB last"),
            ("shallow_A", "best", "A best (5ep)"),
            ("shallow_A", "last", "A last (5ep)"),
            ("shallow_LF_FT_A", "best", "LF A best"),
            ("shallow_LF_FT_A", "00", "LF A 0"),
            ("shallow_LF_FT_A", "01", "LF A 1"),
            ("shallow_LF_FT_A", "02", "LF A 2"),
            ("shallow_LF_FT_A", "03", "LF A 3"),
            ("shallow_LF_FT_A", "04", "LF A 4"),
        ]

        for prefix, tag, label in runs:
            shallow_path = config.SHALLOW_PATH + prefix + feat_tail + tag + ".h5"
            LF.load_weights(shallow_path, by_name=True)
            score = test_net(LF, testset)
            write_net_score(score,
                            label,
                            testset_name,
                            "test_results.csv",
                            detailed_csv=True)
예제 #12
0
def main():
    """Train the network, testing and checkpointing after every epoch.

    The training loader is built from the dataset selection ``[0, 1, 2, 3]``
    and the test loader from ``[4]``.  Network, device, optimizer and loss
    come from ``get_common_items``.  Each epoch runs one pass over the
    training loader with element-wise gradient clipping, evaluates with
    ``test_net``, and saves a ``Checkpoint`` to ``./data/nets/check_<epoch>.tor``.
    A final test loss is printed after the last epoch.
    """
    hyper = Hyperparameters()
    stacks = hyper.STACKS
    batch_size = hyper.BATCH_SIZE
    epochs = hyper.EPOCHS

    train_set = get_selected_datasets([0, 1, 2, 3], stacks=stacks)
    dataset_loader = torch.utils.data.DataLoader(train_set,
                                                 batch_size=batch_size, shuffle=True,
                                                 num_workers=4)

    test_set = get_selected_datasets([4], stacks=stacks)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=batch_size, shuffle=True,
                                              num_workers=4)

    net, device, optimizer, criterion = get_common_items(hyper)

    # Every gradient element is clamped to [-clip, clip] before each step.
    clip = .5
    training_steps = 0

    for epoch in range(epochs):
        print("Starting epoch %s" % (epoch,))
        epoch_loss = 0.0
        with tqdm(iter(dataset_loader)) as t:
            for images, labels, frames in iter_net_transform(t, device):
                optimizer.zero_grad()
                output = net(images)
                target = labels.view_as(output)  # same shape as the output
                loss = criterion(output, target)
                loss.backward()
                # Library equivalent of clamping p.grad of every parameter
                # that has a gradient.
                torch.nn.utils.clip_grad_value_(net.parameters(), clip)
                optimizer.step()

                batch_loss = loss.item()
                epoch_loss += batch_loss
                t.set_description("Loss: %s" % (batch_loss,))

        # Test the epoch.
        print("Testing epoch %s" % (epoch,))
        test_loss = test_net(net, test_loader, device, criterion)
        print("Test loss: %s" % (test_loss,))

        # Finish the epoch and write a checkpoint.
        print("Finishing epoch %s" % (epoch,))
        average_loss = epoch_loss / len(dataset_loader)
        training_steps += len(dataset_loader)
        print("Training Loss: %s" % (average_loss,))
        stats = Statistics(loss=average_loss, test_loss=test_loss, training_steps=training_steps)
        # NOTE(review): the checkpoint receives the epoch index as
        # training_steps while the statistics carry the real step count —
        # confirm this is intended.
        checkpoint = Checkpoint(net, optimizer, training_steps=epoch, statistics=dataclasses.asdict(stats))
        checkpoint.save_state("./data/nets/check_%s.tor" % (epoch,))

    test_loss = test_net(net, test_loader, device, criterion)
    print("Final Test loss: %s" % (test_loss,))
예제 #13
0
    if not os.path.exists(model_save_folder):
        os.makedirs(model_save_folder)
    default.e2e_prefix = model_save_folder + '/' + default.exp_name
    if default.begin_epoch != 0:
        default.resume = True
    default.accs = dict()

    if default.gpus == '':  # auto select GPU
        import GPUtil
        deviceIDs = GPUtil.getAvailable(order='lowest', limit=1, maxMemory=.2)
        if len(deviceIDs) == 0:
            deviceIDs = GPUtil.getAvailable(order='lowest',
                                            limit=1,
                                            maxMemory=.9,
                                            maxLoad=1)

        GPUs = GPUtil.getGPUs()
        default.gpus = str(len(GPUs) - 1 - deviceIDs[0])
        logger.info('using gpu ' + default.gpus)
    default.val_gpu = default.gpus[0]
    # default.prefetch_thread_num = min(default.prefetch_thread_num, config.TRAIN.SAMPLES_PER_BATCH)

    train_net(default)

    # test the best model on the test set
    from test import test_net
    print('Testing best epoch....')
    test_net(default.e2e_prefix, default.best_epoch, exec_args.exp_folder)
    print('Testing last epoch....')
    test_net(default.e2e_prefix, default.e2e_epoch, exec_args.exp_folder)
                        loss_value / 2)
                )
                loss_time = 0
                loss_value = 0
                st = time.time()
            # if loss < compare_loss:
            #     print('save the lower loss iter, loss:',loss)
            #     compare_loss = loss
            #     torch.save(net.module.state_dict(),
            #                './output/real_weights/lower_loss.pth')

        print('Saving state, iter:', epoch)
        torch.save(net.module.state_dict(), 'weights/clr_' + repr(epoch) + '.pth')

        for k, image_path in enumerate(image_list):
            print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
            image = imgproc.loadImage(image_path)

            bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text,
                                                 args.cuda, args.poly, args.ocr_type)
            # save score text
            filename, file_ext = os.path.splitext(os.path.basename(image_path))
            # mask_file = result_folder + "/res_" + filename + '_mask.jpg'
            # cv2.imwrite(mask_file, score_text)

            file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname='weights/' + repr(epoch) + '/')

        # test('./output/clr_' + repr(epoch) + '.pth')
        # test('./output/mlt_25k.pth')
        # getresult()
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    if cfg.NET == "res101":
        net = ResNetv2(101)
    elif cfg.NET == "nasnet":
        net = NasNet()

    imdb = DataLayer(args.imdb)
    wlabel = args.imdb != "test"

    net.create_architecture("TRAIN", num_classes=129, tag='test')
    # read checkpoint file
    if args.ckpt:
        ckpt = tf.train.get_checkpoint_state(args.ckpt)
    else:
        raise ValueError("NO checkpoint found in {}".format(args.ckpt))

    # set config
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True

    # init session
    saver = tf.train.Saver()
    with tf.Session(config=tfconfig) as sess:
        print('Restored from {}'.format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)
        test_net(sess, net, imdb, args.tag, wlabel)
예제 #16
0
    
    # Log to screen
    if step % disp_interval == 0:
        duration = t.toc(average=False)
        fps = step_cnt / duration
        log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch), lr: %.9f, momen: %.4f, wt_dec: %.6f' % (
            step, blobs['im_name'], train_loss / step_cnt, fps, 1./fps, lr, momentum, weight_decay)
        log_print(log_text, color='green', attrs=['bold'])
        re_cnt = True

    #TODO: evaluate the model every 5000 iterations (N defined in handout)
    if step%100 == 0 and step>0:
        save_name = 'test_'+str(step)
        net.eval()
        imdb_test.competition_mode(on=True)
        ap = test_net(save_name, net, imdb_test,
                       cfg.TRAIN.BATCH_SIZE, thresh=thresh, visualize=use_visdom)
#       # import pdb; pdb.set_trace()
        for i in range(len(imdb.classes)):
            cls_name = imdb.classes[i]
            cur_ap = ap[i]
            logger.scalar_summary('{}_AP'.format(cls_name), cur_ap, step)
        viz.updateTrace(X=np.array([step]), Y=np.array([np.average(ap)]), win=ap_win,name='train_ap')




    #TODO: Perform all visualizations here
    #You can define other interval variable if you want (this is just an
    #example)
    #The intervals for different things are defined in the handout
#     if visualize and step%vis_interval==0:
예제 #17
0

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args

# Script entry point: load a Caffe model in TEST phase and run test_net on the
# test images of the '102flowers_part_256' image database.
if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')
    print(args)

    print('Using config:')
    pprint.pprint(cfg)



    # Select GPU mode and device before the net is instantiated.
    caffe.set_mode_gpu()
    caffe.set_device(args.gpu_id)

    net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
    # Name the net after the caffemodel file, extension stripped.
    net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]

    # NOTE(review): this rebinds the module-level name `imdb` from the callable
    # to the object it returned, so `imdb(...)` cannot be called again below.
    imdb = imdb('102flowers_part_256')
    _imdb = imdb.get_test_image(wholepic=False)
    print 'nums of images:',len(_imdb)
    # Per the original author's note, _imdb holds
    # [imagepath, label, boxes, flipped].
    test_net(net,_imdb,all=False)    #_imdb[imagepath,label,boxes,flipped]
예제 #18
0
    if step % disp_interval == 0:
        duration = t.toc(average=False)
        fps = step_cnt / duration
        log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch), lr: %.9f, momen: %.4f, wt_dec: %.6f' % (
            step, blobs['im_name'], train_loss / step_cnt, fps, 1. / fps, lr,
            momentum, weight_decay)
        log_print(log_text, color='green', attrs=['bold'])
        re_cnt = True

    #TODO: evaluate the model every N iterations (N defined in handout)

    if step % vis_interval == 0 and step > 0:
        AP = test.test_net(name='test',
                           net=net,
                           imdb=imdb_test,
                           logger=writer,
                           step=step,
                           visualize=True,
                           thresh=0.001)
        # import pdb; pdb.set_trace()
        for i in range(len(AP)):
            writer.add_scalar('test_AP_class_: ' + imdb._classes[i], AP[i],
                              step)
        if flag_create_plot_mAP == True:
            vis.line(Y=np.array([np.mean(AP)]),
                     X=np.array([step]),
                     win="mAP_test",
                     opts=dict(title='mAP_vs_step(test)'))
            flag_create_plot_mAP = False
        else:
            vis.line(Y=np.array([np.mean(AP)]),
예제 #19
0
파일: train.py 프로젝트: jpulidojr/3DCE_new
    merge_a_into_b(config_file, config)
    config.NUM_ANCHORS = len(config.ANCHOR_SCALES) * len(config.ANCHOR_RATIOS)

    if config.FRAMEWORK != '3DCE':
        assert config.NUM_IMAGES_3DCE == 1, "Combining multiple images is only possible in 3DCE"

    default_file = cfg_from_file('default.yml')
    merge_a_into_b(default_file, default)
    default.e2e_prefix = 'model/' + default.exp_name
    if default.begin_epoch != 0:
        default.resume = True
    default.accs = dict()

    if default.gpus == '':  # auto select GPU
        import GPUtil
        deviceIDs = GPUtil.getAvailable(order='lowest', limit=1, maxMemory=.2)
        if len(deviceIDs) == 0:
            deviceIDs = GPUtil.getAvailable(order='lowest', limit=1, maxMemory=.9, maxLoad=1)

        GPUs = GPUtil.getGPUs()
        default.gpus = str(len(GPUs)-1-deviceIDs[0])
        logger.info('using gpu '+default.gpus)
    default.val_gpu = default.gpus[0]
    # default.prefetch_thread_num = min(default.prefetch_thread_num, config.TRAIN.SAMPLES_PER_BATCH)

    train_net(default)

    # test the best model on the test set
    from test import test_net
    test_net(default.e2e_prefix, default.best_epoch)
def train(checkpoint_dir=None, data_dir=None, data_config=None):
    """Train the prosody + word dialog-act classifier, then test it and plot losses.

    Args:
        checkpoint_dir: Directory that receives ``CP_epoch<N>.pth`` on every
            even epoch index.  It is created when missing.  When ``None``,
            checkpointing is skipped with a warning.
        data_dir: Not used by this function.
        data_config: Mapping read for the keys ``device``, ``model_name``,
            ``hidden_size``, ``num_classes``, ``lr`` and ``epochs``; it is also
            forwarded to ``get_label_dict`` and ``load_data``.

    Side effects:
        Prints progress, writes checkpoints, and saves
        ``./plots/train_loss.png`` and ``./plots/val_score.png``.
    """
    config = data_config
    device = config['device']

    tokenizer = BertTokenizer.from_pretrained(config['model_name'])

    # Label dictionary and class weights.
    label_dict, weights = get_label_dict(config)

    # NOTE(review): the class weights are moved to the device but never passed
    # to the loss — confirm whether nn.CrossEntropyLoss(weight=weights) was
    # intended.
    weights = weights.to(device=device)
    criterion = nn.CrossEntropyLoss()

    # DataLoaders - train, validation and test.
    train_loader, val_loader, test_loader = load_data(config, label_dict,
                                                      tokenizer)

    # Word embeddings combined with prosody.  An embeddings-only variant
    # (DialogActClassificationWE) takes the same constructor arguments.
    model = DialogActClassificationProsodyWord(
        model_name=config['model_name'],
        hidden_size=config['hidden_size'],
        num_classes=config['num_classes'],
        device=config['device'])
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

    train_loss = []
    validation_loss = []
    for epoch in range(config['epochs']):
        model.train()
        print("Run Epoch {}".format(epoch))
        epoch_loss = []
        for batch in train_loader:
            targets = (batch['label'].squeeze()).to(device=device)
            # The model consumes one dictionary per batch: text inputs,
            # targets, sequence lengths and the two speech features.
            data = {
                'input_ids': batch['input_ids'].to(device=device),
                'attention_mask': batch['attention_mask'].to(device=device),
                'label': targets,
                'seq_len': batch['seq_len'].to(device=device),
                'pitch': batch['pitch'].to(device=device),
                'freq': batch['freq'].to(device=device)
            }
            logits = model(data)
            loss = criterion(logits, targets)
            epoch_loss.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # Mean training loss of this epoch.
        train_loss.append(sum(epoch_loss) / len(epoch_loss))

        # Validation
        val_loss, val_accuracy = eval_net(model, val_loader, device, criterion)
        validation_loss.append(val_loss)
        print("Validation accuracy :", val_accuracy)

        if epoch % 2 == 0:
            if checkpoint_dir is None:
                logging.warning(
                    'No checkpoint_dir given; checkpoint %d not saved',
                    epoch + 1)
            else:
                newly_created = not os.path.isdir(checkpoint_dir)
                # Real failures (permissions, bad path) surface here instead
                # of being swallowed and resurfacing inside torch.save.
                os.makedirs(checkpoint_dir, exist_ok=True)
                if newly_created:
                    logging.info('Created checkpoint directory')
                # os.path.join keeps the file inside checkpoint_dir whether or
                # not the directory was given with a trailing separator.
                torch.save(
                    model.state_dict(),
                    os.path.join(checkpoint_dir, f'CP_epoch{epoch + 1}.pth'))
                logging.info(f'Checkpoint {epoch + 1} saved !')

    test_accuracy, f1_score, f1_per_cls = test_net(test_loader, model, device)
    print("f1-score: ", f1_score)
    print("f1-score per class: ", f1_per_cls)

    # savefig does not create missing directories.
    os.makedirs('./plots', exist_ok=True)

    # == Train Loss Curves ==
    plt.figure(figsize=(15, 7))
    plt.subplot(2, 1, 1)
    plt.title('Training loss')
    plt.plot(train_loss, '-o')
    plt.xlabel('Epoch')
    plt.savefig('./plots/train_loss.png')

    # == Validation Loss Curves ==
    plt.figure(figsize=(15, 7))
    plt.subplot(2, 1, 1)
    plt.title('Validation loss')
    plt.plot(validation_loss, '-o')
    # One validation point is recorded per epoch, so the axis is epochs.
    plt.xlabel('Epoch')
    plt.savefig('./plots/val_score.png')

    print("==End of Training ==")
            refine_net = refine_net.cuda()
            refine_net = torch.nn.DataParallel(refine_net)
        else:
            refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model, map_location='cpu')))

        refine_net.eval()
        args.poly = True

    t = time.time()

    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k+1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)

        bboxes, polys, score_text, det_scores = test.test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, args, refine_net)
        
        bbox_score={}

        for box_num in range(len(bboxes)):
          key = str (det_scores[box_num])
          item = bboxes[box_num]
          bbox_score[key]=item

        data['word_bboxes'][k]=bbox_score
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        cv2.imwrite(mask_file, score_text)

        file_utils.saveResult(image_path, image[:,:,::-1], polys, dirname=result_folder)
    if step % disp_interval == 0:
        duration = t.toc(average=False)
        fps = step_cnt / duration
        log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch), lr: %.9f, momen: %.4f, wt_dec: %.6f' % (
            step, blobs['im_name'], train_loss / step_cnt, fps, 1. / fps, lr,
            momentum, weight_decay)
        log_print(log_text, color='green', attrs=['bold'])
        re_cnt = True

    #TODO: evaluate the model every N iterations (N defined in handout)
    if step % 5000 == 0 and step > 0:
        net.eval()
        aps = test_net('WSDDN_TEST',
                       net,
                       imdb_test,
                       300,
                       thresh=1e-4,
                       visualize=True,
                       logger=logger,
                       step=step)
        net.train()

    #TODO: Perform all visualizations here
    #You can define other interval variable if you want (this is just an
    #example)
    #The intervals for different things are defined in the handout
    #TODO: Create required visualizations
    if use_tensorboard:
        if visualize and step % 500 == 0:
            logger.scalar_summary('train/loss', loss, step)
        if visualize and step % 2000 == 0:
            logger.model_param_histo_summary(net, step)
def main():
    """Train the feature encoder and relation network episodically.

    Each episode samples a support set and a query batch from the training
    dataset, scores every (query, class) pair with the relation network and
    regresses the scores onto one-hot labels with an MSE loss. Every
    ``TEST_T`` episodes both networks are evaluated on the test and train
    datasets; whenever the test accuracy improves, both networks are saved
    under a time-stamped directory below ``weights/User_ours``.
    """

    logprint("init data")
    train_dataset = datasets.Datasets(TRAIN_DATA)
    test_dataset = datasets.Datasets(TEST_DATA)
    #val_dataset = datasets.Datasets(VAL_DATA)

    # The run directory name only has minute resolution, so a second launch
    # within the same minute would hit an existing directory. exist_ok=True
    # makes that (and the parent-directory creation) safe and race-free.
    weight_name = Path(time.strftime("%m_%d_%H_%M", time.localtime()))
    weight_path = "weights/User_ours" / weight_name
    os.makedirs(weight_path, exist_ok=True)
    logprint("init neural networks")

    feature_encoder = CNNEncoder(Net_factor)
    relation_network = RelationNetwork(Net_factor)

    feature_encoder.to(device)
    relation_network.to(device)

    # Each network gets its own optimizer plus a warm-up scheduler that
    # hands over to cosine annealing afterwards.
    feature_encoder_optim = torch.optim.AdamW(feature_encoder.parameters(),
                                              lr=LR)
    feature_encoder_post_scheduler = lr_scheduler.CosineAnnealingLR(
        feature_encoder_optim, T_max=20000, eta_min=5e-5)
    feature_encoder_scheduler = GradualWarmupScheduler(
        feature_encoder_optim,
        multiplier=3,
        total_epoch=100,
        after_scheduler=feature_encoder_post_scheduler,
    )
    relation_network_optim = torch.optim.Adam(relation_network.parameters(),
                                              lr=LR)
    # relation_network_optim = optim.AdaBound(relation_network.parameters(), lr=LR)
    relation_network_post_scheduler = lr_scheduler.CosineAnnealingLR(
        relation_network_optim, T_max=20000, eta_min=5e-5)
    relation_network_scheduler = GradualWarmupScheduler(
        relation_network_optim,
        multiplier=3,
        total_epoch=100,
        after_scheduler=relation_network_post_scheduler,
    )
    loss_f = nn.MSELoss().to(device)
    feature_encoder.zero_grad()
    relation_network.zero_grad()
    # NOTE(review): stepping the optimizers before any backward pass is
    # presumably here to keep the optimizer/scheduler call order valid
    # before the first scheduler.step() -- confirm.
    feature_encoder_optim.step()
    relation_network_optim.step()

    # Step 3: build graph
    logprint("Training...")
    max_acc = 0.0
    max_train_acc = 0.0
    for episode in range(EPISODE):

        # init dataset
        # sample_dataloader is to obtain previous samples for compare
        # batch_dataloader is to batch samples for training

        # sample datas
        samples, sample_labels, batches, batch_labels = train_dataset.get_train_datas(
            CLASS_NUM, SAMPLE_NUM_PER_CLASS, BATCH_NUM_PER_CLASS, (0, 10))

        # calculate features

        sample_features = feature_encoder(samples.to(device))
        sample_features = sample_features.view(CLASS_NUM, SAMPLE_NUM_PER_CLASS,
                                               FEATURE_DIM, FEATURE_H,
                                               FEATURE_W)
        # Sum the per-class support features into one prototype per class.
        sample_features = torch.sum(sample_features, 1).squeeze(1)

        batch_features = feature_encoder(batches.to(device))

        # calculate relations
        # each batch sample link to every samples to calculate relations
        # to form a 100x128 matrix for relation network
        sample_features_ext = sample_features.unsqueeze(0).repeat(
            BATCH_NUM_PER_CLASS * CLASS_NUM, 1, 1, 1, 1)
        batch_features_ext = batch_features.unsqueeze(0).repeat(
            CLASS_NUM, 1, 1, 1, 1)
        batch_features_ext = torch.transpose(batch_features_ext, 0, 1)

        # Concatenate along the channel axis so each pair becomes one input
        # with 2 * FEATURE_DIM channels.
        relation_pairs = torch.cat((sample_features_ext, batch_features_ext),
                                   2).view(-1, FEATURE_DIM * 2, FEATURE_H,
                                           FEATURE_W)
        relations = relation_network(relation_pairs).view(-1, CLASS_NUM)

        one_hot_labels = (torch.zeros(BATCH_NUM_PER_CLASS * CLASS_NUM,
                                      CLASS_NUM).scatter_(
                                          1,
                                          batch_labels.view(-1, 1).long(),
                                          1).to(device))

        loss = loss_f(relations, one_hot_labels)

        # training

        feature_encoder.zero_grad()
        relation_network.zero_grad()

        loss.backward()

        torch.nn.utils.clip_grad_value_(feature_encoder.parameters(), 0.5)
        torch.nn.utils.clip_grad_value_(relation_network.parameters(), 0.5)

        feature_encoder_optim.step()
        relation_network_optim.step()

        feature_encoder_scheduler.step()
        relation_network_scheduler.step()

        if (episode + 1) % 20 == 0:
            logprint(
                f"episode:{episode + 1} loss {loss.item():.5f} lr {feature_encoder_optim.param_groups[0]['lr']:.5f}"
            )
        #
        if (episode + 1) % TEST_T == 0:

            # test
            logprint("Testing...")
            feature_encoder.eval()
            relation_network.eval()
            with torch.no_grad():
                test_accuracy, confidence_interval = test_net(
                    feature_encoder,
                    relation_network,
                    TEST_EPISODE,
                    test_dataset,
                    class_range=(100, 200),
                    train=False,
                )
                test_train_accuracy, confidence_interval_train = test_net(
                    feature_encoder,
                    relation_network,
                    TEST_EPISODE,
                    train_dataset,
                    class_range=(0, 10),
                    train=True,
                )

            # "best" in these messages is the best value seen *before* this
            # evaluation; the running maxima are updated just below.
            logprint(
                f"test train accuracy: {test_train_accuracy:.4f} h:{confidence_interval_train} best:{max_train_acc:.4f}"
            )
            logprint(
                f"test accuracy: {test_accuracy:.4f} h:{confidence_interval} best:{max_acc:.4f}"
            )

            if test_train_accuracy > max_train_acc:
                max_train_acc = test_train_accuracy
            if test_accuracy > max_acc:

                # New best test accuracy: checkpoint both networks.
                max_acc = test_accuracy
                time_str = time.strftime("%m_%d_%H_%M_%S", time.localtime())

                log_str = f"T!{time_str}_D!{train_dataset.get_datas_name()}_{CLASS_NUM}W{SAMPLE_NUM_PER_CLASS}S_E!{episode}_A!{max_acc:.4f}"
                serialization.save_net(
                    feature_encoder,
                    weight_path / f"feature_encoder_{log_str}.pkl",
                )
                serialization.save_net(
                    relation_network,
                    weight_path / f"relation_network_{log_str}.pkl",
                )

            # Back to training mode for the next episodes.
            feature_encoder.train()
            relation_network.train()
예제 #24
0
        cfg.TEST.MODEL = osp.join(output_dir, 'final.caffemodel')

    if args.test == 'true' or args.test == 'True':  # the testing entrance
        if isinstance(cfg.TEST.GPU_ID, int):
            cfg.TEST.GPU_ID = [cfg.TEST.GPU_ID]

        if not cfg.TEST.DEMO.ENABLE:
            imdb = get_imdb(cfg.TEST.DB)
            output_dir = get_output_dir(imdb.name,
                                        cfg.NAME + '_' + cfg.LOG.TIME)
        else:
            imdb = None
            output_dir = get_output_dir("demo", cfg.NAME + '_' + cfg.LOG.TIME)

        f = open(osp.join(output_dir, 'stderr.log'), 'w', 0)
        os.dup2(f.fileno(), sys.stderr.fileno())
        os.dup2(sys.stderr.fileno(), sys.stderr.fileno())

        # Edit test prototxts
        target_test = osp.join(output_dir, 'test.prototxt')

        manipulate_test(cfg.TEST.PROTOTXT, target_test)

        with open(osp.join(output_dir, 'cfgs.txt'), 'w') as f:
            cfg_dump({i: cfg[i] for i in cfg if i != 'TRAIN'}, f)
        tb.sess.add_text('test_cfg', \
                         cfg_table({i: cfg[i] for i in cfg if i != 'TRAIN'}))

        test_net(imdb, output_dir, target_test, no_cache=cfg.TEST.NO_CACHE)
        f.close()
예제 #25
0
            logger.model_param_histo_summary(net, step)
    if use_visdom:
        if step % epoch_loss == 0:
            cap = "train loss"
            vis.line(X=(np.asarray([step])),
                     Y=(np.asarray([train_loss / step_cnt])),
                     win=win,
                     update='append')
    # TODO: evaluate the model every N iterations (N defined in handout)

    if visualize and step % vis_interval == 0:
        net.eval()
        aps = test_net(save_name,
                       net,
                       test_imdb,
                       max_per_image,
                       thresh=thresh,
                       visualize=visualize,
                       logger=logger,
                       step=step)
        net.train()
        #TODO: Create required visualizations
        if use_tensorboard:
            for i_cls in range(test_imdb.num_classes):
                logger.scalar_summary(
                    'AP_{0}'.format(test_imdb._classes[i_cls]), aps[i_cls],
                    step)
                print('Logging to Tensorboard')
        if use_visdom:
            vis.line(X=(np.asarray([step])),
                     Y=(np.asarray([np.mean(aps)])),
                     win=mAP_win,
예제 #26
0
        torch.save(score_region, label_dir + 'region.pt')
        torch.save(score_link, label_dir + 'link.pt')
        torch.save(conf_map, label_dir + 'conf.pt')


if __name__ == '__main__':
    # Manual visual check: show the stored label maps for one training image
    # next to the maps produced by the pretrained CRAFT network.
    import ocr

    score_region = torch.load('/home/ubuntu/Kyumin/craft/data/IC13/labels/train/100/region.pt')
    score_link = torch.load('/home/ubuntu/Kyumin/craft/data/IC13/labels/train/100/link.pt')
    conf_map = torch.load('/home/ubuntu/Kyumin/craft/data/IC13/labels/train/100/conf.pt')
    image = imgproc.loadImage('/home/ubuntu/Kyumin/Autotation/data/IC13/images/train/100.jpg')
    print(score_region.shape, score_link.shape, conf_map.shape)

    # Stored labels: the region map is rendered as a heatmap, the link and
    # confidence maps are shown as loaded.
    # cv2.imshow('original', image)
    region_heatmap = imgproc.cvt2HeatmapImg(score_region)
    cv2.imshow('region', region_heatmap)
    cv2.imshow('link', score_link)
    cv2.imshow('conf', conf_map)

    # Pretrained network, switched to inference mode.
    net = CRAFT().cuda()
    pretrained_weights = torch.load('weights/craft_mlt_25k.pth')
    net.load_state_dict(test.copyStateDict(pretrained_weights))
    net.eval()

    # Only the third and fourth values returned by test_net are displayed.
    cli_args = ocr.argument_parser().parse_args()
    _, _, ref_text, ref_link, _ = test.test_net(net, image, cli_args)
    ref_text_heatmap = imgproc.cvt2HeatmapImg(ref_text)
    cv2.imshow('ref text', ref_text_heatmap)
    cv2.imshow('ref link', ref_link)

    # Keep the windows open until a key is pressed, then clean up.
    cv2.waitKey(0)
    cv2.destroyAllWindows()


예제 #27
0
파일: train.py 프로젝트: WWWWWLI/KWS_Text
def train(net, trained_epoch, optimizer, best_valid_acc, savedir, logger):
    """Train ``net`` with early stopping and checkpoint the best epoch.

    The batch layout and the loss combination depend on
    ``config.TRAIN.MODE`` ('NoText', 'FinetuneSilence', 'Text',
    'TextAnchor', 'CCA' or 'ThreeAudios'). After every epoch the model is
    validated; a snapshot of the weights and optimizer state is kept
    whenever the validation accuracy exceeds ``best_valid_acc``. Training
    stops once ``config.TRAIN.EARLYSTOP`` consecutive epochs bring no
    improvement. At the end the best snapshot is written to ``savedir``,
    restored into ``net`` and passed to ``test_net``.

    Args:
        net: model to train; wrapped in ``DataParallel`` when more than one
            GPU is visible.
        trained_epoch: number of already-trained epochs (only logged).
        optimizer: optimizer bound to ``net``'s parameters.
        best_valid_acc: validation accuracy a new epoch has to beat.
        savedir: output directory prefix (joined by string concatenation,
            so it is expected to end with a path separator).
        logger: logger used for progress messages.

    Raises:
        ValueError: if ``config.TRAIN.MODE`` is not a supported mode.
    """
    # Local import: only needed to snapshot the best weights.
    import copy

    setup_seed(config.SEED)
    os.environ['CUDA_VISIBLE_DEVICES'] = config.TRAIN.VISIBLEDEVICES
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    if torch.cuda.device_count() > 1:
        logger.info('[Message] Multi GPUS:{}'.format(torch.cuda.device_count()))
        net = torch.nn.DataParallel(net, device_ids=list(range(torch.cuda.device_count())))
    elif torch.cuda.device_count() == 1:
        logger.info('[Message] 1 GPU')
    else:
        logger.info('[Message] CPU')
    net.to(device)

    # Train dataset and dataloader
    train_dataset = GoogleSpeechCommandDataset(set='train')
    train_dataloader = DataLoader(train_dataset, batch_size=config.TRAIN.BATCHSIZE, shuffle=True,
                                  num_workers=config.TRAIN.NUMWORKS, pin_memory=True)

    # losses
    if 'CE' in config.TRAIN.LOSS:
        # Cross entropy loss
        ce_criterion = nn.CrossEntropyLoss()
    if 'TRIPLET' in config.TRAIN.LOSS:
        # triplet loss
        tri_criterion = nn.TripletMarginLoss()
    if 'CCA' in config.TRAIN.LOSS:
        # CCA loss
        cca_criterion = cca_loss(outdim_size=config.TRAIN.CCAOUTDIM, use_all_singular_values=False, device=device).loss

    # optimizer
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=config.TRAIN.PATIENCE,
                                                     verbose=True, factor=0.9)

    logger.info('[Train] Batch size {}'.format(config.TRAIN.BATCHSIZE))
    logger.info('[Train] Start epoch {}'.format(trained_epoch))
    logger.info('[Train] Patience {}'.format(config.TRAIN.PATIENCE))
    logger.info('[Train] Init learning Rate {}'.format(config.TRAIN.LR))

    if config.TRAIN.MODE == 'Text' or config.TRAIN.MODE == 'TextAnchor' or config.TRAIN.MODE == 'CCA':
        logger.info('[Train] Use text embedding:{}'.format(config.TEXTEMB))

    # Counter. If valid acc not improve in patience epochs, stop training
    counter = 0
    best_epoch = 0
    best_valid_loss = None
    best_state = None

    # save model code
    shutil.copy(config.ROOTDIR + 'models/' + config.TRAIN.MODELTYPE + '.py', savedir + 'scripts/')
    shutil.copy(config.ROOTDIR + 'utils/' + 'dataloader.py', savedir + 'scripts/')
    shutil.copy(config.ROOTDIR + 'config.py', savedir + 'scripts/')
    shutil.copy(config.ROOTDIR + 'train.py', savedir + 'scripts/')
    shutil.copy(config.ROOTDIR + 'test.py', savedir + 'scripts/')

    for epoch in range(1, config.TRAIN.EPOCH + 1):
        net.train()
        with tqdm(train_dataloader, desc='Epoch {}'.format(epoch), ncols=150) as t:
            if config.TRAIN.MODE == 'NoText' or config.TRAIN.MODE == 'FinetuneSilence':
                # Audio only: plain cross-entropy classification.
                sum_train_ce_loss = 0
                start_epoch_time = time.time()
                for (waveform, target) in t:
                    start_batch_time = time.time()
                    # t.set_description('Epoch {}'.format(epoch))

                    waveform = waveform.type(torch.FloatTensor)
                    waveform, target = waveform.to(device), target.to(device)

                    optimizer.zero_grad()

                    output = net(waveform)

                    train_ce_loss = ce_criterion(output, target)
                    sum_train_ce_loss = sum_train_ce_loss + train_ce_loss.item() * config.TRAIN.BATCHSIZE
                    train_loss = train_ce_loss

                    train_loss.backward()
                    optimizer.step()

                    end_batch_time = time.time()

                    t.set_postfix(train_ce_loss=train_ce_loss.item(),
                                  lr=optimizer.param_groups[0]['lr'],
                                  time_batch=end_batch_time - start_batch_time)
                end_epoch_time = time.time()
                message = '[Train] Epoch:{}, train_ce_loss:{:4f}, lr:{}, train_time(s):{:4f}'.format(
                    epoch,
                    sum_train_ce_loss / len(train_dataloader.dataset),
                    optimizer.param_groups[0]['lr'],
                    end_epoch_time - start_epoch_time
                )
            elif config.TRAIN.MODE == 'Text':
                # Audio + matching / non-matching word vectors:
                # cross-entropy plus triplet loss, equally weighted.
                sum_train_ce_loss = 0
                sum_train_tri_loss = 0
                start_epoch_time = time.time()
                for (waveform, match_word_vec, unmatch_word_vec, target) in t:
                    start_batch_time = time.time()
                    t.set_description('Epoch {}'.format(epoch))

                    waveform = waveform.type(torch.FloatTensor)
                    match_word_vec = match_word_vec.type(torch.FloatTensor)
                    unmatch_word_vec = unmatch_word_vec.type(torch.FloatTensor)
                    waveform = waveform.to(device)
                    match_word_vec = match_word_vec.to(device)
                    unmatch_word_vec = unmatch_word_vec.to(device)
                    target = target.to(device)

                    optimizer.zero_grad()

                    output, audio_embedding, match_word_vec, unmatch_word_vec = net(waveform, match_word_vec,
                                                                                    unmatch_word_vec)

                    train_ce_loss = ce_criterion(output, target)
                    train_tri_loss = tri_criterion(audio_embedding, match_word_vec, unmatch_word_vec)
                    sum_train_ce_loss = sum_train_ce_loss + train_ce_loss.item() * config.TRAIN.BATCHSIZE
                    sum_train_tri_loss = sum_train_tri_loss + train_tri_loss.item() * config.TRAIN.BATCHSIZE
                    train_loss = 0.5 * train_ce_loss + 0.5 * train_tri_loss

                    train_loss.backward()
                    optimizer.step()

                    end_batch_time = time.time()

                    t.set_postfix(train_ce_loss=train_ce_loss.item(),
                                  train_tri_loss=train_tri_loss.item(),
                                  lr=optimizer.param_groups[0]['lr'],
                                  time_batch=end_batch_time - start_batch_time)
                end_epoch_time = time.time()
                message = '[Train] Epoch:{}, train_ce_loss:{}, train_tri_loss:{:4f}, lr:{}, train_time(s):{:4f}'.format(
                    epoch,
                    sum_train_ce_loss / len(train_dataloader.dataset),
                    sum_train_tri_loss / len(train_dataloader.dataset),
                    optimizer.param_groups[0]['lr'],
                    end_epoch_time - start_epoch_time
                )
            elif config.TRAIN.MODE == 'TextAnchor':
                # Text embedding is the triplet anchor; positive and negative
                # audio embeddings are the other two legs.
                sum_train_ce_loss = 0
                sum_train_tri_loss = 0
                start_epoch_time = time.time()
                for (pos_waveform, neg_waveform, pos_word_vec, pos_label) in t:
                    start_batch_time = time.time()

                    pos_waveform = pos_waveform.type(torch.FloatTensor)
                    neg_waveform = neg_waveform.type(torch.FloatTensor)
                    pos_word_vec = pos_word_vec.type(torch.FloatTensor)
                    pos_waveform = pos_waveform.to(device)
                    neg_waveform = neg_waveform.to(device)
                    pos_word_vec = pos_word_vec.to(device)
                    pos_label = pos_label.to(device)

                    optimizer.zero_grad()

                    pos, audio_embedding_pos, audio_embedding_neg, text_embedding = net(pos_waveform, neg_waveform,
                                                                                        pos_word_vec)
                    train_ce_loss = ce_criterion(pos, pos_label)
                    train_tri_loss = tri_criterion(text_embedding, audio_embedding_pos, audio_embedding_neg)
                    sum_train_ce_loss = sum_train_ce_loss + train_ce_loss.item() * config.TRAIN.BATCHSIZE
                    sum_train_tri_loss = sum_train_tri_loss + train_tri_loss.item() * config.TRAIN.BATCHSIZE
                    train_loss = 0.5 * train_ce_loss + 0.5 * train_tri_loss

                    train_loss.backward()
                    optimizer.step()

                    end_batch_time = time.time()

                    t.set_postfix(train_ce_loss=train_ce_loss.item(),
                                  train_tri_loss=train_tri_loss.item(),
                                  lr=optimizer.param_groups[0]['lr'],
                                  time_batch=end_batch_time - start_batch_time)
                end_epoch_time = time.time()
                message = '[Train] Epoch:{}, train_ce_loss:{:4f}, train_tri_loss:{:4f}, lr:{}, train_time(s):{:4f}'.format(
                    epoch,
                    sum_train_ce_loss / len(train_dataloader.dataset),
                    sum_train_tri_loss / len(train_dataloader.dataset),
                    optimizer.param_groups[0]['lr'],
                    end_epoch_time - start_epoch_time
                )
            elif config.TRAIN.MODE == 'CCA':
                # Cross-entropy plus a lightly weighted CCA loss between the
                # audio and text embeddings.
                sum_train_ce_loss = 0
                sum_train_cca_loss = 0
                start_epoch_time = time.time()
                for (waveform, word_vec, target) in t:
                    start_batch_time = time.time()
                    t.set_description('Epoch {}'.format(epoch))

                    waveform = waveform.type(torch.FloatTensor)
                    word_vec = word_vec.type(torch.FloatTensor)
                    waveform = waveform.to(device)
                    word_vec = word_vec.to(device)
                    target = target.to(device)

                    optimizer.zero_grad()

                    output, audio_embedding, text_embedding = net(waveform, word_vec)

                    train_ce_loss = ce_criterion(output, target)
                    train_cca_loss = cca_criterion(audio_embedding, text_embedding)  # * config.TRAIN.BATCHSIZE
                    sum_train_ce_loss = sum_train_ce_loss + train_ce_loss.item() * config.TRAIN.BATCHSIZE
                    # .item() detaches the value; accumulating the tensor
                    # itself would keep every batch's autograd graph alive.
                    sum_train_cca_loss = sum_train_cca_loss + train_cca_loss.item() * config.TRAIN.BATCHSIZE
                    train_loss = train_ce_loss + 0.01 * train_cca_loss

                    train_loss.backward()
                    optimizer.step()

                    end_batch_time = time.time()

                    t.set_postfix(train_ce_loss=train_ce_loss.item(),
                                  train_cca_loss=train_cca_loss.item(),
                                  lr=optimizer.param_groups[0]['lr'],
                                  time_batch=end_batch_time - start_batch_time)
                end_epoch_time = time.time()
                message = '[Train] Epoch:{}, train_ce_loss:{:4f}, train_cca_loss:{:4f}, lr:{}, train_time(s):{:4f}'.format(
                    epoch,
                    sum_train_ce_loss / len(train_dataloader.dataset),
                    sum_train_cca_loss / len(train_dataloader.dataset),
                    optimizer.param_groups[0]['lr'],
                    end_epoch_time - start_epoch_time
                )
            elif config.TRAIN.MODE == 'ThreeAudios':
                # Triplet of audio clips: anchor is classified, and all three
                # embeddings feed the triplet loss.
                sum_train_ce_loss = 0
                sum_train_tri_loss = 0
                start_epoch_time = time.time()
                for (anchor_waveform, pos_waveform, neg_waveform, anchor_label_num) in t:
                    start_batch_time = time.time()

                    anchor_waveform = anchor_waveform.type(torch.FloatTensor)
                    pos_waveform = pos_waveform.type(torch.FloatTensor)
                    neg_waveform = neg_waveform.type(torch.FloatTensor)
                    anchor_waveform = anchor_waveform.to(device)
                    pos_waveform = pos_waveform.to(device)
                    neg_waveform = neg_waveform.to(device)
                    anchor_label_num = anchor_label_num.to(device)

                    optimizer.zero_grad()

                    anchor_output, audio_embedding_anchor, audio_embedding_pos, audio_embedding_neg = net(
                        anchor_waveform, pos_waveform, neg_waveform)

                    train_ce_loss = ce_criterion(anchor_output, anchor_label_num)
                    train_tri_loss = tri_criterion(audio_embedding_anchor, audio_embedding_pos, audio_embedding_neg)
                    sum_train_ce_loss = sum_train_ce_loss + train_ce_loss.item() * config.TRAIN.BATCHSIZE
                    sum_train_tri_loss = sum_train_tri_loss + train_tri_loss.item() * config.TRAIN.BATCHSIZE
                    train_loss = 0.5 * train_ce_loss + 0.5 * train_tri_loss

                    train_loss.backward()
                    optimizer.step()

                    end_batch_time = time.time()

                    t.set_postfix(train_ce_loss=train_ce_loss.item(),
                                  train_tri_loss=train_tri_loss.item(),
                                  lr=optimizer.param_groups[0]['lr'],
                                  time_batch=end_batch_time - start_batch_time)
                end_epoch_time = time.time()
                message = '[Train] Epoch:{}, train_ce_loss:{:4f}, train_tri_loss:{:4f}, lr:{}, train_time(s):{:4f}'.format(
                    epoch,
                    sum_train_ce_loss / len(train_dataloader.dataset),
                    sum_train_tri_loss / len(train_dataloader.dataset),
                    optimizer.param_groups[0]['lr'],
                    end_epoch_time - start_epoch_time
                )
            else:
                # Fail with a clear message instead of a NameError on
                # `message` further down.
                raise ValueError('Unsupported config.TRAIN.MODE: {!r}'.format(config.TRAIN.MODE))

        logger.info(message)

        valid_loss, valid_acc = valid(net, device=device, epoch=epoch, logger=logger)
        scheduler.step(valid_acc)  # valid acc not increase

        # Update best valid models
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            best_valid_loss = valid_loss
            best_epoch = epoch
            # With several GPUs the model is wrapped in DataParallel; save the
            # inner module so the keys carry no 'module.' prefix.
            source_net = net.module if torch.cuda.device_count() > 1 else net
            # state_dict() returns references to the live tensors, which later
            # epochs keep updating in place -- deep-copy to freeze the best
            # epoch's values.
            best_state = {'net': copy.deepcopy(source_net.state_dict()),
                          'optimizer': copy.deepcopy(optimizer.state_dict()),
                          'epoch': best_epoch, 'valid_acc': best_valid_acc}
            counter = 0
        else:
            counter = counter + 1
            logger.info('[Early stopping] {}'.format(counter))

            if counter >= config.TRAIN.EARLYSTOP:
                logger.info('[Message] Early stopping.')
                logger.info('[Message] Best valid acc {:.4f}\n'.format(best_valid_acc))
                break
        logger.info('[Message] Best valid acc {:.4f}\n'.format(best_valid_acc))

    if best_state is None:
        # No epoch beat the incoming best_valid_acc: there is nothing new to
        # save or test.
        logger.info('[Message] Valid acc never exceeded {:.4f}; no checkpoint saved.'.format(best_valid_acc))
        return

    # save model optimizer and trained epoch
    torch.save(best_state, savedir + 'epoch_{}_valid_loss_{:.4f}_acc_{:2f}.pth'.format(
        best_epoch,
        best_valid_loss,
        best_valid_acc))

    # Put the best weights back into the model so the final test evaluates
    # the checkpoint that was just saved, not the last epoch.
    target_net = net.module if torch.cuda.device_count() > 1 else net
    target_net.load_state_dict(best_state['net'])
    best_net = net

    test_net(best_net, savedir, logger, config.TRAIN.MODE)
    # Log to screen
    if step % disp_interval == 0:
        duration = t.toc(average=False)
        fps = step_cnt / duration
        log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch), lr: %.9f, momen: %.4f, wt_dec: %.6f' % (
            step, blobs['im_name'], train_loss / step_cnt, fps, 1. / fps, lr,
            momentum, weight_decay)
        log_print(log_text, color='green', attrs=['bold'])
        re_cnt = True

    #TODO: evaluate the model every N iterations (N defined in handout)
    if step % eval_interval == 0 and step > 0:
        aps = test_net(name='test_weights',
                       net=net,
                       imdb=test_imdb,
                       logger=logger,
                       step=step,
                       visualize=True,
                       thresh=0.0001)
        if firstEval:
            vis.line(X=np.array([step]),
                     Y=np.array([np.mean(aps)]),
                     win="test/mAP",
                     opts=dict(title='Test mAP'))
            firstEval = 0
        else:
            vis.line(X=np.array([step]),
                     Y=np.array([np.mean(aps)]),
                     win="test/mAP",
                     update="append",
                     opts=dict(title='Test mAP'))
예제 #29
0
	
	if args.cfg_file is not None:
		cfg_from_file(args.cfg_file)
	if args.set_cfgs is not None:
		cfg_from_list(args.set_cfgs)
	
	cfg.GPU_ID = args.gpu_id
	'''

    print('Using config:')
    pprint.pprint(cfg)
    '''
	while not os.path.exists(args.caffemodel) and args.wait:
		print('Waiting for {} to exist...'.format(args.caffemodel))
		time.sleep(10)
	'''

    #	caffe.set_mode_gpu()
    #	caffe.set_device(args.gpu_id)
    #	net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
    #	net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]

    imdb = get_imdb(imdb_name)
    '''
	imdb.competition_mode(args.comp_mode)
	if not cfg.TEST.HAS_RPN:
		imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)
	'''

    test_net(imdb)
예제 #30
0
 def func(gpu_id):
     """Run ``test_net`` on the slice of work assigned to ``gpu_id``.

     Builds a fresh ``Tester`` around a new ``Model``, loads the
     ``val_model`` weights from ``model_dir`` and evaluates the interval
     ``[ranges[gpu_id], ranges[gpu_id + 1]]``.
     """
     tester = Tester(Model(), self.cfg)
     tester.load_weights(val_model, model_dump_dir=model_dir)
     # Named det_range so the builtin `range` is not shadowed.
     det_range = [ranges[gpu_id], ranges[gpu_id + 1]]
     return test_net(tester, dets, det_range, gpu_id, self.d.sigmas, False)
예제 #31
0
파일: train.py 프로젝트: lizimo061/16824
    # Log to screen
    if step % disp_interval == 0:
        duration = t.toc(average=False)
        fps = step_cnt / duration
        log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch), lr: %.9f, momen: %.4f, wt_dec: %.6f' % (
            step, blobs['im_name'], train_loss / step_cnt, fps, 1. / fps, lr,
            momentum, weight_decay)
        log_print(log_text, color='green', attrs=['bold'])
        re_cnt = True

    #TODO: evaluate the model every N iterations (N defined in handout)
    if step % eval_interval == 0 and step > 0:
        net.eval()
        aps = test_net("test",
                       net,
                       test_imdb,
                       visualize=True,
                       logger=logger,
                       step=step)
        logger.scalar_summary("test/map", np.mean(aps), step)

        if logger.vis != None:
            if firstFlag:
                logger.vis.line(X=np.array([step]),
                                Y=np.array([np.mean(aps)]),
                                win="test/mAP",
                                opts=dict(title='Test mAP'))
                firstFlag = False
            else:
                logger.vis.line(X=np.array([step]),
                                Y=np.array([np.mean(aps)]),
                                win="test/mAP",