예제 #1
0
def run_main(config):
    """Run MAMC training with periodic logging, snapshots and validation.

    Builds the training and validation models, creates an ``Experiment``
    under ``<root_dir>/experiments`` and then runs the training net for
    ``solver.max_iterations`` iterations. Along the way it:

    * logs the running average of accuracy / accuracy_5 / softmax_loss every
      ``solver.display`` iterations, together with the learning-rate blob;
    * appends per-epoch training averages to the charts every
      ``solver.train_iterations`` iterations;
    * snapshots the model parameters every ``solver.snapshot`` iterations;
    * runs ``solver.test_iterations`` validation batches every
      ``solver.test_interval`` iterations and plots their averages.

    Args:
        config: Nested mapping with the run settings. Keys read here are
            ``finetune``, ``root_dir``, ``config_path``, ``name``,
            ``gpu_id`` and, under ``solver``, ``max_iterations``,
            ``display``, ``train_iterations``, ``snapshot``,
            ``test_interval`` and ``test_iterations``.

    Returns:
        None. Results are written through the ``Experiment`` object.
    """
    # init model
    initialize(config)
    training_model = build_training_model(config)
    validation_model = build_validation_model(config)

    # NOTE: a disabled snippet that dumped the network graph with
    # net_drawer.GetPydotGraph / GetPydotGraphMinimal used to sit here as a
    # bare string literal; it never executed and has been removed.

    # experiment params config
    # The dataset part of the tag is hard-coded ("imagenet" was the
    # alternative); the prefix records the training mode.
    tag = "cars196"
    if config['finetune']:
        tag = 'FINETUNE-{}'.format(tag)
    else:
        tag = 'RETRAIN-{}'.format(tag)

    root_experiments_dir = os.path.join(config['root_dir'], 'experiments')
    experiment = Experiment(root_experiments_dir, tag)
    experiment.add_config_file(config['config_path'])

    # add chart
    chart_acc = experiment.add_chart('accuracy',
                                     xlabel='epochs',
                                     ylabel='accuracy')
    chart_acc_5 = experiment.add_chart('accuracy_5',
                                       xlabel='epochs',
                                       ylabel='accuracy_5')
    chart_softmax_loss = experiment.add_chart('softmax_loss',
                                              xlabel='epochs',
                                              ylabel='softmax_loss')

    # plot params (should be added into 'experiment module')
    # TODO add 'variable' object to Experiment class
    # Per-epoch series that are re-plotted each time they grow.
    training_acc_statistics = []
    training_acc5_statistics = []
    training_softmax_loss_statistics = []
    validation_acc_statistics = []
    validation_acc5_statistics = []
    validation_softmax_loss_statistics = []

    # Sums since the last epoch boundary.
    epoch_training_acc = 0
    epoch_training_acc5 = 0
    epoch_training_softmax_loss = 0
    # Sums since the last display line.
    training_accuracy = 0
    training_accuracy_5 = 0
    training_softmax_loss = 0

    separator = (
        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    )
    solver = config['solver']

    # run the model
    experiment.add_log(separator)
    for training_iter in tqdm(range(solver['max_iterations'])):
        workspace.RunNet(training_model.net)
        accuracy = workspace.FetchBlob('accuracy')
        accuracy_5 = workspace.FetchBlob('accuracy_5')
        softmax_loss = workspace.FetchBlob('softmax_loss')

        epoch_training_acc += accuracy
        epoch_training_acc5 += accuracy_5
        epoch_training_softmax_loss += softmax_loss

        training_accuracy += accuracy
        training_accuracy_5 += accuracy_5
        training_softmax_loss += softmax_loss

        # Number of iterations finished so far (training_iter is 0-based).
        completed = training_iter + 1

        # display training result
        # No "training_iter != 0" guard here: with an interval of 1 it would
        # skip the first iteration and the next average would be computed
        # from two iterations' worth of sums divided by one.
        if completed % solver['display'] == 0:
            experiment.add_log(
                ("[TRAIN] epoch: {}   iteration: {}   accuracy: {:.4f}   "
                 "accuracy_5: {:.4f}   softmax_loss: {:.4f}").format(
                     (training_iter // solver['train_iterations'] + 1),
                     training_iter,
                     training_accuracy / solver['display'],
                     training_accuracy_5 / solver['display'],
                     training_softmax_loss / solver['display'],
                 ))
            experiment.add_log("Global learning rate: {}".format(
                workspace.FetchBlob(
                    'MultiPrecisionSgdOptimizer_0_lr_gpu{}'.format(
                        config['gpu_id']))))

            # cleanup the counters
            training_accuracy = training_accuracy_5 = training_softmax_loss = 0

        # plot training statistics every epoch (same reasoning as above for
        # the missing first-iteration guard)
        if completed % solver['train_iterations'] == 0:
            training_acc_statistics.append(
                epoch_training_acc / solver['train_iterations'])
            training_acc5_statistics.append(
                epoch_training_acc5 / solver['train_iterations'])
            training_softmax_loss_statistics.append(
                epoch_training_softmax_loss / solver['train_iterations'])

            epoch_training_acc = 0
            epoch_training_acc5 = 0
            epoch_training_softmax_loss = 0

            experiment.add_plot(chart_acc, training_acc_statistics, 'r.--',
                                'training')
            experiment.add_plot(chart_acc_5, training_acc5_statistics, 'r.--',
                                'training')
            experiment.add_plot(chart_softmax_loss,
                                training_softmax_loss_statistics, 'b+--',
                                'training')

        # snapshot training model params
        # The first iteration is never snapshotted, even with an interval of
        # 1; this guard is kept as in the original.
        if training_iter != 0 and completed % solver['snapshot'] == 0:
            # save the model weights
            print("[INFO] snapshot the model..... ")
            experiment.add_init_net_snapshot(
                training_model.GetAllParams(),
                workspace,
                config['name'],
                tag,
                (training_iter // solver['train_iterations'] + 1),
            )
            print("[INFO] snapshot the model. Done.....")

        # start to validate the model
        # Likewise, validation never runs right after the first iteration.
        if training_iter != 0 and completed % solver['test_interval'] == 0:
            test_accuracy = 0
            test_accuracy_5 = 0
            test_softmax_loss = 0

            for _ in range(solver['test_iterations']):
                workspace.RunNet(validation_model.net)
                accuracy = workspace.FetchBlob('accuracy')
                accuracy_5 = workspace.FetchBlob('accuracy_5')
                softmax_loss = workspace.FetchBlob('softmax_loss')

                # update counter
                test_accuracy += accuracy
                test_accuracy_5 += accuracy_5
                test_softmax_loss += softmax_loss
                experiment.add_log(
                    ("[VALIDATION] accuracy: {:.4f}   accuracy_5: {:.4f}   "
                     "softmax_loss: {:.4f}").format(
                         accuracy, accuracy_5, softmax_loss))

            # end validation: average once, then reuse for log and charts
            avg_acc = test_accuracy / solver['test_iterations']
            avg_acc_5 = test_accuracy_5 / solver['test_iterations']
            avg_softmax_loss = test_softmax_loss / solver['test_iterations']

            experiment.add_log(separator)
            experiment.add_log(
                ("[VALIDATION] avg_acc: {:.4f}   avg_acc_5: {:.4f}   "
                 "avg_softmax_loss: {:.4f}").format(
                     avg_acc, avg_acc_5, avg_softmax_loss))

            # plot validation statistics
            validation_acc_statistics.append(avg_acc)
            validation_acc5_statistics.append(avg_acc_5)
            validation_softmax_loss_statistics.append(avg_softmax_loss)

            experiment.add_plot(chart_acc, validation_acc_statistics, 'c.--',
                                'validation')
            experiment.add_plot(chart_acc_5, validation_acc5_statistics,
                                'c.--', 'validation')
            experiment.add_plot(chart_softmax_loss,
                                validation_softmax_loss_statistics, 'g+--',
                                'validation')

    experiment.add_log(separator)
예제 #2
0
def run_main(config):
    """Run MAMC training with periodic logging, validation and snapshots.

    Builds the training and validation models, creates an ``Experiment``
    under ``<root_dir>/experiments/<dataset_name>[-<model_name>]`` tagged
    ``<name>-<config file stem>`` and then runs the training net for
    ``solver.max_iterations`` iterations. Along the way it:

    * logs the running average of accuracy / accuracy_5 / softmax_loss every
      ``solver.display`` iterations, together with the learning-rate blob;
    * appends per-epoch training averages to the charts every
      ``solver.train_iterations`` iterations;
    * every ``solver.test_interval`` iterations runs
      ``solver.test_iterations`` validation batches, tracks the best average
      accuracy seen so far, snapshots the model parameters and plots the
      validation averages.

    Args:
        config: Nested mapping with the run settings. Keys read here are
            ``root_dir``, ``dataset_name`` (must not be None),
            ``model_name`` (may be None), ``config_path``, ``name``,
            ``gpu_id`` and, under ``solver``, ``max_iterations``,
            ``display``, ``train_iterations``, ``test_interval`` and
            ``test_iterations``.

    Returns:
        None. Results are written through the ``Experiment`` object.

    Raises:
        AssertionError: If ``config['dataset_name']`` is None.
    """
    # init model
    initialize(config)

    # build model
    training_model = build_training_model(config)
    validation_model = build_validation_model(config)

    # NOTE: the network graph can be dumped here for debugging with
    # generate_network_graph(validation_model, config, ...); those calls are
    # disabled for normal runs.

    # experiment params config
    root_experiments_dir = os.path.join(config['root_dir'], 'experiments')
    assert config['dataset_name'] is not None, \
        "config['dataset_name'] must be set"
    if config['model_name'] is not None:
        experiment_subdir = "{}-{}".format(
            config['dataset_name'],
            config['model_name'],
        )
    else:
        experiment_subdir = config['dataset_name']
    root_experiments_dir = os.path.join(root_experiments_dir,
                                        experiment_subdir)

    # The tag combines the run name with the config file name (no extension).
    config_file_name = os.path.splitext(
        os.path.basename(config['config_path']))[0]
    tag = "{}-{}".format(config['name'], config_file_name)
    experiment = Experiment(root_experiments_dir, tag)
    experiment.add_config_file(config['config_path'])

    # add chart
    chart_acc = experiment.add_chart('accuracy', xlabel='epochs',
                                     ylabel='accuracy')
    chart_acc_5 = experiment.add_chart('accuracy_5', xlabel='epochs',
                                       ylabel='accuracy_5')
    chart_softmax_loss = experiment.add_chart('softmax_loss', xlabel='epochs',
                                              ylabel='softmax_loss')

    # plot params (should be added into 'experiment module')
    # TODO add 'variable' object to Experiment class
    # Per-epoch series that are re-plotted each time they grow.
    training_acc_statistics = []
    training_acc5_statistics = []
    training_softmax_loss_statistics = []
    validation_acc_statistics = []
    validation_acc5_statistics = []
    validation_softmax_loss_statistics = []

    # Sums since the last epoch boundary.
    epoch_training_acc = 0
    epoch_training_acc5 = 0
    epoch_training_softmax_loss = 0
    # Sums since the last display line.
    training_accuracy = 0
    training_accuracy_5 = 0
    training_softmax_loss = 0

    # Best average validation accuracy seen so far.
    best_acc = 0

    separator = (
        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    )
    solver = config['solver']

    # run the model
    experiment.add_log(separator)
    for training_iter in tqdm(range(solver['max_iterations'])):
        workspace.RunNet(training_model.net)
        accuracy = workspace.FetchBlob('accuracy')
        accuracy_5 = workspace.FetchBlob('accuracy_5')
        softmax_loss = workspace.FetchBlob('softmax_loss')

        epoch_training_acc += accuracy
        epoch_training_acc5 += accuracy_5
        epoch_training_softmax_loss += softmax_loss

        training_accuracy += accuracy
        training_accuracy_5 += accuracy_5
        training_softmax_loss += softmax_loss

        # Number of iterations finished so far (training_iter is 0-based).
        completed = training_iter + 1

        # display training result
        # No "training_iter != 0" guard here: with an interval of 1 it would
        # skip the first iteration and the next average would be computed
        # from two iterations' worth of sums divided by one.
        if completed % solver['display'] == 0:
            experiment.add_log(
                ("[TRAIN] epoch: {}   iteration: {}   accuracy: {:.4f}   "
                 "accuracy_5: {:.4f}   softmax_loss: {:.4f}").format(
                     (training_iter // solver['train_iterations'] + 1),
                     training_iter,
                     training_accuracy / solver['display'],
                     training_accuracy_5 / solver['display'],
                     training_softmax_loss / solver['display'],
                 ))
            experiment.add_log("Global learning rate: {}".format(
                workspace.FetchBlob(
                    'MultiPrecisionSgdOptimizer_0_lr_gpu{}'.format(
                        config['gpu_id']))))

            # cleanup the counters
            training_accuracy = training_accuracy_5 = training_softmax_loss = 0

        # plot training statistics every epoch (same reasoning as above for
        # the missing first-iteration guard)
        if completed % solver['train_iterations'] == 0:
            training_acc_statistics.append(
                epoch_training_acc / solver['train_iterations'])
            training_acc5_statistics.append(
                epoch_training_acc5 / solver['train_iterations'])
            training_softmax_loss_statistics.append(
                epoch_training_softmax_loss / solver['train_iterations'])

            epoch_training_acc = 0
            epoch_training_acc5 = 0
            epoch_training_softmax_loss = 0

            experiment.add_plot(chart_acc, training_acc_statistics, 'r.--',
                                'training')
            experiment.add_plot(chart_acc_5, training_acc5_statistics, 'r.--',
                                'training')
            experiment.add_plot(chart_softmax_loss,
                                training_softmax_loss_statistics, 'b+--',
                                'training')

        # start to validate the model
        # Validation (and the snapshot it triggers) never runs right after
        # the first iteration, even with an interval of 1; this guard is kept
        # as in the original.
        if training_iter != 0 and completed % solver['test_interval'] == 0:
            test_accuracy = 0
            test_accuracy_5 = 0
            test_softmax_loss = 0

            for _ in range(solver['test_iterations']):
                workspace.RunNet(validation_model.net)
                accuracy = workspace.FetchBlob('accuracy')
                accuracy_5 = workspace.FetchBlob('accuracy_5')
                softmax_loss = workspace.FetchBlob('softmax_loss')

                # update counter
                test_accuracy += accuracy
                test_accuracy_5 += accuracy_5
                test_softmax_loss += softmax_loss
                experiment.add_log(
                    ("[VALIDATION] accuracy: {:.4f}   accuracy_5: {:.4f}   "
                     "softmax_loss: {:.4f}").format(
                         accuracy, accuracy_5, softmax_loss))

            # end validation: average once, then reuse for the log, the
            # snapshot and the charts
            avg_acc = test_accuracy / solver['test_iterations']
            avg_acc_5 = test_accuracy_5 / solver['test_iterations']
            avg_softmax_loss = test_softmax_loss / solver['test_iterations']

            if avg_acc > best_acc:
                best_acc = avg_acc
            experiment.add_log(separator)
            experiment.add_log(
                ("[VALIDATION] avg_acc: {:.4f}   best_acc: {:.4f}   "
                 "avg_acc_5: {:.4f}   "
                 "avg_softmax_loss: {:.4f}").format(
                     avg_acc, best_acc, avg_acc_5, avg_softmax_loss))

            # snapshot training model params
            print("[INFO] snapshot the model..... ")
            experiment.add_init_net_snapshot(
                training_model.GetAllParams(),
                workspace,
                config,
                (training_iter // solver['train_iterations'] + 1),
                avg_acc,
                best_acc,
            )
            print("[INFO] snapshot the model. Done.....")

            # plot validation statistics
            validation_acc_statistics.append(avg_acc)
            validation_acc5_statistics.append(avg_acc_5)
            validation_softmax_loss_statistics.append(avg_softmax_loss)

            experiment.add_plot(chart_acc, validation_acc_statistics, 'c.--',
                                'validation')
            experiment.add_plot(chart_acc_5, validation_acc5_statistics,
                                'c.--', 'validation')
            experiment.add_plot(chart_softmax_loss,
                                validation_softmax_loss_statistics, 'g+--',
                                'validation')

    experiment.add_log(separator)