Example #1
def main(gpu, ngpus_per_node, args):
    # Copy the GPU index into args.
    args.gpu = gpu
    if args.distributed:
        args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    rank = args.rank

    # Read the config file.
    # cfg_file is the required name of the model to train.
    cfg_file = os.path.join("./configs", args.cfg_file + ".json")
    with open(cfg_file, "r") as f:
        config = json.load(f)

    # Add an entry to the "system" section of the model config loaded above.
    config["system"]["snapshot_name"] = args.cfg_file

    # Update the parameter configuration.
    system_config = SystemConfig().update_config(config["system"])

    # Import the model module by the model's name.
    model_file = "core.models.{}".format(args.cfg_file)
    model_file = importlib.import_module(model_file)
    model = model_file.model()

    # Parameters taken from the system config.
    train_split = system_config.train_split
    val_split = system_config.val_split

    print("Process {}: loading all datasets...".format(rank))

    # Number of dataloader workers to use.
    dataset = system_config.dataset
    workers = args.workers
    print("Process {}: using {} workers".format(rank, workers))

    training_dbs = [
        datasets[dataset](config["db"],
                          split=train_split,
                          sys_config=system_config) for _ in range(workers)
    ]
    validation_db = datasets[dataset](config["db"],
                                      split=val_split,
                                      sys_config=system_config)

    if rank == 0:
        print("system config...")
        pprint.pprint(system_config.full)

        print("db config...")
        pprint.pprint(training_dbs[0].configs)

        print("len of db: {}".format(len(training_dbs[0].db_inds)))
        print("distributed: {}".format(args.distributed))

    # Call the train() training function.
    train(training_dbs, validation_db, system_config, model, args)
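The main(gpu, ngpus_per_node, args) signature follows the torch.multiprocessing.spawn convention, which calls the target as fn(i, *args) with the local process index first. A minimal launcher sketch under that assumption; parse_args and the fields it supplies are hypothetical stand-ins, not taken from the example above:

import torch
import torch.multiprocessing as mp

if __name__ == "__main__":
    args = parse_args()  # hypothetical: supplies cfg_file, workers, distributed, dist_* fields
    ngpus_per_node = torch.cuda.device_count()
    if args.distributed:
        # Scale world_size from number of nodes to total number of processes.
        args.world_size = ngpus_per_node * args.world_size
        # One process per GPU; each receives its local GPU index as `gpu`.
        mp.spawn(main, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        main(None, ngpus_per_node, args)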
Example #2
    def Setup(self):
        distributed = self.system_dict["model"]["params"]["distributed"]
        world_size  = self.system_dict["model"]["params"]["world_size"]

        ngpus_per_node  = torch.cuda.device_count()

        current_dir = os.path.dirname(os.path.realpath(__file__))

        cfg_file = os.path.join(current_dir, "configs", self.system_dict["model"]["params"]["cfg_file"] + ".json")
        with open(cfg_file, "r") as f:
            self.system_dict["local"]["config"] = json.load(f)

        self.system_dict["local"]["config"]["db"]["root_dir"] = self.system_dict["dataset"]["train"]["root_dir"];
        self.system_dict["local"]["config"]["db"]["coco_dir"] = self.system_dict["dataset"]["train"]["coco_dir"];
        self.system_dict["local"]["config"]["db"]["img_dir"] = self.system_dict["dataset"]["train"]["img_dir"];
        self.system_dict["local"]["config"]["db"]["set_dir"] = self.system_dict["dataset"]["train"]["set_dir"];

        # Count the categories from the class list shipped with the annotations.
        classes_file = os.path.join(self.system_dict["dataset"]["train"]["root_dir"],
                                    self.system_dict["dataset"]["train"]["coco_dir"],
                                    "annotations", "classes.txt")
        with open(classes_file) as f:
            lines = f.readlines()

        self.system_dict["local"]["config"]["db"]["categories"] = len(lines)

        self.system_dict["local"]["config"]["system"]["batch_size"] = self.system_dict["dataset"]["params"]["batch_size"];
        self.system_dict["local"]["config"]["system"]["chunk_sizes"] = [self.system_dict["dataset"]["params"]["batch_size"]];
        self.system_dict["local"]["config"]["system"]["max_iter"] = self.system_dict["training"]["params"]["total_iterations"];

        self.system_dict["local"]["config"]["system"]["snapshot_name"] = self.system_dict["model"]["params"]["cfg_file"]
        self.system_dict["local"]["system_config"] = SystemConfig().update_config(self.system_dict["local"]["config"]["system"])

        self.system_dict["local"]["training_dbs"] = [datasets[self.system_dict["local"]["system_config"].dataset](self.system_dict["local"]["config"]["db"], 
                                                            sys_config=self.system_dict["local"]["system_config"]) for _ in range(self.system_dict["dataset"]["params"]["workers"])]

        if self.system_dict["dataset"]["val"]["status"]:
            self.system_dict["local"]["config"]["db"]["root_dir"] = self.system_dict["dataset"]["val"]["root_dir"]
            self.system_dict["local"]["config"]["db"]["coco_dir"] = self.system_dict["dataset"]["val"]["coco_dir"]
            self.system_dict["local"]["config"]["db"]["img_dir"] = self.system_dict["dataset"]["val"]["img_dir"]
            self.system_dict["local"]["config"]["db"]["set_dir"] = self.system_dict["dataset"]["val"]["set_dir"]

            self.system_dict["local"]["validation_db"] = datasets[self.system_dict["local"]["system_config"].dataset](
                self.system_dict["local"]["config"]["db"],
                sys_config=self.system_dict["local"]["system_config"])


        # Create the snapshot directory tree if it does not already exist.
        os.makedirs(os.path.join("cache", "nnet", self.system_dict["model"]["params"]["cfg_file"]),
                    exist_ok=True)

        model_file  = "core.models.{}".format(self.system_dict["model"]["params"]["cfg_file"])
        print("Loading Model - {}".format(model_file))
        model_file  = importlib.import_module(model_file)
        self.system_dict["local"]["model"] = model_file.model(self.system_dict["local"]["config"]["db"]["categories"])
        print("Model Loaded");
Example #3
def main(args):
    # With or without a suffix, the config file lives under the configs folder.
    if args.suffix is None:
        cfg_file = os.path.join("./configs", args.cfg_file + ".json")
    else:
        cfg_file = os.path.join("./configs",
                                args.cfg_file + "-{}.json".format(args.suffix))
    print("\033[1;36m cfg_file(模型配置文件): \033[0m {} ".format(cfg_file))

    # Read the JSON config file with json.load.
    with open(cfg_file, "r") as f:
        config = json.load(f)

    # Add the snapshot name to the config, then build the system config object.
    config["system"]["snapshot_name"] = args.cfg_file
    system_config = SystemConfig().update_config(config["system"])

    # Build the model module name, import it, and initialize the model.
    model_file = "core.models.{}".format(args.cfg_file)
    model_file = importlib.import_module(model_file)
    model = model_file.model()

    # Get the train, validation, and test splits from the system config.
    train_split = system_config.train_split
    val_split = system_config.val_split
    test_split = system_config.test_split

    # Select the split from args.split (validation by default).
    # print(train_split)
    # print(args.split)
    split = {
        "train": train_split,
        "valid": val_split,
        "test": test_split
    }[args.split]

    print("\033[0;36m loading all datasets(加载所有数据集中)... \033[0m ")
    dataset = system_config.dataset
    print("\033[1;36m split(使用分割): \033[0m {}".format(split))
    testing_db = datasets[dataset](config["db"],
                                   split=split,
                                   sys_config=system_config)

    print("\033[1;36m 生成数据模型: \033[0m {}".format(testing_db))

    print("\033[0;36m system config(系统配置)...\033[0m ")
    pprint.pprint(system_config.full)

    print("\033[0;36m db config(数据集配置)...\033[0m ")
    pprint.pprint(testing_db.configs)

    test(testing_db, system_config, model, args)
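main(args) here expects at least cfg_file, suffix, and split on args (test() may require more). A minimal launcher sketch; this parser is an assumption, not the script's actual one:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="evaluate a model config")
    parser.add_argument("cfg_file", help="config name under ./configs, without .json")
    parser.add_argument("--suffix", default=None, help="optional config-file suffix")
    parser.add_argument("--split", default="valid", choices=["train", "valid", "test"])
    args = parser.parse_args()
    main(args)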
Example #4
def main(gpu, ngpus_per_node, args):
    args.gpu = gpu
    if args.distributed:
        args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    rank = args.rank

    cfg_file = os.path.join("./configs", args.cfg_file + ".json")
    with open(cfg_file, "r") as f:
        config = json.load(f)

    config["system"]["snapshot_name"] = args.cfg_file
    system_config = SystemConfig().update_config(config["system"])

    model_file = "core.models.{}".format(args.cfg_file)
    model_file = importlib.import_module(model_file)
    model = model_file.model()

    # VALIDATE
    # hg_model = model.hg
    # for param in hg_model.parameters():
    #     param.requires_grad = False
    # hg_model.eval()

    # print('TESTING MODEL END HERE')

    train_split = system_config.train_split
    val_split = system_config.val_split

    print("Process {}: loading all datasets...".format(rank))
    dataset = system_config.dataset
    workers = args.workers
    print("Process {}: using {} workers".format(rank, workers))
    training_dbs = [datasets[dataset](
        config["db"], split=train_split, sys_config=system_config) for _ in range(workers)]
    validation_db = datasets[dataset](
        config["db"], split=val_split, sys_config=system_config)

    if rank == 0:
        print("system config...")
        pprint.pprint(system_config.full)

        print("db config...")
        pprint.pprint(training_dbs[0].configs)

        print("len of db: {}".format(len(training_dbs[0].db_inds)))
        print("distributed: {}".format(args.distributed))

    train(training_dbs, validation_db, system_config, model, args)
Example #5
def main(args):
    #     os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    args.gpu = None

    cfg_file = os.path.join("./configs", args.cfg_file + ".json")
    with open(cfg_file, "r") as f:
        config = json.load(f)

    print("load cfg file: {}".format(cfg_file))

    # Update the default fields in config.py with values from the JSON file.
    config["system"]["snapshot_name"] = args.cfg_file
    system_config = SystemConfig().update_config(config["system"])

    # Import and initialize the model based on the config file name.
    model_file = "core.models.{}".format(args.cfg_file)
    model_file = importlib.import_module(model_file)
    model = model_file.model()

    # Set the train and val split names.
    train_split = system_config.train_split
    val_split = system_config.val_split

    print("loading all datasets...")
    dataset = system_config.dataset
    workers = args.workers
    print("using {} workers".format(workers))
    training_dbs = [
        datasets[dataset](config["db"],
                          split=train_split,
                          sys_config=system_config) for _ in range(workers)
    ]
    validation_db = datasets[dataset](config["db"],
                                      split=val_split,
                                      sys_config=system_config)

    print("system config...")
    pprint.pprint(system_config.full)

    print("db config...")
    pprint.pprint(training_dbs[0].configs)

    print("len of db: {}".format(len(training_dbs[0].db_inds)))

    train(training_dbs, validation_db, system_config, model, args)
Example #6
def main(args):
    #     os.environ["CUDA_VISIBLE_DEVICES"] = "2"

    if args.suffix is None:
        cfg_file = os.path.join("./configs", args.cfg_file + ".json")
    else:
        cfg_file = os.path.join("./configs",
                                args.cfg_file + "-{}.json".format(args.suffix))
    print("cfg_file: {}".format(cfg_file))

    with open(cfg_file, "r") as f:
        config = json.load(f)

    config["system"]["snapshot_name"] = args.cfg_file
    system_config = SystemConfig().update_config(config["system"])

    model_file = "core.models.{}".format(args.cfg_file)
    model_file = importlib.import_module(model_file)
    model = model_file.model()

    train_split = system_config.train_split
    val_split = system_config.val_split
    test_split = system_config.test_split

    split = {
        "training": train_split,
        "validation": val_split,
        "testing": test_split
    }[args.split]

    print("loading all datasets...")
    dataset = system_config.dataset
    print("split: {}".format(split))
    testing_db = datasets[dataset](config["db"],
                                   split=split,
                                   sys_config=system_config)

    print("system config...")
    pprint.pprint(system_config.full)

    print("db config...")
    pprint.pprint(testing_db.configs)

    test(testing_db, system_config, model, args)
Example #7
def main(gpu, ngpus_per_node, args):
    args.gpu = gpu
    if args.distributed:
        args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    rank = args.rank

    cfg_file = os.path.join("./configs", args.cfg_file + ".json")
    with open(cfg_file, "r") as f:
        config = json.load(f)

    config["system"]["snapshot_name"] = args.cfg_file
    system_config = SystemConfig().update_config(config["system"])

    model_file = "core.models.{}".format(args.cfg_file)
    model_file = importlib.import_module(model_file)
    model = model_file.model(num_classes=config["db"]["categories"])

    train_split = system_config.train_split
    val_split = system_config.val_split

    ckpt_path = os.path.join('cache/nnet/', args.cfg_file, date)
    train_logger = pLogger(ckpt_path)

    if not os.path.exists(ckpt_path):
        os.makedirs(os.path.join(ckpt_path))
    shutil.copyfile('{}'.format(cfg_file),
                    '{}/{}'.format(ckpt_path, args.cfg_file + ".json"))

    train_logger.train_logging(
        "Process {}: loading all datasets...".format(rank))
    dataset = system_config.dataset
    workers = args.workers
    train_logger.train_logging("Process {}: using {} workers".format(
        rank, workers))
    training_dbs = [
        datasets[dataset](config["db"],
                          split=train_split,
                          sys_config=system_config) for _ in range(workers)
    ]
    validation_db = datasets[dataset](config["db"],
                                      split=val_split,
                                      sys_config=system_config)

    if rank == 0:
        print("system config...")
        pprint.pprint(system_config.full)
        train_logger.train_logging("system config...")
        train_logger.train_logging(system_config.full)

        print("db config...")
        pprint.pprint(training_dbs[0].configs)
        train_logger.train_logging("db config...")
        train_logger.train_logging(training_dbs[0].configs)

        train_logger.train_logging("len of db: {}".format(
            len(training_dbs[0].db_inds)))
        train_logger.train_logging("distributed: {}".format(args.distributed))

    train(train_logger, training_dbs, validation_db, system_config, model,
          args)
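The date variable used to build ckpt_path is not defined in this snippet. A plausible module-level definition (an assumption, not from the source) is a per-run timestamp:

from datetime import datetime

# Hypothetical: each run gets its own cache/nnet/<cfg_file>/<date>/ directory.
date = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")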
Example #8
import torch
from core.models.CornerNet_Saccade import model
from core.paths import get_file_path
from core.base import load_cfg, load_nnet
from core.config import SystemConfig


cfg_path = get_file_path("..", "configs", "CornerNet_Saccade.json")
model_path = get_file_path("nnet", "CornerNet_Saccade_500000.pkl")

cfg_sys, cfg_db = load_cfg(cfg_path)
sys_cfg = SystemConfig().update_config(cfg_sys)

cornernet = load_nnet(sys_cfg, model())
example = torch.rand(1, 3, 224, 224).cuda()
# torch.onnx.export writes the graph to "test.onnx" and returns None,
# so its result is not assigned.
torch.onnx.export(cornernet.model,
                  example,
                  "test.onnx",
                  verbose=True)
print("onnx done")