def main():
    import os
    import sys
    import pathlib
    import anyconfig

    __dir__ = pathlib.Path(os.path.abspath(__file__))
    sys.path.append(str(__dir__))
    sys.path.append(str(__dir__.parent.parent))

    from models import build_model, build_loss
    from data_loader import get_dataloader
    from utils import Trainer
    from utils import get_post_processing
    from utils import get_metric

    config = anyconfig.load(open('config.yaml', 'rb'))
    train_loader = get_dataloader(config['dataset']['train'])
    validate_loader = get_dataloader(config['dataset']['validate'])
    criterion = build_loss(config['loss']).cuda()
    model = build_model(config['arch'])
    post_p = get_post_processing(config['post_processing'])
    metric = get_metric(config['metric'])
    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      post_process=post_p,
                      metric_cls=metric,
                      validate_loader=validate_loader)
    trainer.train()
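# Minimal entry-point sketch (not in the original snippet): it assumes this
# file is the script launched directly, so that __file__ above resolves to the
# layout the sys.path tweaks expect.
if __name__ == '__main__':
    main()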
import os
import logging

import anyconfig
import torch
from torch import nn, optim

# Project-local helpers used below (init_args, parse_config, dataset,
# get_trainloader, get_evalloader, train, build_model, build_loss,
# get_post_processing, load_weights, WarmupPolyLR) are assumed importable
# from this repo; their exact modules are not shown in this snippet.


def main_entrance():
    os.environ["CUDA_VISIBLE_DEVICES"] = '3'
    args = init_args()
    config = anyconfig.load(open(args.config_file, 'rb'))
    # print('===config:', config)
    if 'base' in config:
        config = parse_config(config)
    print('===config:', config)
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://",
                                             world_size=torch.cuda.device_count(),
                                             rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank
    logging.info(config['dataset']['train'])

    model = build_model(config['arch']['type'], **config['arch'])
    criterion = build_loss(config['loss'].pop('type'), **config['loss']).cuda()
    post_process = get_post_processing(config['post_processing'])
    train_loader = get_trainloader(dataset.ICDAR2015Dataset, config)
    eval_loader = get_evalloader(dataset.ICDAR2015Dataset, config)

    model = model.cuda()
    if config['distributed']:
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[args.local_rank],
                                                    output_device=args.local_rank,
                                                    broadcast_buffers=False,
                                                    find_unused_parameters=True)

    checkpoint_path = config['train']['resume_checkpoint']
    output_path = config['train']['output_path']
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.00005)
    # load_weights(model, optimizer, config['distributed'], checkpoint_path='/red_detection/DBNet/code_pretrain_model/model_latest_express_code_7_13.pth')
    # load_weights(model, optimizer, config['distributed'], checkpoint_path=checkpoint_path)
    epochs = config['train']['epochs']
    warmup_iters = config['lr_scheduler']['args']['warmup_epoch'] * len(train_loader)
    # scheduler = WarmupPolyLR(optimizer, max_iters=epochs * len(train_loader),
    #                          warmup_iters=warmup_iters, **config['lr_scheduler']['args'])
    train(model, optimizer, epochs, criterion, train_loader, config, post_process, eval_loader, output_path)

    # lr_list is assumed to be a module-level list populated during training
    # (e.g. by the scheduler above); it is not defined in this snippet.
    from matplotlib import pyplot as plt
    plt.plot(lr_list)
    plt.savefig('./show_lr_word_industry.png')
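# A plausible init_args() sketch (hypothetical; the original helper is not
# shown). It assumes only the two attributes used above: --config_file and
# --local_rank, the latter being what torch.distributed.launch passes to each
# worker process.
def init_args():
    import argparse
    parser = argparse.ArgumentParser(description='DBNet training')
    parser.add_argument('--config_file', type=str, default='config.yaml',
                        help='path to the training config')
    parser.add_argument('--local_rank', type=int, default=0,
                        help='rank of this process, set by torch.distributed.launch')
    return parser.parse_args()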
def main(config):
    import torch
    from models import build_model, build_loss
    from data_loader import get_dataloader
    from trainer import Trainer
    from post_processing import get_post_processing
    from utils import get_metric

    # `args` is expected at module scope, parsed before main() is called.
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://",
                                             world_size=torch.cuda.device_count(),
                                             rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank

    train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
    assert train_loader is not None
    if 'validate' in config['dataset']:
        validate_loader = get_dataloader(config['dataset']['validate'], False)
    else:
        validate_loader = None

    criterion = build_loss(config['loss'].pop('type'), **config['loss']).cuda()
    config['arch']['backbone']['in_channels'] = 3 if config['dataset']['train']['dataset']['args']['img_mode'] != 'GRAY' else 1
    config['arch']['backbone']['pretrained'] = False
    model = build_model(config['arch']['type'], **config['arch'])
    post_p = get_post_processing(config['post_processing'])
    metric = get_metric(config['metric'])
    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      post_process=post_p,
                      metric_cls=metric,
                      validate_loader=validate_loader)
    trainer.train()
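# Launch sketch (assumption: this main(config) lives in the repo's train.py and
# `args`/`config` are prepared at module scope). torch.distributed.launch spawns
# one process per GPU and passes --local_rank to each worker:
#
#   python -m torch.distributed.launch --nproc_per_node=4 train.py --config_file config.yaml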
import tensorflow as tf
import signal

from utils import *
import models

inputs, keep_prob, result = models.build_model()
labels = tf.placeholder(dtype=tf.float32, shape=[None], name="labels")
loss = models.build_loss(labels=labels,
                         result=result,
                         regularizer_weight=config.REGULARIZER_WEIGHT,
                         regularized_vars=tf.trainable_variables())
training_step = tf.Variable(0, trainable=False, name="training_step")
train = tf.train.AdamOptimizer(1e-5).minimize(loss, global_step=training_step)
dataset = load_training_data(config.BATCH_SIZE)

tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

exit_signal = False


def exit_handler(*args):
    # Without this declaration the assignment would create a local variable
    # and the flag would never be visible to the training loop.
    global exit_signal
    exit_signal = True


signal.signal(signal.SIGINT, exit_handler)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
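    # The original loop body is not shown past this point; a minimal sketch of
    # how the pieces above would typically be wired together. Assumptions:
    # `dataset` yields (batch_inputs, batch_labels) tuples, and config.KEEP_PROB
    # and config.LOG_DIR exist (both hypothetical names).
    writer = tf.summary.FileWriter(config.LOG_DIR, sess.graph)
    for batch_inputs, batch_labels in dataset:
        if exit_signal:
            break  # SIGINT received: stop cleanly instead of mid-step
        _, step, summary = sess.run(
            [train, training_step, summary_op],
            feed_dict={inputs: batch_inputs,
                       labels: batch_labels,
                       keep_prob: config.KEEP_PROB})
        writer.add_summary(summary, step)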
def main(config):
    import os
    import time
    import random

    import numpy as np
    import torch
    from models import build_model, build_loss
    from data_loader import get_dataloader
    from trainer import Trainer
    from post_processing import get_post_processing
    from utils import get_metric
    from utils import setup_logger

    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://",
                                             world_size=torch.cuda.device_count(),
                                             rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank

    config['arch']['backbone']['in_channels'] = 3 if config['dataset']['train']['dataset']['args']['img_mode'] != 'GRAY' else 1
    model = build_model(config['arch'])

    if config['local_rank'] == 0:
        save_dir = os.path.join(config['trainer']['output_dir'], config['name'] + '_' + model.name)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logger = setup_logger(os.path.join(save_dir, 'train.log'))

    if 'evolve' in config.keys() and config['evolve']['flag'] and not config['distributed']:
        # fitness() and print_mutation() are project helpers (yolov5-style
        # hyperparameter evolution) assumed importable; not shown in this snippet.
        # Each entry is (gain, lower limit, upper limit) for one hyperparameter.
        meta = {
            'optimizer.args.lr': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            'lr_scheduler.args.warmup_epoch': (1, 0, 5),
            'loss.alpha': (1, 0.5, 3),
            'loss.beta': (2, 5, 20),
            'loss.ohem_ratio': (1, 1, 5),
            'post_processing.args.box_thresh': (0.3, 0.4, 1.0),
            'dataset.train.dataset.args.pre_processes.[1].args.min_crop_side_ratio': (1, 0.1, 0.9),
            'dataset.train.dataset.args.pre_processes.[2].args.thresh_max': (0.3, 0.4, 1.0),
        }
        config['notest'] = True
        config['nosave'] = True
        saved_path = os.path.join(config['trainer']['output_dir'], config['name'] + '_' + model.name)
        if not os.path.exists(os.path.join(saved_path, 'evolve')):
            os.makedirs(os.path.join(saved_path, 'evolve'))
        yaml_file = os.path.join(saved_path, 'evolve', 'hyp_evolved.yaml')
        evolve_file = os.path.join(saved_path, 'evolve', 'evolve.txt')

        for _ in range(300):
            if os.path.exists(evolve_file):
                # Select parent(s) from previous generations
                parent = 'single'  # 'single' or 'weighted'
                x = np.loadtxt(evolve_file, ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top-n mutations
                w = fitness(x) - fitness(x).min()  # selection weights
                if len(x) == 1:
                    x = x[0]
                elif parent == 'single':
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([x[0] for x in meta.values()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                # for i, k in enumerate(hyp.keys()):
                #     plt.hist(v.ravel(), 300)
                #     hyp[k] = float(x[i + 7] * v[i])  # mutate
                for i, k in enumerate(meta.keys()):
                    # Rebuild the nested config access from the dotted key,
                    # e.g. 'loss.alpha' -> config['loss']['alpha'], then apply the mutation.
                    config_keys = k.split('.')
                    str_config = 'config'
                    for config_key in config_keys:
                        if config_key.startswith('[') and config_key.endswith(']'):
                            str_config = str_config + config_key
                        else:
                            str_config = str_config + '[\'' + config_key + '\']'
                    exec(str_config + ' = x[i] * v[i]')

            # Clip each mutated value to its limits and round it
            meta_value = []
            for k, v in meta.items():
                config_keys = k.split('.')
                str_config = 'config'
                for config_key in config_keys:
                    if config_key.startswith('[') and config_key.endswith(']'):
                        str_config = str_config + config_key
                    else:
                        str_config = str_config + '[\'' + config_key + '\']'
                # str_config = 'config[\'' + '\'][\''.join(k.split('.')) + '\']'
                exec('print(' + str_config + ')')
                exec(str_config + ' = max(' + str_config + ', v[1])')
                exec(str_config + ' = min(' + str_config + ', v[2])')
                exec(str_config + ' = round(' + str_config + ', 5)')
                exec('meta_value.append(' + str_config + ')')
                # hyp[k] = max(hyp[k], v[1])  # lower limit
                # hyp[k] = min(hyp[k], v[2])  # upper limit
                # hyp[k] = round(hyp[k], 5)  # significant digits

            train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
            assert train_loader is not None
            if 'validate' in config['dataset']:
                validate_loader = get_dataloader(config['dataset']['validate'], False)
            else:
                validate_loader = None
            criterion = build_loss(config['loss']).cuda()
            post_p = get_post_processing(config['post_processing'])
            metric = get_metric(config['metric'])
            trainer = Trainer(config=config,
                              model=model,
                              criterion=criterion,
                              train_loader=train_loader,
                              post_process=post_p,
                              metric_cls=metric,
                              validate_loader=validate_loader,
                              logger=(logger if config['local_rank'] == 0 else None))
            results = trainer.train()
            print_mutation(results, yaml_file, evolve_file, meta_value)
    else:
        train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
        assert train_loader is not None
        if 'validate' in config['dataset']:
            validate_loader = get_dataloader(config['dataset']['validate'], False)
        else:
            validate_loader = None
        criterion = build_loss(config['loss']).cuda()
        post_p = get_post_processing(config['post_processing'])
        metric = get_metric(config['metric'])
        trainer = Trainer(config=config,
                          model=model,
                          criterion=criterion,
                          train_loader=train_loader,
                          post_process=post_p,
                          metric_cls=metric,
                          validate_loader=validate_loader,
                          logger=(logger if config['local_rank'] == 0 else None))
        trainer.train()
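# Illustrative alternative (not in the original code): the exec-based access
# above can be replaced by a small helper that walks the nested config directly.
# Both _parse_dotted_key and set_by_path are hypothetical names introduced here.
def _parse_dotted_key(key):
    # 'dataset.train.dataset.args.pre_processes.[1].args.min_crop_side_ratio'
    # -> ['dataset', 'train', 'dataset', 'args', 'pre_processes', 1, 'args', 'min_crop_side_ratio']
    parts = []
    for part in key.split('.'):
        if part.startswith('[') and part.endswith(']'):
            parts.append(int(part[1:-1]))  # list index
        else:
            parts.append(part)  # dict key
    return parts


def set_by_path(config, key, value):
    # Descend to the parent container, then assign the leaf in place.
    parts = _parse_dotted_key(key)
    node = config
    for part in parts[:-1]:
        node = node[part]
    node[parts[-1]] = value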