def setup_experiment_env(
    *,
    val_fraction: float,
    batch_size: int,
    network: str,
    model_params: Dict[str, Any],
    lr_extraction: float,
    lr_tuning: float,
    loss: Dict[str, Any],
    epochs_extraction: int,
    epochs_tuning: int,
    objective_metric: str,
    seed: int,
) -> None:
    """Set up the general configuration for an experiment.

    :param val_fraction: fraction of the data held out for validation
    :param batch_size: training batch size
    :param network: name of the network architecture
    :param model_params: keyword parameters passed to the model
    :param lr_extraction: learning rate for the feature-extraction phase
    :param lr_tuning: learning rate for the fine-tuning phase
    :param loss: loss configuration
    :param epochs_extraction: number of feature-extraction epochs
    :param epochs_tuning: number of fine-tuning epochs
    :param objective_metric: metric used to select the best model
    :param seed: random seed
    """
    experiment_config = ExperimentConfig(
        network=network,
        epochs_extraction=epochs_extraction,
        epochs_tuning=epochs_tuning,
        batch_size=batch_size,
        val_fraction=val_fraction,
        model_params=model_params,
        lr_extraction=lr_extraction,
        lr_tuning=lr_tuning,
        loss=loss,
        objective_metric=objective_metric,
        seed=seed,
    )
    setup_experiment_dir(experiment_config.network)
    config.set_log_to_file(True)
    log_arguments_to_file(experiment_config)
    for logger_name in config.get_loggers():
        logger = logging.getLogger(logger_name)
        setup_logger(logger)
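# The per-logger `setup_logger(logger)` helper called in the loop above is not
# shown in this excerpt. A minimal sketch of what such a helper might do,
# assuming it attaches a stream handler and a shared format (the name, format,
# and default level here are guesses, not the project's actual implementation):
def setup_logger_sketch(logger: logging.Logger, level: int = logging.INFO) -> None:
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s"))
    logger.addHandler(handler)
    logger.setLevel(level)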
def main() -> None:
    args = parse_args()
    setup_logger()
    input_ = CreateEventsInput(
        calendar_id=args.calendar_id,
        summary=args.summary,
        from_date=args.from_date,
        to_date=args.to_date,
        start_time=args.start_time,
        end_time=args.end_time,
        weekday=args.weekday,
    )
    responses = create_events(input_)
    print(f"Created {len(responses)} event(s).")
def main(args):
    logger = setup_logger("Listen_to_look, classification",
                          args.checkpoint_path, True)
    logger.debug(args)

    writer = None
    if args.visualization:
        writer = setup_tbx(args.checkpoint_path, True)
    if writer is not None:
        logger.info("Tensorboard writer enabled")

    # Create the model
    builder = ModelBuilder()
    net_classifier = builder.build_classifierNet(512, args.num_classes).cuda()
    net_imageAudio = builder.build_imageAudioNet().cuda()
    net_imageAudioClassify = builder.build_imageAudioClassifierNet(
        net_imageAudio, net_classifier, args,
        weights=args.weights_audioImageModel).cuda()
    model = builder.build_audioPreviewLSTM(net_imageAudio, net_classifier, args)
    model = model.cuda()

    # Define the loss function (criterion) and optimizer
    criterion = {}
    criterion['CrossEntropyLoss'] = nn.CrossEntropyLoss().cuda()

    cudnn.benchmark = True

    checkpointer = Checkpointer(model)
    if args.pretrained_model is not None:
        if not os.path.isfile(args.pretrained_model):
            # A directory was given: pick the most recent .pth file in it
            list_of_models = glob.glob(
                os.path.join(args.pretrained_model, "*.pth"))
            args.pretrained_model = max(list_of_models, key=os.path.getctime)
        logger.debug("Loading model only at: {}".format(args.pretrained_model))
        checkpointer.load_model_only(f=args.pretrained_model)

    model = torch.nn.parallel.DataParallel(model).cuda()

    # DATA LOADING
    val_ds, val_collate = create_validation_dataset(args, logger=logger)
    val_loader = torch.utils.data.DataLoader(val_ds,
                                             batch_size=args.batch_size,
                                             num_workers=args.decode_threads,
                                             collate_fn=val_collate)

    # The epoch argument is fixed (117) for this standalone evaluation
    avgpool_final_acc, lstm_final_acc, avgpool_mean_ap, lstm_mean_ap, loss_avg = \
        validate(args, 117, val_loader, model, criterion, val_ds=val_ds)
    print("Testing Summary for checkpoint: {}\n"
          "Avgpool Acc: {} \n LSTM Acc: {} \n Avgpool mAP: {} \n LSTM mAP: {}".format(
              args.pretrained_model,
              avgpool_final_acc * 100, lstm_final_acc * 100,
              avgpool_mean_ap, lstm_mean_ap))
def train_model():
    """Model training loop."""
    logger = logging.getLogger(__name__)
    model, start_iter, checkpoints, output_dir = create_model()
    setup_logger(output_dir)
    if 'final' in checkpoints:
        # The final model was found in the output directory, so nothing to do
        return checkpoints
    setup_model_for_training(model, output_dir)
    training_stats = TrainingStats(model)
    CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)

    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        training_stats.IterTic()
        lr = model.UpdateWorkspaceLr(cur_iter)
        workspace.RunNet(model.net.Proto().name)
        if cur_iter == start_iter:
            nu.print_net(model)
        training_stats.IterToc()
        training_stats.UpdateIterStats()
        training_stats.LogIterStats(cur_iter, lr)

        if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter:
            checkpoints[cur_iter] = os.path.join(
                output_dir, 'model_iter{}.pkl'.format(cur_iter))
            nu.save_model_to_weights_file(checkpoints[cur_iter], model)

        if cur_iter == start_iter + training_stats.LOG_PERIOD:
            # Reset the iteration timer to remove outliers from the first few
            # SGD iterations
            training_stats.ResetIterTimer()

        if np.isnan(training_stats.iter_total_loss):
            logger.critical('Loss is NaN, exiting...')
            model.roi_data_loader.shutdown()
            envu.exit_on_error()

    # Save the final model
    checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl')
    nu.save_model_to_weights_file(checkpoints['final'], model)
    # Shutdown data loading threads
    model.roi_data_loader.shutdown()
    return checkpoints
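# A quick worked example of the checkpoint cadence above, with hypothetical
# config values (not taken from any real config). The snapshot interval is
# divided by the GPU count, presumably because iteration counts in the config
# are scaled by the number of GPUs:
snapshot_iters, num_gpus = 20000, 8
checkpoint_period = int(snapshot_iters / num_gpus)
assert checkpoint_period == 2500  # a snapshot every 2500 multi-GPU iterations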
def main():
    args = parse_args()
    setup_logger()
    params = {
        "calendarId": args.calendar,
        "timeMin": args.min,
        "timeMax": args.max,
        "maxResults": 2500,
    }
    if args.keyword:
        params["q"] = args.keyword
    events = Events()
    events.fetch(params)
    filename = datetime.now().strftime("%y%m%d_%H%M%S.csv")
    events.to_csv(filename)
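# If this wraps the Google Calendar API (the calendarId/timeMin/timeMax
# parameter names suggest it does), timeMin and timeMax must be RFC3339
# timestamps. A sketch of building them, assuming a "last 30 days" window
# (the window itself is an assumption, not from the original script):
from datetime import datetime, timedelta, timezone

now = datetime.now(timezone.utc)
time_min = (now - timedelta(days=30)).isoformat()  # e.g. 2024-01-01T00:00:00+00:00
time_max = now.isoformat()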
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)

    if args.local_rank != -1:
        dist.init_process_group(backend='nccl', init_method='env://')
        torch.cuda.set_device(args.local_rank)

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    local_rank = args.local_rank

    logger = setup_logger(__name__, cfg.save_name, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config))
    logger.info(cfg._text)

    train(cfg, args)
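# Note: WORLD_SIZE and --local_rank are provided by PyTorch's distributed
# launcher; a typical invocation (script and config names are placeholders,
# not from this project) would be:
#   python -m torch.distributed.launch --nproc_per_node=4 train.py --config config.py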
def create_simple_exp_name():
    """Create a unique experiment name from a timestamp."""
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    return timestamp


std_threshold = 0.1
in_mdp_batch_size = 128
eval_statistics = OrderedDict()

logger.reset()
setup_logger(
    log_dir=osp.join('./tune_threshold_loggings', create_simple_exp_name()))

filename = './goals/ant-dir-normal-goals.pkl'
train_goals, wd_goals, ood_goals = pickle.load(open(filename, 'rb'))
env = env_producer('ant-dir', 0, train_goals[0])

for epoch in range(200):
    file_name = osp.join('./data_reward_predictions', f'params_{epoch}.pkl')
    params = pickle.load(open(file_name, "rb"))

    obs = params['obs']
    actions = params['actions']
    rewards = params['rewards']
    pred_rewards = params['pred_rewards']
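    # The remainder of the loop body is not shown. Given the `std_threshold`
    # defined above, a plausible (purely hypothetical) continuation filters
    # samples whose ensemble prediction std exceeds the threshold before
    # scoring them:
    #   pred_std = pred_rewards.std(axis=0)      # assumed ensemble axis
    #   keep = pred_std < std_threshold
    #   errors = (pred_rewards.mean(axis=0) - rewards) ** 2
    #   eval_statistics[f'epoch_{epoch}_mse'] = errors[keep].mean()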
assert len(bcq_buffers) == len(idx_list)

# Load ensemble parameters
ensemble_params_list = []
for idx in idx_list:
    params_dir = ensemble_params_dir + str(idx) + '/itr_200.zip_pkl'
    params = load_gzip_pickle(params_dir)
    ensemble_params_list.extend(
        params['trainer']['network_ensemble_state_dict'])

# Set up the logger
variant['log_dir'] = get_log_dir(variant)
logger.reset()
setup_logger(log_dir=variant['log_dir'], snapshot_gap=100,
             snapshot_mode="gap")
logger.log(f"Seed: {seed}")
set_seed(seed)

logger.log('Using GPU: True')
set_gpu_mode(mode=True, gpu_id=0)

experiment(variant, bcq_policies, bcq_buffers, ensemble_params_list,
           prev_exp_state=None)
import timeago
import time
import datetime
import json
from types import SimpleNamespace

# Restored from the commented-out import: `setup_logger` is not part of the
# stdlib `logging` module, so this presumably resolves to a project-local
# logging wrapper on the import path.
import logging

from db.models import TrxKey, TRX, SKey, MKey, KeyLabel, Offer, Bid, Trade, \
    Account, TRCHistory, User, ETHPrice, ETHPriceRevision, CXPrice, \
    CXPriceRevision, Heartbeat, HeartbeatComment, HeartbeatCommentBase, \
    HeartbeatUser, engine, Base, Session, session

COIN = 100000000

trxapp = SimpleNamespace()
trxapp.config = {'SECRET_KEY': "jigga does as jigga does"}

logger = logging.setup_logger('DB', 'INFO', 'db.log')

COINMASTER_USER_ID = 16


async def test_db():
    # `async_engine`, `db_config`, and `select` are assumed to come from the
    # project's database helpers (e.g. aiopg.sa plus sqlalchemy); they are not
    # imported in this excerpt.
    engine = await async_engine(user=db_config.DATABASE['username'],
                                database=db_config.DATABASE['database'],
                                host=db_config.DATABASE['host'],
                                password=db_config.DATABASE['password'])
    async with engine:
        async with engine.acquire() as conn:
            query = select([CXPrice])
            async for row in conn.execute(query):
                print(row)
def borel(doodad_config: DoodadConfig, params):
    save_doodad_config(doodad_config)
    log_dir = doodad_config.output_directory
    exp_name = log_dir.split('/')[-2]
    setup_logger(logger,
                 variant=params,
                 base_log_dir=None,
                 exp_name=exp_name,
                 log_dir=log_dir)
    _borel(log_dir, **params)
import logging
import time
import traceback

import tweepy

from constants.emails import RATE_LIMIT_ERROR, GENERIC_ERROR
from constants.secrets import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, \
    ACCESS_TOKEN_SECRET
from utils.emails import send_email, send_email_with_results
from utils.files import write_json_file, read_json_file
from utils.format import format_api_followers, format_file_data, \
    format_unfollowers
from utils.logging import setup_logger

logger = setup_logger(name='twitter-unfollowers', level=logging.INFO,
                      filename='unfollowers.log')


def main():
    try:
        old_followers = read_old_followers()
        api = connect_to_api()
        api_followers = handle_rate_limit(tweepy.Cursor(api.followers).items())
        current_followers = format_api_followers(api_followers)
        save(current_followers)
        unfollower_ids = find_unfollowers(old_followers=old_followers,
                                          current_followers=current_followers)
def main():
    parser = argparse.ArgumentParser(description='Shallow-CNN Training')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default:64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default:1000)')
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train for (default:100)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='lr',
                        help='learning rate for optimizer (default:0.001)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--early-stopping', type=int, default=10, metavar='N',
                        help='Patience for early stopping (default:10)')
    parser.add_argument('--data-dir', type=str, default='../data',
                        metavar='path/to/dir',
                        help='path to directory containing data (default:../data)')
    parser.add_argument('--train-size', type=float, default=0.85, metavar='pct',
                        help='fraction of dataset to use for training (default:0.85)')
    parser.add_argument('--test-size', type=float, default=0.15, metavar='pct',
                        help='fraction of dataset to use for testing (default:0.15)')
    parser.add_argument('--dropout-rate', type=float, default=0.5, metavar='pct',
                        help='dropout rate after convolution layers (default:0.5)')
    parser.add_argument('--conv1-width', type=int, default=10, metavar='w',
                        help='Width of 1st convolution kernel (default:10)')
    parser.add_argument('--n_channels', type=int, default=30, metavar='N',
                        help='Number of channels output by convolution layers (default:30)')
    parser.add_argument('--max-pool-kernel-size', type=int, default=25, metavar='w',
                        help='Width of max-pool kernel after convolution (default:25)')
    parser.add_argument('--max-pool-stride', type=int, default=5, metavar='N',
                        help='stride along 2nd axis for max-pool (default:5)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--checkpoint', type=str, default='checkpoint.pt',
                        metavar='path/to/file',
                        help='file to save checkpoints (default:checkpoint.pt)')
    # TODO: add arg to save everything to a specific folder

    # Time id used for saving files
    time_id = int(time())

    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    torch.manual_seed(SEED)

    # Load the datasets
    print('loading datasets')
    train_set = RobotNavDataset(args.data_dir)
    submission_set = SubmissionDataset(args.data_dir)
    # Use the --train-size fraction and give the remainder to the test split,
    # so the two subset sizes always sum to len(train_set). (The original
    # hard-coded 0.8/0.2 here, silently ignoring --train-size/--test-size.)
    train_size = floor(args.train_size * len(train_set))
    test_size = len(train_set) - train_size
    train_subset, test_subset = data.random_split(train_set,
                                                  (train_size, test_size))
    train_loader = torch.utils.data.DataLoader(
        train_subset, batch_size=args.batch_size,
        shuffle=True)  # don't think we actually need shuffle here...
    test_loader = torch.utils.data.DataLoader(test_subset,
                                              batch_size=args.test_batch_size)

    # Initialize objects
    print('creating model')
    model = ShallowCNN(n_channels=args.n_channels,
                       conv1_width=args.conv1_width,
                       max_pool_kernel_size=args.max_pool_kernel_size,
                       max_pool_stride=args.max_pool_stride,
                       dropout_rate=args.dropout_rate)
    model.double()  # TODO: look into if this is actually needed...
    early_stopper = EarlyStopping(patience=args.early_stopping,
                                  check_file=args.checkpoint)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    logfile = '{}.log'.format(time_id)
    logger = setup_logger(logfile=logfile, console_out=True)
    loss_func = F.nll_loss

    # Train the model
    print('training model')
    for epoch in range(1, args.epochs + 1):
        train(model, train_loader, optimizer, loss_func, epoch,
              log_interval=args.log_interval, log_func=logger.info)
        test_loss = test(model, test_loader, loss_func, log_func=logger.info)
        # Early stopper will handle saving the checkpoints
        if early_stopper(test_loss, model):
            break

    print('creating submission')
    make_submission(model, submission_set.data,
                    'submission-{}.csv'.format(time_id))
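# The EarlyStopping helper is not shown in this excerpt. A minimal sketch
# consistent with how it is used above (constructor takes `patience` and
# `check_file`, the instance is called with (loss, model) and returns True
# when training should stop); the details are assumptions, not the real class:
import torch


class EarlyStoppingSketch:
    def __init__(self, patience: int, check_file: str):
        self.patience = patience
        self.check_file = check_file
        self.best_loss = float('inf')
        self.bad_epochs = 0

    def __call__(self, loss: float, model: torch.nn.Module) -> bool:
        if loss < self.best_loss:
            self.best_loss = loss
            self.bad_epochs = 0
            torch.save(model.state_dict(), self.check_file)  # keep best weights
        else:
            self.bad_epochs += 1
        return self.bad_epochs >= self.patience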
import numpy as np

from utils.logging import setup_logger
from config import RUN_FOLDER, LOGGER_DISABLED, EPSILON, ALPHA, CPUCT

logger_mcts = setup_logger('logger_mcts', RUN_FOLDER + 'logs/logger_mcts.log')
logger_mcts.disabled = LOGGER_DISABLED['mcts']


class Node:

    def __init__(self, state):
        self.state = state
        self.player_turn = state.playerTurn
        self.id = state.id
        self.edges = []

    def is_leaf(self):
        return len(self.edges) == 0


class Edge:

    def __init__(self, in_node, out_node, prior, action):
        self.id = in_node.state.id + '|' + out_node.state.id
        self.inNode = in_node
        self.outNode = out_node
        self.playerTurn = in_node.state.playerTurn
        self.action = action

        # n is the number of times visited
        # w is the value from the model
        # q is w/n
        # p is the prior from the policy network over probability of making
        # each move
        # (the stats dict below was truncated in the original; its keys are
        # implied by the comments above)
        self.stats = {'N': 0, 'W': 0, 'Q': 0, 'P': prior}
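# CPUCT, EPSILON, and ALPHA imported above are the usual AlphaZero-style MCTS
# knobs: at the root, the prior P is typically mixed with Dirichlet(ALPHA)
# noise weighted by EPSILON, and the child maximising Q + U is selected, with
#   U = CPUCT * P * sqrt(sum_b N(b)) / (1 + N(a))
# A sketch of that selection score (a hypothetical helper, not code from this
# file):
def puct_score(edge, parent_visits: int, cpuct: float = CPUCT) -> float:
    u = cpuct * edge.stats['P'] * np.sqrt(parent_visits) / (1 + edge.stats['N'])
    return edge.stats['Q'] + u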
""" Train loop """ from typing import Any, Dict, Optional import torch import torch.nn as nn import torch.utils.data as data_utils import texar.torch as tx from utils.logging import setup_logger logger = setup_logger('__train__') def do_train(model: nn.Module, train_loader: data_utils.DataLoader, valid_loader: data_utils.DataLoader, optimizer, loss_fn: nn.Module = None, scheduler: Optional = None, cfg: Optional = None) -> None: """ Do Train Loop Args: cfg: Dict, config info; model: nn.Module, the model you use; train_loader: train data loader; valid_loader: valid data loader; optimizer: optimizer; scheduler: use to change learing rate; loss_fn: the function to compute the loss;
import logging
import smtplib
from email.message import EmailMessage

from constants.config import FROM_ADDRESS, TO_ADDRESS, SMTP_HOST, SMTP_PORT
from constants.emails import SCRIPT_RESULTS, SUCCESSFUL_UNFOLLOW_MSG, \
    UNSUCCESSFUL_UNFOLLOW_MSG, NO_ACTION_NEEDED_MSG
from constants.secrets import EMAIL_PASSWORD
from utils.format import format_message
from utils.logging import setup_logger

logger = setup_logger(name=__name__, level=logging.ERROR,
                      filename='emails.log')


def send_email(*, subject, content):
    # Silently skip sending when the email configuration is incomplete
    if not all([FROM_ADDRESS, TO_ADDRESS, SMTP_HOST, SMTP_PORT,
                EMAIL_PASSWORD]):
        return
    msg = EmailMessage()
    msg['From'] = FROM_ADDRESS
    msg['To'] = TO_ADDRESS
    msg['Subject'] = subject
    msg.set_content(content)
    with smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) as smtp:
        try:
            smtp.login(FROM_ADDRESS, EMAIL_PASSWORD)
            smtp.send_message(msg)
        except Exception as e:
            # The original excerpt is truncated here; logging the failure is
            # the natural handler given the ERROR-level logger set up above.
            logger.error('Failed to send email: %s', e)
        help='how far to search forward from lost offset in recovery state')
    parser.add_argument(
        '--recovery_min_match_size', type=int,
        help='minimum number of symbols for a successful sync in `recovery`')
    parser.add_argument(
        '--recovery_backoff_thresh', type=int,
        help='failed search attempts needed to move `recovery` -> `searching`')
    args = parser.parse_args()

    # Configure logging
    if args.nolog:
        disable_logging()
    else:
        setup_logger(log_dir=args.log_dir, file_level=logging.INFO)

    # Configure server
    local_ip = get_server_host()
    local_port = args.local_port
    seed_fn = seed_from_flow_id
    get_seed = partial(seed_from_addresses, seed_fn,
                       recv_addr=(local_ip, local_port))

    sequence_args = override_defaults(default_sequence_args, vars(args))
    sequence_cls = get_sequence_cls(**sequence_args)

    reporter_args = map(int, args.reporter_args.split())
    accumulator = accumulators.get(args.accumulator_name, None)
import os
import glob
from pathlib import Path
from typing import Union, List
import json
import re
import shutil

import yaml
from attrdict import AttrDict

from utils.logging import setup_logger

logger = setup_logger(__name__)


def read_yaml(path: Union[str, list]) -> AttrDict:
    """Read a YAML file and return an instance whose dict keys are
    accessible as attributes.

    Parameters
    ----------
    path: str or list

    Returns
    -------
    obj: AttrDict
    """
    if isinstance(path, str):
        obj = _read_yaml(path)
    elif isinstance(path, list):
        obj = dict()
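# _read_yaml is referenced above but not shown in this excerpt. A minimal
# sketch, assuming it simply parses the file and wraps the result (the real
# helper may do more, e.g. path handling or validation):
def _read_yaml_sketch(path: str) -> AttrDict:
    with open(path) as f:
        return AttrDict(yaml.safe_load(f))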
def run(train_config, logger, **kwargs):
    logger = logging.getLogger('UDA')
    if getattr(train_config, 'debug', False):
        setup_logger(logger, logging.DEBUG)

    # Set up the Polyaxon environment if needed
    plx_logger = None
    save_dir = None
    output_experiment_path = None
    try:
        plx_logger = PolyaxonLogger()
        experiment = plx_logger.experiment
        save_dir = get_outputs_path()
        output_experiment_path = get_outputs_refs_paths()
        output_experiment_path = output_experiment_path['experiments'][0] \
            if output_experiment_path else None
        logger.debug("Experiment info: {}".format(
            experiment.get_experiment_info()))
    except PolyaxonClientException as e:
        logger.warning('Logger Polyaxon : ' + str(e))

    # Path configuration
    saves_dict = getattr(train_config, 'saves', {})

    save_dir = saves_dict.get('save_dir', '') if save_dir is None else save_dir
    log_dir = os.path.join(save_dir, saves_dict.get('log_dir', ''))
    save_model_dir = os.path.join(save_dir, saves_dict.get('model_dir', ''))
    save_prediction_dir = os.path.join(save_dir,
                                       saves_dict.get('prediction_dir', ''))
    save_config_dir = os.path.join(save_dir, saves_dict.get('config_dir', ''))
    load_model_file = saves_dict.get('load_model_file', '')
    load_optimizer_file = saves_dict.get('load_optimizer_file', '')

    # Create folders
    create_save_folders(save_dir, saves_dict)

    if output_experiment_path is not None:
        model_dir = saves_dict.get('model_dir', '')
        load_model_file = os.path.join(output_experiment_path, model_dir,
                                       load_model_file) \
            if load_model_file else None
        load_optimizer_file = os.path.join(output_experiment_path, model_dir,
                                           load_optimizer_file) \
            if load_optimizer_file else None

    num_epochs = getattr(train_config, 'num_epochs')
    num_classes = getattr(train_config, 'num_classes')
    device = getattr(train_config, 'device', 'cpu')

    # Enable cudnn autotuning when a GPU is available
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
    else:
        assert device == 'cpu', 'CUDA device selected but none is available'

    # Set half precision if required
    use_fp_16 = getattr(train_config, 'use_fp_16', False)

    train1_sup_loader = getattr(train_config, 'train1_sup_loader')
    train1_unsup_loader = getattr(train_config, 'train1_unsup_loader')
    train2_unsup_loader = getattr(train_config, 'train2_unsup_loader')
    test_loader = getattr(train_config, 'test_loader')

    save_interval = saves_dict.get('save_interval', 0)
    n_saved = saves_dict.get('n_saved', 0)

    val_interval = getattr(train_config, 'val_interval', 1)
    pred_interval = getattr(train_config, 'pred_interval', 0)

    model = getattr(train_config, 'model').to(device)
    optimizer = getattr(train_config, 'optimizer')
    criterion = getattr(train_config, 'criterion').to(device)
    consistency_criterion = getattr(train_config,
                                    'consistency_criterion').to(device)

    cm_metric = getattr(
        train_config, 'cm_metric',
        ConfusionMatrix(num_classes=num_classes,
                        output_transform=lambda x: (x['y_pred'], x['y'])))

    # AMP initialization for half precision
    if use_fp_16:
        assert 'cuda' in device
        assert torch.backends.cudnn.enabled, \
            "NVIDIA/Apex:Amp requires cudnn backend to be enabled."
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from "
                "https://www.github.com/nvidia/apex to run this example.")
        # Initialize amp
        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    # Load checkpoint
    load_params(model,
                optimizer=optimizer,
                model_file=load_model_file,
                optimizer_file=load_optimizer_file,
                device_name=device)

    # Add batch norm
    is_bn = getattr(train_config, 'is_bn', False)
    if is_bn:
        batch_norm = nn.BatchNorm2d(3).to(device)
        if use_fp_16:
            batch_norm = amp.initialize(batch_norm)
        batch_norm.reset_parameters()
        model = nn.Sequential(batch_norm, model)

    # Copy the config file
    shutil.copy2(os.path.abspath(train_config.__file__),
                 os.path.join(save_config_dir, 'checkpoint_module.py'))

    le = len(train1_sup_loader)
    num_train_steps = le * num_epochs
    mlflow.log_param("num train steps", num_train_steps)

    lr = getattr(train_config, 'learning_rate')
    num_warmup_steps = getattr(train_config, 'num_warmup_steps', 0)

    lr_scheduler = getattr(train_config, 'lr_scheduler', None)
    if lr_scheduler is not None:
        lr_scheduler = lr_scheduler(optimizer)

    if num_warmup_steps > 0:
        lr_scheduler = create_lr_scheduler_with_warmup(
            lr_scheduler,
            warmup_start_value=0.0,
            warmup_end_value=lr * (1.0 + 1.0 / num_warmup_steps),
            warmup_duration=num_warmup_steps)

    train1_sup_loader_iter = cycle(train1_sup_loader)
    train1_unsup_loader_iter = cycle(train1_unsup_loader)
    train2_unsup_loader_iter = cycle(train2_unsup_loader)

    # Reduce on plateau
    reduce_on_plateau = getattr(train_config, 'reduce_on_plateau', None)

    # Output transform model
    output_transform_model = getattr(train_config, 'output_transform_model',
                                     lambda x: x)

    inference_fn = getattr(train_config, 'inference_fn', inference_standard)

    lam = getattr(train_config, 'consistency_lambda')
    beta = getattr(train_config, 'consistency_beta', lam)

    tsa = TrainingSignalAnnealing(
        num_steps=num_train_steps,
        min_threshold=getattr(train_config, 'TSA_proba_min'),
        max_threshold=getattr(train_config, 'TSA_proba_max'))

    with_tsa = getattr(train_config, 'with_TSA', False)

    cfg = {
        'tsa': tsa,
        'lambda': lam,
        'beta': beta,
        'with_tsa': with_tsa,
        'device': device,
        'consistency_criterion': consistency_criterion,
        'criterion': criterion
    }

    trainer = Engine(
        partial(train_update_function,
                model=model,
                optimizer=optimizer,
                cfg=cfg,
                train1_sup_loader_iter=train1_sup_loader_iter,
                train1_unsup_loader_iter=train1_unsup_loader_iter,
                train2_unsup_loader_iter=train2_unsup_loader_iter,
                output_transform_model=output_transform_model,
                use_fp_16=use_fp_16))

    # Register events
    for e in CustomEvents:
        State.event_to_attr[e] = 'iteration'
    trainer.register_events(*CustomEvents)

    if with_tsa:
        trainer.add_event_handler(Events.ITERATION_COMPLETED, log_tsa, tsa)

    if lr_scheduler is not None:
        if not hasattr(lr_scheduler, "step"):
            trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler)
        else:
            trainer.add_event_handler(Events.ITERATION_STARTED,
                                      lambda engine: lr_scheduler.step())

    trainer.add_event_handler(Events.ITERATION_COMPLETED, log_learning_rate,
                              optimizer)

    metric_names = [
        'supervised batch loss', 'consistency batch loss', 'final batch loss'
    ]

    def output_transform(x, name):
        return x[name]

    for n in metric_names:
        RunningAverage(
            output_transform=partial(output_transform, name=n)).attach(
                trainer, n)

    ProgressBar(persist=True, bar_format="").attach(
        trainer,
        event_name=Events.EPOCH_STARTED,
        closing_event_name=Events.COMPLETED)

    # Handlers for Tensorboard logging
    tb_logger = TensorboardLogger(log_dir=log_dir)
    tb_logger.attach(trainer,
                     log_handler=tbOutputHandler(tag="train",
                                                 metric_names=metric_names),
                     event_name=CustomEvents.ITERATION_K_COMPLETED)
    tb_logger.attach(trainer,
                     log_handler=tbOptimizerParamsHandler(optimizer,
                                                          param_name="lr"),
                     event_name=CustomEvents.ITERATION_K_STARTED)
param_name="lr"), event_name=CustomEvents.ITERATION_K_STARTED) # Handlers for Polyaxon logging if plx_logger is not None: plx_logger.attach(trainer, log_handler=plxOutputHandler( tag="train", metric_names=metric_names), event_name=CustomEvents.ITERATION_K_COMPLETED) metrics = { 'loss': Loss(criterion, output_transform=lambda x: (x['y_pred'], x['y'])), 'mAcc': cmAccuracy(cm_metric).mean(), 'mPr': cmPrecision(cm_metric).mean(), 'mRe': cmRecall(cm_metric).mean(), 'mIoU': mIoU(cm_metric), 'mF1': cmFbeta(cm_metric, 1).mean() } iou = IoU(cm_metric) for i in range(num_classes): key_name = 'IoU_{}'.format(str(i)) metrics[key_name] = iou[i] inference_update_fn = partial( inference_update_function, model=model, cfg=cfg, output_transform_model=output_transform_model, inference_fn=inference_fn) evaluator = Engine(inference_update_fn) train_evaluator = Engine(inference_update_fn) for name, metric in metrics.items(): metric.attach(train_evaluator, name) metric.attach(evaluator, name) # Add checkpoint if save_model_dir: checkpoint = ModelCheckpoint(dirname=save_model_dir, filename_prefix='checkpoint', save_interval=save_interval, n_saved=n_saved, create_dir=True) trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint, { 'mymodel': model, 'optimizer': optimizer }) def trigger_k_iteration_started(engine, k): if engine.state.iteration % k == 0: engine.fire_event(CustomEvents.ITERATION_K_STARTED) def trigger_k_iteration_completed(engine, k): if engine.state.iteration % k == 0: engine.fire_event(CustomEvents.ITERATION_K_COMPLETED) def run_validation(engine, validation_interval): if (trainer.state.epoch - 1) % validation_interval == 0: train_evaluator.run(train1_sup_loader) evaluator.run(test_loader) if save_prediction_dir: train_output = train_evaluator.state.output test_output = evaluator.state.output iteration = str(trainer.state.iteration) epoch = str(trainer.state.epoch) save_prediction('train_{}_{}'.format(iteration, epoch), save_prediction_dir, train_output['x'], torch.argmax( train_output['y_pred'][0, :, :, :], dim=0), y=train_output['y'][0, :, :]) save_prediction('test_{}_{}'.format(iteration, epoch), save_prediction_dir, test_output['x'], torch.argmax(test_output['y_pred'][0, :, :, :], dim=0), y=test_output['y'][0, :, :]) train_evaluator.state.output = None evaluator.state.output = None if reduce_on_plateau is not None: reduce_on_plateau.step(evaluator.state.metrics['mIoU']) trainer.add_event_handler(Events.ITERATION_STARTED, trigger_k_iteration_started, k=10) trainer.add_event_handler(Events.ITERATION_COMPLETED, trigger_k_iteration_completed, k=10) trainer.add_event_handler(Events.EPOCH_STARTED, run_validation, validation_interval=val_interval) trainer.add_event_handler(Events.COMPLETED, run_validation, validation_interval=1) def trainer_prediction_save(engine, prediction_interval): if (engine.state.iteration - 1) % prediction_interval == 0: if save_prediction_dir: trainer_output = trainer.state.output['unsup pred'] iteration = str(trainer.state.iteration) epoch = str(trainer.state.epoch) save_prediction('trainer_{}_{}'.format(iteration, epoch), save_prediction_dir, trainer_output['x'], trainer_output['y_pred']) logger.debug( 'Saved trainer prediction for iteration {}'.format( str(engine.state.iteration))) trainer.state.output = None trainer.add_event_handler(Events.ITERATION_COMPLETED, trainer_prediction_save, prediction_interval=pred_interval) tb_logger.attach(train_evaluator, log_handler=tbOutputHandler(tag="train", metric_names=list( metrics.keys())), event_name=Events.EPOCH_COMPLETED) 
    tb_logger.attach(evaluator,
                     log_handler=tbOutputHandler(
                         tag="test", metric_names=list(metrics.keys())),
                     event_name=Events.EPOCH_COMPLETED)

    # Handlers for Polyaxon logging
    if plx_logger is not None:
        plx_logger.attach(train_evaluator,
                          log_handler=plxOutputHandler(
                              tag="train", metric_names=list(metrics.keys())),
                          event_name=Events.EPOCH_COMPLETED)
        plx_logger.attach(evaluator,
                          log_handler=plxOutputHandler(
                              tag="test", metric_names=list(metrics.keys())),
                          event_name=Events.EPOCH_COMPLETED)

    trainer.add_event_handler(Events.ITERATION_COMPLETED,
                              mlflow_batch_metrics_logging, "train", trainer)
    train_evaluator.add_event_handler(Events.COMPLETED,
                                      mlflow_val_metrics_logging, "train",
                                      trainer)
    evaluator.add_event_handler(Events.COMPLETED, mlflow_val_metrics_logging,
                                "test", trainer)

    data_steps = list(range(len(train1_sup_loader)))

    logger.debug('Start training')
    trainer.run(data_steps, max_epochs=num_epochs)
    logger.debug('Finished training')
    '--offset', type=int, help='start offset of sequence')
parser.add_argument('-p', '--period', type=int, help='sequence period')
parser.add_argument('-r', '--rate', type=int, help='sending rate in pps')
parser.add_argument('-s', '--symbol_bits', type=int,
                    help='number of bits for each symbol')
args = parser.parse_args()

# Configure logging
if args.nolog:
    disable_logging()
else:
    setup_logger(log_dir=args.log_dir)

# Configure client
local_host, local_port = get_client_host(), args.local_port
remote_host, remote_port = parse_server_addr(args.remote_addr)
seed_fn = seed_from_flow_id

sequence_args = override_defaults(default_sequence_args, vars(args))
sequence_cls = get_sequence_cls(**sequence_args)
send_sequence = partial(send_sequence, sequence_cls=sequence_cls)

sending_rate = args.rate or DEFAULT_SENDING_RATE
offset = args.offset or DEFAULT_OFFSET

# Print settings
def main(args):
    os.makedirs(args.checkpoint_path, exist_ok=True)

    # Set up the logging system
    logger = setup_logger(
        "Listen_to_look, audio_preview classification single modality",
        args.checkpoint_path, True)
    logger.debug(args)

    # Epoch logging
    epoch_log = setup_logger("Listen_to_look: results", args.checkpoint_path,
                             True, logname="epoch.log")
    epoch_log.info("epoch,loss,acc,lr")

    writer = None
    if args.visualization:
        writer = setup_tbx(args.checkpoint_path, True)
    if writer is not None:
        logger.info("Tensorboard writer enabled")

    # Define the model
    builder = ModelBuilder()
    net_classifier = builder.build_classifierNet(args.embedding_size,
                                                 args.num_classes).cuda()
    net_imageAudioClassify = builder.build_imageAudioClassifierNet(
        net_classifier, args).cuda()
    model = builder.build_audioPreviewLSTM(net_classifier, args)
    model = model.cuda()

    # DATA LOADING
    train_ds, train_collate = create_training_dataset(args, logger=logger)
    val_ds, val_collate = create_validation_dataset(args, logger=logger)
    train_loader = torch.utils.data.DataLoader(train_ds,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.decode_threads,
                                               collate_fn=train_collate)
    val_loader = torch.utils.data.DataLoader(val_ds,
                                             batch_size=args.batch_size,
                                             num_workers=4,
                                             collate_fn=val_collate)

    args.iters_per_epoch = len(train_loader)
    args.warmup_iters = args.warmup_epochs * args.iters_per_epoch
    args.milestones = [args.iters_per_epoch * m for m in args.milestones]

    # Define the loss function (criterion) and optimizer
    criterion = {}
    criterion['CrossEntropyLoss'] = nn.CrossEntropyLoss().cuda()

    if args.freeze_imageAudioNet:
        # Only optimize the modules that are not frozen
        param_groups = [{'params': model.queryfeature_mlp.parameters(),
                         'lr': args.lr},
                        {'params': model.prediction_fc.parameters(),
                         'lr': args.lr},
                        {'params': model.key_conv1x1.parameters(),
                         'lr': args.lr},
                        {'params': model.rnn.parameters(), 'lr': args.lr},
                        {'params': net_classifier.parameters(), 'lr': args.lr}]
        optimizer = torch.optim.SGD(param_groups,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     args.milestones)
    # Make the optimizer scheduler
    if args.scheduler:
        scheduler = default_lr_scheduler(optimizer, args.milestones,
                                         args.warmup_iters)

    cudnn.benchmark = True

    # Set up the checkpointing system
    write_here = True
    checkpointer = Checkpointer(model,
                                optimizer,
                                save_dir=args.checkpoint_path,
                                save_to_disk=write_here,
                                scheduler=scheduler,
                                logger=logger)

    if args.pretrained_model is not None:
        logger.debug("Loading model only at: {}".format(args.pretrained_model))
        checkpointer.load_model_only(f=args.pretrained_model)

    if checkpointer.has_checkpoint():
        # Resume from the last checkpoint
        logger.debug("Loading last checkpoint")
        checkpointer.load()

    model = torch.nn.parallel.DataParallel(model).cuda()
    logger.debug(model)

    # Log all info
    if writer:
        writer.add_text("namespace", repr(args))
        writer.add_text("model", str(model))

    #
    # TRAINING
    #
    logger.debug("Entering the training loop")
    for epoch in range(args.start_epoch, args.epochs):
        # Train for one epoch
        train_accuracy, train_loss = train_epoch(args, epoch, train_loader,
                                                 model, criterion, optimizer,
                                                 scheduler, logger,
                                                 epoch_logger=epoch_log,
                                                 checkpointer=checkpointer,
                                                 writer=writer)
        test_map, test_accuracy, test_loss, _ = validate(
            args, epoch, val_loader, model, criterion,
            epoch_logger=epoch_log, writer=writer)

        if writer is not None:
            writer.add_scalars('training_curves/accuracies',
                               {'train': train_accuracy,
                                'val': test_accuracy}, epoch)
            writer.add_scalars('training_curves/loss',
                               {'train': train_loss,
                                'val': test_loss}, epoch)