Example #1
File: trainer.py  Project: aihill/kaggle-1
    def __init__(self,
                 model: torch.nn.Module,
                 criterion: torch.nn.Module,
                 optimizer: torch.optim.Optimizer,
                 metrics: Dict[str, Metric],
                 data_loaders: AttrDict,
                 max_norm: float = None,
                 norm_type: int = 2,
                 scheduler: torch.optim.lr_scheduler._LRScheduler = None,
                 is_iteration_scheduler: bool = False,
                 device: torch.device = None,
                 mixed_precision: bool = False,
                 backup_path: str = None,
                 name: str = 'trainer',
                 logger: Logger = None,
                 finished_epochs: int = 0):
        if mixed_precision:
            # Convert the network to half precision for mixed-precision training.
            model = network_to_half(model)

        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.metrics = metrics
        self.data_loaders = data_loaders
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scheduler = scheduler
        self.is_iteration_scheduler = is_iteration_scheduler
        self.device = device
        self.backup_path = backup_path
        self.finished_epochs = finished_epochs
        # When resuming, translate already-finished epochs into iterations.
        self.finished_iterations = finished_epochs * len(data_loaders.train)

        self.name = name
        self.logger = logger or get_logger()
        self._progress_bar = None
        self._description = 'ITERATION - loss: {:.3f}'

        # Build the training engine with optional gradient clipping and mixed precision.
        self._trainer = create_supervised_trainer(
            model=model,
            optimizer=optimizer,
            loss_fn=criterion,
            max_norm=max_norm,
            norm_type=norm_type,
            device=device,
            mixed_precision=mixed_precision)
        self._register_handlers(self._trainer)
        self._evaluator = create_supervised_evaluator(model, metrics, device)
        self._epoch = 0
        self._iteration = 0
        self._train_loss = ExponentialMovingAverage()
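
The ExponentialMovingAverage that smooths the reported train loss is not shown in this excerpt. A minimal sketch, assuming a standard EMA with a fixed decay factor (the decay value and the update method name are assumptions):

class ExponentialMovingAverage:
    # Assumed implementation: smooths a stream of values with
    # ema = decay * ema + (1 - decay) * value.
    def __init__(self, decay: float = 0.9):
        self.decay = decay
        self.value = None

    def update(self, value: float) -> float:
        if self.value is None:
            self.value = value  # seed with the first observation
        else:
            self.value = self.decay * self.value + (1 - self.decay) * value
        return self.value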
Example #2
import time
import datetime
import requests
from typing import Dict

from bot.service.notifications_service import send_notifications
from commons.constants import MIN_AGE, MIN_CAPACITY, districts, cowin_host, calender_api, cowin_request_headers, \
    DISTRICT_ITERATION_INTERVAL_SECONDS, DISTRICT_SKIP_INTERVAL, COMPLETE_ITERATION_INTERVAL_SECONDS, \
    calender_api_public
from commons.utils import get_logger

logger = get_logger(__name__)

# stores last fetched available slots for a session_id
slot_cache: Dict[str, int] = {}
# stores skip interval for district id
district_cache: Dict[str, int] = {}


def check_slots_in_response(response):
    # Scan every session in the response; the cache ensures the bot only
    # reacts when a session's available capacity has actually changed.
    found_new_free_slots = False
    centers = response.get("centers")
    for center in centers:
        for session in center.get('sessions'):
            session_id = session.get('session_id')
            available_capacity = session.get('available_capacity')
            min_age_limit = session.get('min_age_limit')
            date = session.get('date')
            # A session is interesting only if it matches the age filter,
            # has enough capacity, and its capacity differs from what we
            # cached for this session last time.
            if (min_age_limit == MIN_AGE
                    and available_capacity >= MIN_CAPACITY
                    and available_capacity != slot_cache.get(session_id, -1)):
                slot_cache[session_id] = available_capacity
                found_new_free_slots = True
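
The driver loop that fetches the CoWIN calendar and feeds this function is not part of the excerpt. A minimal sketch, assuming the imported constants hold the host, calendar endpoint path, district ids, and request headers (the query parameter names mirror the public CoWIN calendarByDistrict API but are assumptions here):

def poll_districts():
    # Hypothetical driver: query each district's weekly calendar and
    # hand the parsed JSON to check_slots_in_response.
    while True:
        today = datetime.datetime.now().strftime('%d-%m-%Y')
        for district_id in districts:
            response = requests.get(
                f'{cowin_host}{calender_api}',
                params={'district_id': district_id, 'date': today},
                headers=cowin_request_headers)
            if response.ok:
                check_slots_in_response(response.json())
            time.sleep(DISTRICT_ITERATION_INTERVAL_SECONDS)
        time.sleep(COMPLETE_ITERATION_INTERVAL_SECONDS)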
Example #3
    for threshold in [0.5, 0.3, 0.2]:
        logger.info(f'Threshold: {threshold:.2f}')

        # One identical threshold for every class.
        thresholds = np.full(HumanProteinDataset.NUM_CLASSES, threshold)
        utils.eval_thresholds(y_true_train, y_pred_train, thresholds, 'train')
        utils.eval_thresholds(y_true_valid, y_pred_valid, thresholds, 'valid')

        path = f'{config.submission_path}/{config.exp}_{int(10 * threshold):02}.csv'
        utils.save_submission(y_pred_test > threshold, y_true_test, path)


if __name__ == '__main__':
    remove_resource_limits()

    init_logger(f'{config.exp}_sz{config.image_size}_x{config.batch_size}', config.log_path, config.tensorboard_path)
    logger = get_logger()
    logger.info(f'PID: {os.getpid()}')

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    default_loaders, loaders = get_loaders(path=config.data_path,
                                           image_size=config.image_size,
                                           n_splits=config.k_fold,
                                           test_size=config.test_size,
                                           batch_size=config.batch_size,
                                           num_workers=config.num_workers,
                                           external=config.external_data,
                                           use_sampler=config.use_sampler)

    metrics = {
        'loss': Loss(FocalLoss()),
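
utils.eval_thresholds is not shown in this excerpt. A minimal sketch of such a helper, assuming it binarizes the per-class probabilities at the given thresholds and reports macro F1 (both the metric and the signature are assumptions):

from sklearn.metrics import f1_score

def eval_thresholds(y_true, y_pred, thresholds, split_name):
    # Hypothetical helper mirroring the calls above: apply one
    # threshold per class, then score the binarized predictions.
    y_bin = (y_pred > thresholds).astype(int)
    score = f1_score(y_true, y_bin, average='macro')
    print(f'{split_name}: macro F1 = {score:.4f}')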
Example #4
File: train.py  Project: aihill/kaggle-1
def train_model(name: str,
                model: torch.nn.Module,
                data_loaders: AttrDict,
                metrics: Dict,
                device: torch.device,
                lr: float,
                num_epochs: List[int],
                cycles_len: List[int],
                lr_divs: List[int],
                mixed_precision: bool = False,
                backup_path: str = None):
    fix_seed()
    logger = get_logger()

    num_batches = len(data_loaders.train)
    criterion = BCEWithLogitsLoss()

    # Layer groups for discriminative learning rates: the early layers get
    # the smallest rate, the freshly initialized head gets the largest.
    param_groups = [
        [model.conv1, model.bn1, model.layer1, model.layer2],
        [model.layer3, model.layer4],
        [model.last_linear]
    ]

    lrs = np.array([lr / 10, lr / 3, lr])

    logger.info(f'Learning rate: {lr:.5f}')
    logger.info(f'Learning rates: {lrs}')

    finished_epochs = 0
    for it, (epochs, lr_div, cycle_len) in enumerate(zip(num_epochs, lr_divs, cycles_len)):
        logger.info('Creating new trainer...')
        logger.info(f'Epochs: {epochs}')
        logger.info('Optimizer: Adam')

        if lr_div:
            optimizer = Adam(get_group_params(param_groups, lrs / lr_div), lr=lr)
            logger.info(f'Learning rate divider: {lr_div}')
        else:
            optimizer = Adam(model.parameters(), lr=lr)

        if cycle_len:
            scheduler = CircularLR(optimizer, cycle_len=cycle_len * num_batches, lr_div=10, cut_div=30)
            logger.info(f'Scheduler: {scheduler}')
        else:
            scheduler = None

        trainer = Trainer(name=name,
                          model=model,
                          criterion=criterion,
                          optimizer=optimizer,
                          metrics=metrics,
                          data_loaders=data_loaders,
                          max_norm=1,
                          scheduler=scheduler,
                          is_iteration_scheduler=True,
                          device=device,
                          mixed_precision=mixed_precision,
                          finished_epochs=finished_epochs)
        trainer.train(num_epochs=epochs)
        finished_epochs += epochs
        trainer.save_checkpoint(f'{backup_path}/checkpoint_{name}_{finished_epochs:02}.pth')
        unfreeze(model)  # unfreeze parameters before the next training stage
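
get_group_params is not shown here. A minimal sketch, assuming it pairs each list of modules with its learning rate in the param-group format torch.optim expects:

def get_group_params(param_groups, lrs):
    # Hypothetical sketch: flatten every group of modules into one
    # optimizer param group carrying its own discriminative learning rate.
    return [
        {'params': [p for module in modules for p in module.parameters()],
         'lr': group_lr}
        for modules, group_lr in zip(param_groups, lrs)
    ]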