Beispiel #1
0
def is_finite(s):
    """Return True if ``s`` converts to a finite float (not NaN, not +/-inf).

    :param s: anything accepted by ``float()`` (str, int, float, ...)
    :return: bool; False for NaN, +/-inf, and anything ``float()`` rejects
    """
    import math  # function-scope import, matching the file's style

    try:
        # math.isfinite covers both the NaN and the +/-infinity checks
        # that were previously done separately
        return math.isfinite(float(s))
    except Exception:
        # non-convertible input (None, 'abc', ...) is not a finite number
        return False
Beispiel #2
0
def is_number(s):
    """Return True if ``s`` converts to a non-NaN float.

    Booleans and None are explicitly NOT numbers, even though
    ``float(True)`` would succeed (bool is an int subclass).

    :param s: candidate value
    :return: bool; infinities count as numbers, NaN does not
    """
    import math  # function-scope import, matching the file's style

    # NOTE: `== None` (not `is None`) is deliberate — the project's Null
    # object compares equal to None without being None
    if isinstance(s, bool) or s == None:
        return False

    try:
        return not math.isnan(float(s))
    except Exception:
        return False
Beispiel #3
0
def SUM(values):
    """Sum the usable values in ``values``.

    None-like values and float NaN are skipped entirely.  Returns the
    project ``Null`` object when no usable value was seen.
    """
    total = Null
    for value in values:
        # skip missing values; `== None` also matches the project Null
        if value == None:
            continue
        # skip NaN floats — they would poison the whole sum
        if isinstance(value, float) and math_isnan(value):
            continue
        if total == None:
            # first usable value seeds the accumulator
            total = value
        else:
            total += value
    return total
Beispiel #4
0
def PRODUCT(values, *others):
    """Multiply the usable values in ``values``.

    None-like values and float NaN are skipped.  Returns the project
    ``Null`` object when no usable value was seen.  Extra positional
    arguments are a deprecated calling convention and raise an error.
    """
    if others:
        from mo_logs import Log
        Log.error("no longer accepting args, use a single list")

    result = Null
    for value in values:
        # skip missing values (`== None` also matches the project Null)
        # and NaN floats, which would poison the product
        if value == None or (isinstance(value, float) and math_isnan(value)):
            continue
        if result == None:
            # first usable value seeds the accumulator
            result = value
        else:
            result *= value
    return result
Beispiel #5
0
def is_nan(s):
    """Return True if ``s`` is missing (None/Null) or a float NaN.

    :param s: None, a None-like project Null, or a real number
    :return: bool
    :raises TypeError: if ``s`` is neither None-like nor a real number
    """
    import math  # function-scope import, matching the file's style

    # `== None` (not `is None`) is deliberate — the project's Null
    # object compares equal to None without being None
    return s == None or math.isnan(s)
Beispiel #6
0
def train(model_dir=None,
          params='default',
          data_dir='data',
          epochs=15,
          batch_size=500,
          retrain=None,
          train_steps=None,
          test_steps=None,
          debug_mode=False):
    """
    Train the model, evaluating on the test set after every epoch.

    :param model_dir: where to store training results (when debug_mode=False);
                        if None, a directory name is derived from date/time
    :param params: dict with train and feature params.
                    if params == 'default' take params from params.py
    :param data_dir: dir with: npy/ , data.csv
    :param epochs: number of training epochs
    :param batch_size: batch size for both the train and test loaders
    :param retrain: path/to/model.pt that we need to re-train
    :param train_steps: how many batches to run per epoch;
                    if None, all batches
    :param test_steps: how many test batches to run after each epoch;
                        if None, the whole test set
    :param debug_mode: if True, run without saving model, summary and logs
    """

    # get train params
    if params == 'default':
        params = parametres  # see params.py

    if not debug_mode:
        if not model_dir:
            # create model_dir
            model_dir = datetime.now().strftime("%b%d-%H:%M_run")
            if retrain:
                model_dir = model_dir.replace('run', 'retrain')
        os.makedirs(os.path.join(model_dir, 'saves'))
        print('Model will store in: {}'.format(model_dir), flush=True)
        # -model_dir/saves
        # -model_dir/train.log
        # -model_dir/test.log
        # -model_dir/test.csv
        # -model_dir/train.csv
    else:
        print('Debug mode. No saves and no logs')

    # logging
    if not debug_mode:
        logfile = os.path.join(model_dir, 'train.log')
        # BUG FIX: was `log_file` (undefined name -> NameError); use `logfile`
        print('\nTrain logs to: {}\n'.format(logfile), flush=True)
    else:
        logfile = None  # logs to console

    if logging.getLogger().hasHandlers():  # if a logger already exists
        change_logger(logging, logfile)
    else:
        logging.basicConfig(filename=logfile,
                            format="%(message)s",
                            level=logging.INFO)

    # info about parametres
    logging.info('Parametres:\n {}\n'.format(params))

    # split train and test Sets
    logging.info('Split train and test Sets...')
    train_csv, test_csv = split_train_test(data_dir, model_dir)

    # load train data
    # BUG FIX: the loaders used to be bound to locals named `train` and
    # `test`, shadowing the module-level `test()` function called below;
    # distinct names keep that function reachable
    train_set = AudioDataset(train_csv, data_dir, params)
    input_shape = train_set.get_input_shape()
    logging.info('Input shape: {}\n'.format(input_shape))
    train_sampler = BucketingSampler(train_set, batch_size)
    train_loader = DataLoaderCuda(train_set,
                                  collate_fn=collate_audio,
                                  batch_sampler=train_sampler)

    # load test data
    test_set = AudioDataset(test_csv, data_dir, params)
    test_sampler = BucketingSampler(test_set, batch_size)
    test_loader = DataLoaderCuda(test_set,
                                 collate_fn=collate_audio,
                                 batch_sampler=test_sampler)

    # init model
    model = model_init(params,
                       train=True,
                       model_path=retrain,
                       use_cuda=True,
                       logger=logging)

    # select optimizer
    if params['opt'] == 'Adam':
        # BUG FIX: `lr` was an undefined name.
        # NOTE(review): assumes the learning rate lives in params['lr'],
        # alongside the other optimizer keys — confirm against params.py
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=params['lr'],
                                     weight_decay=params['weight_decay'])
    else:
        raise Exception('No optimizer: {}'.format(params['opt']))

    # reduce learning rate every params['lr_reduce_ep'] epochs
    scheduler = StepLR(optimizer, step_size=params['lr_reduce_ep'], gamma=0.1)

    # summary writer
    if not debug_mode:
        log_dir = os.path.join(params['logdir'], model_dir)
        writer_train = SummaryWriter(log_dir=os.path.join(log_dir, 'train'))
        writer_test = SummaryWriter(log_dir=os.path.join(log_dir, 'test'))
        writer_train.add_graph(model, torch.rand(1, *input_shape))
        logging.info('Logs for this model restored at {}'.format(log_dir))
    else:
        writer_test = writer_train = None

    loss = torch.nn.CrossEntropyLoss()

    # n batches
    # BUG FIX: was `train_step` — a variable not assigned until below
    n_train = train_steps if train_steps else len(train_loader)
    n_test = test_steps if test_steps else len(test_loader)
    # BUG FIX: interval renamed from `k` (the metrics loop below used to
    # clobber it) and clamped to >= 1 to avoid modulo/division by zero
    log_every = max(1, round(n_train / n_test)) if n_test else 1

    # train and test step counters
    train_step, test_step = 0, 0
    # TODO(review): test_step is never advanced here; confirm test()
    # manages its own step counter

    # best_metric init
    best_loss = 1000

    for ep in range(1, epochs + 1):
        logging.info('\n-------------- {} epoch --------------'.format(ep))
        print('{}/{} Epoch...'.format(ep, epochs))

        model.train()
        train_loader.shuffle(ep)
        for i, (x, target) in enumerate(train_loader):
            optimizer.zero_grad()  # reset gradients from the previous step

            logits, probs = model(x)
            # logits - before activation (for loss)
            # probs - after activation   (for acc)

            # CrossEntropy loss
            output = loss(logits, target)  # is graph (for backward)
            loss_value = output.item()  # is float32

            # in case of learning crash
            # BUG FIX: `tensor.isnan(...)` referenced an undefined name;
            # loss_value is already a Python float, so one NaN check suffices
            if math_isnan(loss_value):
                message = 'Loss is nan on {} train step. Learning crash!'.format(
                    train_step)
                logging.info(message)
                print(message)
                return

            # accuracy
            acc_value = accuracy(probs, target)

            # summary
            if not debug_mode and train_step % log_every == 0:
                writer_train.add_scalar('Loss/steps', loss_value, train_step)
                writer_train.add_scalar('Accuracy/steps', acc_value,
                                        train_step)

            # backpropagation: computes w.grad for every model parameter w;
            # weights are NOT updated here
            output.backward()

            clip_grad_norm_(model.parameters(),
                            params['grad_norm'])  # prevent exploding gradient

            # weight update: w_new = w_old - lr * w.grad
            optimizer.step()

            logging.info('| Epoch {}: {}/{} | Loss {:.3f} | Acc {:.2f}'.format(
                ep, i + 1, n_train, loss_value, acc_value))

            train_step += 1

            # interrupt
            if train_steps and i + 1 == train_steps:
                break

        scheduler.step()
        new_lr = float(optimizer.param_groups[0]['lr'])
        logging.info('Updated learning rate: {}'.format(new_lr))

        # saving
        # model_dir/saves/ep_1.pt
        # BUG FIX: the path used to be built unconditionally, crashing in
        # debug mode when model_dir is None
        if not debug_mode:
            save_name = os.path.join(model_dir, 'saves', 'ep_{}.pt'.format(ep))
            save_weights(model, save_name, train_step)
        else:
            save_name = None  # nothing is saved in debug mode

        logging.info('\n------------- Test ---------------')
        # test logger setup
        if not debug_mode:
            test_logfile = os.path.join(model_dir, 'test.log')
            change_logger(logging, test_logfile)
            logging.info('Test results to: {}'.format(test_logfile))

        avg_metrics = test(model=model,
                           model_path=save_name,
                           params=params,
                           data_test=test_loader,
                           data_dir=data_dir,
                           test_csv=test_csv,
                           writer=writer_test,
                           step=test_step,
                           batch_size=batch_size,
                           total_steps=test_steps,
                           use_tb=not debug_mode,
                           logfile=logging)
        message = ''
        # BUG FIX: avg_metrics is indexed as a dict below, so iterate
        # .items(); loop names chosen so nothing outside is clobbered
        for name, value in avg_metrics.items():
            message += '{}: {}\n'.format(name, value)
        # BUG FIX: the metrics summary was built but never emitted
        logging.info(message)

        # check whether it's the best metrics
        if avg_metrics['loss'] < best_loss:
            best_loss = avg_metrics['loss']
            message = 'New best results'
            logging.info(message)
            print(message)

    if not debug_mode:
        writer_train.close()
        writer_test.close()