def _save_fitable(self, run: Run, fitable: Model):
    """
    Save the model to the configured path and attach the saved file to the run.

    The destination and the verbosity level are read from
    ``self.exp_config["run_config"]`` (keys ``model_path`` and
    ``save_verbosity``).

    :param run: sacred.Run object; receives the saved model as an artifact.
    :param fitable: tensorflow.keras.Model object to be saved.
    """
    run_config = self.exp_config["run_config"]
    model_path = run_config["model_path"]

    # Any positive verbosity prints the model summary before saving.
    wants_summary = run_config["save_verbosity"] > 0
    if wants_summary:
        fitable.summary()

    fitable.save(model_path)
    run.add_artifact(model_path)
Ejemplo n.º 2
0
def train(train_corpus: str,
          dev_corpus: str,
          c1: float = 0.0,
          c2: float = 0.0,
          algorithm: str = 'lbfgs',
          max_iterations: int = 100,
          all_possible_transitions: bool = False,
          window_size: int = 1,
          model_filename: str = None,
          _run: Run = None,
          _log: logger = None):
    """
    Fit a sklearn_crfsuite CRF on the training corpus and score it on the dev corpus.

    Both corpora are registered as run resources. Overall and per-type
    metrics returned by ``evaluate`` are written to ``_run.info``, logged as
    scalars, and echoed through ``_log``. When ``model_filename`` is given,
    the fitted CRF is also dumped to ``<model_filename>.pkl`` and attached
    to the run as an artifact.

    :param train_corpus: path of the training corpus.
    :param dev_corpus: path of the development corpus.
    :param c1: passed to ``sklearn_crfsuite.CRF`` as ``c1``.
    :param c2: passed to ``sklearn_crfsuite.CRF`` as ``c2``.
    :param algorithm: passed to ``sklearn_crfsuite.CRF`` as ``algorithm``.
    :param max_iterations: passed to ``sklearn_crfsuite.CRF``.
    :param all_possible_transitions: passed to ``sklearn_crfsuite.CRF``.
    :param window_size: forwarded to ``sent2features`` for every sentence.
    :param model_filename: passed to the CRF; also the stem of the pickle file.
    :param _run: sacred.Run object used for resources, metrics and artifacts.
    :param _log: logger used for progress messages.
    """
    for corpus_path in (train_corpus, dev_corpus):
        _run.add_resource(corpus_path)

    train_sents, _ = get_tagged_sents_and_words(train_corpus)
    dev_sents, _ = get_tagged_sents_and_words(dev_corpus)

    X_train = [sent2features(sentence, window_size) for sentence in train_sents]
    y_train = [sent2labels(sentence) for sentence in train_sents]

    X_dev = [sent2features(sentence, window_size) for sentence in dev_sents]
    y_dev = [sent2labels(sentence) for sentence in dev_sents]

    crf_options = dict(
        algorithm=algorithm,
        c1=c1,
        c2=c2,
        max_iterations=max_iterations,
        all_possible_transitions=all_possible_transitions,
        model_filename=model_filename,
    )
    crf = sklearn_crfsuite.CRF(**crf_options)
    crf.fit(X_train, y_train)

    overall, by_type = evaluate(y_dev, crf.predict(X_dev))

    # Overall scores: stored in the run info and logged as scalars under the same name.
    overall_metrics = (
        ('overall_f1', 'f1_score'),
        ('overall_precision', 'precision'),
        ('overall_recall', 'recall'),
    )
    for metric_name, attribute in overall_metrics:
        _run.info[metric_name] = getattr(overall, attribute)
        _run.log_scalar(metric_name, getattr(overall, attribute))
    _log.info(f'Overall F1 score: {overall.f1_score}')

    # Per-type scores, visited in sorted key order.
    for type_name in sorted(by_type.keys()):
        type_scores = by_type[type_name]
        for field_name in type_scores._fields:
            field_value = getattr(type_scores, field_name)
            _run.info[f'{type_name}-{field_name}'] = field_value
            _run.log_scalar(f'{type_name}-{field_name}', field_value)
            _log.info(f'{type_name}-{field_name}: {field_value}')

    if model_filename is None:
        return

    # Persist the fitted estimator next to the crfsuite model file.
    _log.info(f'saving to: {model_filename}.pkl')
    joblib.dump(crf, f'{model_filename}.pkl')
    _run.add_artifact(f'{model_filename}.pkl')
Ejemplo n.º 3
0
def train(train_corpus: str,
          dev_corpus: str,
          pacrf: str,
          model_filename: str,
          labels: List,
          c1: float = 0.0,
          c2: float = 1.0,
          algorithm: str = 'lbfgs',
          max_iterations: int = None,
          all_possible_transitions: bool = False,
          window_size: int = 0,
          _run: Run = None,
          _log: logger = None):
    """
    Run a CRF experiment through the external ``pacrf`` command-line binary.

    The training and dev corpora are written as feature files to temporary
    files, ``pacrf learn`` is run on the training file, ``pacrf tag`` is run
    on the dev file, and the predictions are scored with ``evaluate``.
    Overall and per-type metrics are written to ``_run.info``, logged as
    scalars, and echoed through ``_log``. The model file is attached to the
    run as an artifact.

    :param train_corpus: path of the training corpus.
    :param dev_corpus: path of the development corpus.
    :param pacrf: path of the executable to invoke.
    :param model_filename: model path passed to the binary via ``-m``.
    :param labels: forwarded to ``print_corpus``.
    :param c1: passed to ``learn`` as ``-p c1=<value>``.
    :param c2: passed to ``learn`` as ``-p c2=<value>``.
    :param algorithm: passed to ``learn`` via ``-a``.
    :param max_iterations: accepted but not forwarded to the binary.
    :param all_possible_transitions: accepted but not forwarded to the binary.
    :param window_size: forwarded to ``print_corpus``.
    :param _run: sacred.Run object used for resources, metrics and artifacts.
    :param _log: logger used for progress messages.
    :raises RuntimeError: if the ``learn`` or ``tag`` subprocess exits non-zero.
    """
    _run.add_resource(train_corpus)
    _run.add_resource(dev_corpus)
    train_sents, _ = get_tagged_sents_and_words(train_corpus)
    dev_sents, _ = get_tagged_sents_and_words(dev_corpus)

    # Reference labels for the dev set.
    y_dev = [sent2labels_colmap(s, col=1) for s in dev_sents]

    # Context managers guarantee the temporary files are removed even when a
    # subprocess fails or a helper raises.
    with tempfile.NamedTemporaryFile(mode='w+') as tmp_train, \
            tempfile.NamedTemporaryFile(mode='w+') as tmp_dev, \
            tempfile.NamedTemporaryFile(mode='w+') as tmp_pred:
        print_corpus(train_sents, labels, tmp_train, window_size=window_size)
        print_corpus(dev_sents, labels, tmp_dev, window_size=window_size)
        # The child processes open these files by name, so buffered writes
        # must be pushed out first or they may see truncated input.
        tmp_train.flush()
        tmp_dev.flush()

        learn_proc = Popen([pacrf, "learn", "-m", model_filename, "-a", algorithm,
                            "-p", f"c1={c1}", "-p", f"c2={c2}", tmp_train.name])
        learn_status = learn_proc.wait()
        if learn_status != 0:
            raise RuntimeError(
                f'{pacrf} learn exited with status {learn_status}')

        _run.add_artifact(model_filename)

        # NOTE(review): assumes `tag` writes its predictions to stdout in a
        # format get_tagged_sents_and_words can parse, mirroring the
        # "tag -m <model> <dev> > <pred>" command this step was meant to run
        # -- confirm against the binary.
        tag_proc = Popen([pacrf, "tag", "-m", model_filename, tmp_dev.name],
                         stdout=tmp_pred)
        tag_status = tag_proc.wait()
        if tag_status != 0:
            raise RuntimeError(
                f'{pacrf} tag exited with status {tag_status}')

        # The reader returns a (sentences, words) pair, as for the corpora above.
        pred_sents, _ = get_tagged_sents_and_words(tmp_pred.name)
        y_pred = [sent2labels_colmap(s, 0) for s in pred_sents]

    overall, by_type = evaluate(y_dev, y_pred)

    _run.info['overall_f1'] = overall.f1_score
    _run.log_scalar('overall_f1', overall.f1_score)
    _run.info['overall_precision'] = overall.precision
    _run.log_scalar('overall_precision', overall.precision)
    _run.info['overall_recall'] = overall.recall
    _run.log_scalar('overall_recall', overall.recall)
    _log.info(f'Overall F1 score: {overall.f1_score}')
    for key in sorted(by_type.keys()):
        for metric_key in by_type[key]._fields:
            metric_val = getattr(by_type[key], metric_key)
            _run.info[f'{key}-{metric_key}'] = metric_val
            _run.log_scalar(f'{key}-{metric_key}', metric_val)
            _log.info(f'{key}-{metric_key}: {metric_val}')
Ejemplo n.º 4
0
def sacred_main(_run: Run, seed, showoff, out_dir, batch_size, epochs, tags,
                model_desc, experiment_id, weights, train_examples,
                val_examples, deterministic, train_datasets, val_datasets, lr,
                lr_milestones, lr_gamma, optim_algorithm):
    """
    Train a model for ``epochs`` epochs inside a Sacred run.

    The function seeds the run, builds the model (optionally restoring it
    from ``weights``), creates the data loaders, wires up reporting, builds
    the learning-rate scheduler, and then alternates training and validation
    passes. When ``out_dir`` is set, the config and the latest checkpoint are
    written under ``<out_dir>/<experiment_id>`` and the final checkpoint is
    added to the run as an artifact.

    :param _run: sacred.Run object; receives telemetry, the result and the artifact.
    :param seed: passed to ``seed_all``.
    :param showoff: when truthy, a showoff notebook is created for reporting.
    :param out_dir: base output directory; falsy disables writing files.
    :param batch_size: passed to both dataloader factories.
    :param epochs: number of epochs to run.
    :param tags: passed to ``create_showoff_notebook``.
    :param model_desc: model description; replaced by the checkpoint's own
        description when ``weights`` is given.
    :param experiment_id: name of the experiment sub-directory; also printed.
    :param weights: checkpoint to load with ``torch.load``, or None.
    :param train_examples: passed to ``create_train_dataloader``.
    :param val_examples: passed to ``create_val_dataloader``.
    :param deterministic: passed to ``init_algorithms``.
    :param train_datasets: training datasets; also stored in the checkpoint.
    :param val_datasets: validation datasets; empty disables validation.
    :param lr: learning rate (``lr_max`` for the '1cycle' schedule).
    :param lr_milestones: passed to ``learning_schedule``.
    :param lr_gamma: passed to ``learning_schedule``.
    :param optim_algorithm: '1cycle' or a name understood by ``learning_schedule``.
    :return: the last value stored in ``_run.result``.
    """
    seed_all(seed)
    init_algorithms(deterministic=deterministic)

    # Resolve (and create) the per-experiment output directory, if any.
    run_dir = None
    if out_dir:
        run_dir = path.join(out_dir, experiment_id)
        makedirs(run_dir, exist_ok=True)
    print(f'Experiment ID: {experiment_id}')

    # ---- Model ----
    if weights is not None:
        # A checkpoint carries its own model description, which takes
        # precedence over the one passed in.
        checkpoint = torch.load(weights)
        model_desc = checkpoint['model_desc']
        model = create_model(model_desc)
        model.load_state_dict(checkpoint['state_dict'])
    else:
        model = create_model(model_desc)
    model.to(global_opts['device'])

    print(json.dumps(model_desc, sort_keys=True, indent=2))

    # ---- Data ----
    train_loader = create_train_dataloader(train_datasets, model.data_specs,
                                           batch_size, train_examples)
    val_loader = None
    if len(val_datasets) > 0:
        val_loader = create_val_dataloader(val_datasets, model.data_specs,
                                           batch_size, val_examples)

    # ---- Reporting ----
    has_validation = val_loader is not None
    reporter = Reporter(with_val=has_validation)

    reporter.setup_console_output()
    reporter.setup_sacred_output(_run)

    notebook = None
    if showoff:
        notebook_title = '3D pose model ({}@{})'.format(model_desc['type'],
                                                        model_desc['version'])
        notebook = create_showoff_notebook(notebook_title, tags)
        reporter.setup_showoff_output(notebook)

    def set_progress(value):
        # Progress is only reported when a showoff notebook exists.
        if notebook is None:
            return
        notebook.set_progress(value)

    telemetry = reporter.telemetry

    telemetry['config'].set_value(_run.config)
    telemetry['host_info'].set_value(get_host_info())

    # ---- Optimiser ----
    if optim_algorithm != '1cycle':
        scheduler = learning_schedule(model.parameters(), optim_algorithm, lr,
                                      lr_milestones, lr_gamma)
    else:
        from torch import optim
        optimiser = optim.SGD(model.parameters(), lr=0)
        total_steps = epochs * len(train_loader)
        scheduler = make_1cycle(optimiser,
                                total_steps,
                                lr_max=lr,
                                momentum=0.9)

    # ---- Training ----
    checkpoint_path = None
    if run_dir:
        checkpoint_path = path.join(run_dir, 'model-latest.pth')
        config_path = path.join(run_dir, 'config.json')
        with open(config_path, 'w') as config_file:
            json.dump(telemetry['config'].value(), config_file,
                      sort_keys=True, indent=2)

    for epoch in range(epochs):
        telemetry['epoch'].set_value(epoch)
        print('> Epoch {:3d}/{:3d}'.format(epoch + 1, epochs))

        def on_train_progress(samples_processed):
            # Fraction of all samples, across every epoch, handled so far.
            samples_done = epoch * len(train_loader.dataset) + samples_processed
            samples_total = epochs * len(train_loader.dataset)
            set_progress(samples_done / samples_total)

        do_training_pass(epoch, model, telemetry, train_loader, scheduler,
                         on_train_progress)
        if val_loader:
            do_validation_pass(epoch, model, telemetry, val_loader)

        _run.result = telemetry['train_pck'].value()[0]

        if model_file_is_set := (checkpoint_path is not None):
            # Overwrite the "latest" checkpoint after every epoch.
            snapshot = {
                'state_dict': model.state_dict(),
                'model_desc': model_desc,
                'train_datasets': train_datasets,
                'optimizer': scheduler.optimizer.state_dict(),
                'epoch': epoch + 1,
            }
            torch.save(snapshot, checkpoint_path)

        telemetry.step()

    # Add the final model as a Sacred artifact
    if checkpoint_path is not None and path.isfile(checkpoint_path):
        _run.add_artifact(checkpoint_path)

    set_progress(1.0)
    return _run.result