Example #1
def init(args):
    """
        Load data, build model, create optimizer, create vars to hold metrics, etc.
    """
    #need to handle really large text fields
    csv.field_size_limit(sys.maxsize)

    #load vocab and other lookups
    desc_embed = args.lmbda > 0
    print("loading lookups...")
    dicts = datasets.load_lookups(args, desc_embed=desc_embed)

    model = tools.pick_model(args, dicts)
    print(model)

    if not args.test_model:
        optimizer = optim.Adam(model.parameters(),
                               weight_decay=args.weight_decay,
                               lr=args.lr)
    else:
        optimizer = None

    params = tools.make_param_dict(args)

    return args, model, optimizer, params, dicts
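
These examples are excerpts and omit their module-level imports. A minimal sketch of what Example #1 assumes: `optim` is presumably `torch.optim`, while `datasets` and `tools` are project-local modules, so this header is an assumption rather than the original file's.

# Assumed module-level imports for Example #1 (not part of the original excerpt).
import csv
import sys

import torch.optim as optim   # optim.Adam(..., weight_decay=..., lr=...) matches torch.optim's API

import datasets   # project-local module providing load_lookups / load_vocab_dict
import tools      # project-local module providing pick_model / make_param_dict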
Example #2
def init(args):
    """
        Load data, build model, create optimizer, create vars to hold metrics, etc.
    """
    #need to handle really large text fields
    csv.field_size_limit(
        sys.maxsize)  # Sets field size to max available for strings

    #    freq_params = None
    #    if args.samples or args.lmbda > 0:
    #        print("loading code frequencies...")
    #        code_freqs, n = datasets.load_code_freqs(args.data_path, version=args.version)
    #        print("code_freqs:", sorted(code_freqs.items(), key=operator.itemgetter(1), reverse=True)[:10], "n:", n)
    #        freq_params = (code_freqs, n)

    #load vocab and other lookups
    #    desc_embed = args.lmbda > 0
    #    dicts = datasets.load_lookups(args.data_path, args.vocab, Y=args.Y, desc_embed=desc_embed, version=args.version)

    # LOAD VOCAB DICTS
    dicts = datasets.load_vocab_dict(
        args.vocab_path)  # CHANGED args.vocab --> args.vocab_path

    model = tools.pick_model(args, dicts)
    print(model)

    optimizer = optim.Adam(model.params_to_optimize(),
                           weight_decay=args.weight_decay,
                           lr=args.lr)

    params = tools.make_param_dict(args)

    return args, model, optimizer, params, dicts
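
Example #2 hands the optimizer `model.params_to_optimize()` instead of `model.parameters()`, which suggests the model exposes only a subset of its parameters for training. A hypothetical sketch of such a method (not the project's actual implementation), assuming it simply filters out frozen parameters:

import torch.nn as nn

class ExampleModel(nn.Module):
    """Illustrative model with a frozen embedding layer (assumption, not from the source)."""

    def __init__(self, vocab_size, embed_dim, n_labels):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.embed.weight.requires_grad = False   # e.g. pretrained embeddings kept fixed
        self.out = nn.Linear(embed_dim, n_labels)

    def params_to_optimize(self):
        # Give the optimizer only the parameters that still require gradients.
        return [p for p in self.parameters() if p.requires_grad]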
Example #3
def train_epochs(args, model, optimizer, params, dicts):
    """
        Main loop. Runs training and testing.
    """
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None
    evaluate = args.test_model is not None
    #train for n_epochs unless criterion metric does not improve for [patience] epochs
    for epoch in range(args.n_epochs):
        #only test on train/test set on very last epoch
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(
                MODEL_DIR, '_'.join([
                    args.model,
                    time.strftime('%b_%d_%H:%M:%S', time.localtime())
                ]))
            os.mkdir(model_dir)
        elif args.test_model:
            model_dir = os.path.dirname(os.path.abspath(args.test_model))
        metrics_all = one_epoch(model, optimizer, args.Y, epoch, args.n_epochs,
                                args.batch_size, args.data_path, args.version,
                                test_only, dicts, model_dir, args.samples,
                                args.gpu, args.quiet)
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        #save metrics, model, params
        persistence.save_everything(args, metrics_hist_all, model, model_dir,
                                    params, args.criterion, evaluate)

        if test_only:
            #we're done
            break

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                #stop training, do tests on test and train sets, and then stop the script
                print("%s hasn't improved in %d epochs, early stopping..." %
                      (args.criterion, args.patience))
                test_only = True
                args.test_model = '%s/model_best_%s.pth' % (model_dir,
                                                            args.criterion)
                model = tools.pick_model(args, dicts)
    return epoch + 1
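
Both training loops rely on an `early_stop(metrics_hist, criterion, patience)` helper that is not shown. A minimal sketch consistent with how it is used here, assuming the criterion is higher-is-better unless its name contains "loss":

import numpy as np

def early_stop(metrics_hist, criterion, patience):
    """Return True when `criterion` has not improved for `patience` epochs.

    Sketch only: the real helper may decide differently whether the metric
    should be minimized or maximized.
    """
    hist = metrics_hist[criterion]
    if len(hist) <= patience:
        return False
    if 'loss' in criterion:
        best = int(np.nanargmin(hist))   # lower is better
    else:
        best = int(np.nanargmax(hist))   # higher is better
    # Stop if the best epoch is more than `patience` epochs behind the latest one.
    return best < len(hist) - patience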
Example #4
def init(args):
    """
        Load data, build model, create optimizer, create vars to hold metrics, etc.
    """
    #need to handle really large text fields
    csv.field_size_limit(sys.maxsize)

    #load vocab and other lookups
    desc_embed = args.lmbda > 0
    print("loading lookups...")
    dicts = datasets.load_lookups(args, desc_embed=desc_embed)

    META_TEST = args.test_model is not None
    model, start_epoch, optimizer = tools.pick_model(args, dicts, META_TEST)
    print(model)

    params = tools.make_param_dict(args)

    return args, model, optimizer, params, dicts, start_epoch
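
In Example #4, `tools.pick_model` also returns a `start_epoch` and an `optimizer`, which points at checkpoint resumption. A hypothetical sketch of that pattern using a plain PyTorch checkpoint; the file layout and key names are assumptions, not the project's actual format:

import torch
import torch.optim as optim

def load_checkpoint(model, ckpt_path, lr, weight_decay):
    """Restore model/optimizer state and return (model, start_epoch, optimizer).

    Sketch only: key names ('model_state', 'optimizer_state', 'epoch') are illustrative.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    start_epoch = checkpoint['epoch'] + 1   # resume after the saved epoch
    return model, start_epoch, optimizer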
Example #5
def init(args):
    """
        Load data, build model, create optimizer, create vars to hold metrics, etc.
    """
    #need to handle really large text fields
    csv.field_size_limit(
        sys.maxsize)  # Sets field size to max available for strings

    # LOAD VOCAB DICTS
    dicts = datasets.load_vocab_dict(
        args.vocab_path)  # CHANGED args.vocab --> args.vocab_path

    model = tools.pick_model(args, dicts)
    print(model)

    optimizer = optim.Adam(model.params_to_optimize(),
                           weight_decay=args.weight_decay,
                           lr=args.lr)

    params = tools.make_param_dict(args)

    return args, model, optimizer, params, dicts
Example #6
def train_epochs(args, model, optimizer, params, dicts, start_epoch):
    """
        Main loop. Runs training and testing.
    """
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    if args.reload_model:  #get existing metrics up to model checkpoint:
        with open(os.path.join(args.reload_model, 'metrics.json'), 'r') as f:
            metrics_hist_all = json.load(f)

        #this will be a little expensive-- iterate through and re-assign:
        for el in metrics_hist_all:
            if '_te' in el:
                metrics_hist_te[re.sub(
                    '_te$', '', el)] = metrics_hist_all[el][:start_epoch]
            elif '_tr' in el:
                metrics_hist_tr[re.sub(
                    '_tr$', '', el)] = metrics_hist_all[el][:start_epoch]
            else:  #dev
                metrics_hist[el] = metrics_hist_all[el][:start_epoch]

    META_TEST = args.test_model is not None
    test_only = args.test_model is not None
    evaluate = args.test_model is not None
    #train for n_epochs unless criterion metric does not improve for [patience] epochs
    for epoch in range(
            start_epoch, args.n_epochs
    ):  #only train for _x_ more epochs from best-saved model*
        #only test on train/test set on very last epoch
        if epoch == start_epoch and not args.test_model:
            model_dir = os.path.join(
                MODEL_DIR, '_'.join([
                    args.model,
                    time.strftime('%b_%d_%H:%M', time.localtime())
                ]))
            os.mkdir(model_dir)

            #save model versioning (git) info:
            repo = git.Repo(search_parent_directories=True)
            branch = repo.active_branch.name
            print("branch:", branch)
            sha = repo.head.object.hexsha
            print("SHA hash:", sha)
            persistence.save_git_versioning_info(
                model_dir, (branch, sha, args.description))

        elif args.test_model:
            model_dir = os.path.dirname(os.path.abspath(args.test_model))

        metrics_all = one_epoch(args, model, optimizer, epoch, test_only,
                                dicts, model_dir, META_TEST)
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])

        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        #save metrics, model, params
        assert '-' not in args.criterion
        persistence.save_everything(args, metrics_hist_all, model, model_dir,
                                    params, optimizer, evaluate)

        if test_only:
            #we're done
            break

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                #stop training, do tests on test and train sets, and then stop the script
                print("%s hasn't improved in %d epochs, early stopping..." %
                      (args.criterion, args.patience))
                test_only = True
                test_m = [
                    o for o in os.listdir(model_dir) if 'model_best' in o
                ]
                assert (len(test_m) == 1)
                args.test_model = os.path.join(model_dir, test_m[0])
                model, _, _ = tools.pick_model(args, dicts, META_TEST)

    return epoch - start_epoch + 1
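
For completeness, a hypothetical entry point wiring Example #4's `init` into Example #6's `train_epochs`. The flag names mirror the attributes used above, but the parser itself is an assumption and only a subset of the required arguments is shown:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("data_path")
    parser.add_argument("--model", default="conv_attn")            # illustrative default
    parser.add_argument("--n-epochs", dest="n_epochs", type=int, default=100)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--weight-decay", dest="weight_decay", type=float, default=0.0)
    parser.add_argument("--lmbda", type=float, default=0.0)
    parser.add_argument("--criterion", default="f1_micro")         # metric name is project-specific
    parser.add_argument("--patience", type=int, default=3)
    parser.add_argument("--test-model", dest="test_model", default=None)
    parser.add_argument("--reload-model", dest="reload_model", default=None)
    args = parser.parse_args()

    args, model, optimizer, params, dicts, start_epoch = init(args)
    epochs_trained = train_epochs(args, model, optimizer, params, dicts, start_epoch)
    print("trained for %d epoch(s)" % epochs_trained)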