Code Example #1
import gzip
import cPickle
import pandas
from os.path import join

# models_in_folder, get_vocab_container, and run_model are helpers defined
# elsewhere in this project.
def make_series(model_root_folder,
                plot_interval=100,
                limit=None,
                no_new=False,
                dont_average_embeddings=False,
                **run_model_args):
    average_embeddings = not dont_average_embeddings
    if average_embeddings:
        suffix = 'eval-averaged.pkl'
    else:
        suffix = 'eval-None-False.pkl'  # holdover from when we had include_synsets and normalize_components
    store_fname = join(model_root_folder, suffix)
    print store_fname
    try:
        stats = pandas.read_pickle(store_fname)
        print "read pickle from %s" % store_fname
    except Exception:  # stats file may not exist yet, or may be unreadable
        stats = pandas.DataFrame()
        print 'created new frame'
    if no_new:
        return stats

    models = models_in_folder(model_root_folder)
    model_nums = sorted(models.keys())

    # skip the most recent model, since it may still be in the middle of being written
    latest_num = model_nums[-1] if model_nums else -1

    print 'plotting every %i' % plot_interval

    to_plot = [n for n in model_nums if n % plot_interval == 0 and n != latest_num]
    if 1 in model_nums:
        to_plot = [1] + to_plot
    if limit is not None:
        to_plot = [n for n in to_plot if n <= limit]
    vocab_container = None
    print model_root_folder
    for n in to_plot:
        if n in stats.index:
            print 'already has %i' % n
            continue
        try:
            print 'loading %i' % n
            with gzip.open(models[n]) as f:
                model = cPickle.load(f)
        except Exception as e:
            print e
            continue
        # load the vocabulary if not already cached
        if not vocab_container:
            vocab_container = get_vocab_container(model)
        embeddings = model.averaged_embeddings() if average_embeddings else model.embeddings
        this_stats = run_model(embeddings, vocab_container, **run_model_args)
        # append this model's stats and keep the frame sorted by model number
        stats = pandas.concat([stats, pandas.DataFrame([this_stats], index=[n])]).sort_index()
        stats.to_pickle(store_fname)
    return stats
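Every one of these examples calls a models_in_folder helper that is not included in the excerpts. Judging from how it is used, it returns a dict mapping model number to checkpoint path; a minimal sketch under that assumption (the model-<n>.pkl.gz filename pattern is invented for illustration):

import os
import re

def models_in_folder(folder):
    # map model number -> path for files named like 'model-123.pkl.gz'
    # (the exact filename pattern is an assumption)
    pattern = re.compile(r'model-(\d+)\.pkl\.gz$')
    models = {}
    for fname in os.listdir(folder):
        match = pattern.match(fname)
        if match:
            models[int(match.group(1))] = os.path.join(folder, fname)
    return models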
Code Example #2
def dump_params_from_directory(model_directory, dump_filename='params.json'):
    try:
        models = models_in_folder(model_directory)
        first_model = min(models)  # earliest saved model number
        model_path = models[first_model]
        with gzip.open(model_path, 'rb') as f:
            model = cPickle.load(f)
    except (StopIteration, ValueError):  # min() raises ValueError when models is empty
        print 'no models found in %s' % model_directory
        return
    except IOError as e:
        print 'IO error for %s' % model_directory
        print e
        return
    dump_params(model.other_params, join(model_directory, dump_filename))
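dump_params itself is not shown in these excerpts. A minimal sketch consistent with the json.dump(args, f) call in Code Example #5 below (the indent and the default=repr fallback for non-JSON-serializable values are assumptions):

import json

def dump_params(params, path):
    # serialize the params dict as JSON; fall back to repr() for values
    # (e.g. numpy scalars) that the json module cannot encode directly
    with open(path, 'w') as f:
        json.dump(params, f, indent=2, default=repr)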
Code Example #3
def models_to_delete(directory, retain_every=10):
    models = models_in_folder(directory)

    if not models:
        print 'no models found'
        return {}, set()

    to_keep = {n for n in models if n % retain_every == 0}
    # don't delete the first one, so we can use it as a reference point
    to_keep.add(min(models))

    # the last one may be currently being written
    to_keep.add(max(models))

    return {model: path for model, path in models.items()
            if model not in to_keep}, to_keep
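A caller might consume the returned pair along these lines (a sketch: the directory path is hypothetical, and deleting with os.remove is an assumption about how cleanup is actually performed):

import os

to_delete, to_keep = models_to_delete('models/run1', retain_every=10)
for num, path in sorted(to_delete.items()):
    print 'removing model %i at %s' % (num, path)
    os.remove(path)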
Code Example #4
    parser.add_argument('--semantic_blocks_to_run', type=int, default=1)

    args = vars(parser.parse_args())

    # if we're only running semantic or syntactic, rho and y init must be 0 to
    # isolate the loss function to the syntactic or semantic loss
    if args['dont_run_semantic'] or args['dont_run_syntactic']:
        print 'not running joint model, setting y and rho to 0'
        args['rho'] = 0
        args['y_init'] = 0

    base_dir = args['base_dir']

    # see if this model's already been run. If it has, load it and get the
    # params
    models = models_in_folder(base_dir)
    if models:
        model_num = max(models.keys())
        print 'loading existing model %s' % models[model_num]
        with gzip.open(models[model_num]) as f:
            model = cPickle.load(f)

        model_loaded = True
        args = model.other_params
        if 'vsgd' not in args: # backward compatibility
            args['vsgd'] = False
        if 'simple_joint' not in args: # backward compatibility
            args['simple_joint'] = False
        # rewrite in case we've copied the model file into this folder
        args['base_dir'] = base_dir
    else:
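The two backward-compatibility checks above could be collapsed with dict.setdefault, which only sets a key when it is absent; a behavior-equivalent sketch:

# default any params that older pickled models did not record
for key in ('vsgd', 'simple_joint'):
    args.setdefault(key, False)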
Code Example #5
    parser.add_argument('--existing_semantic_model', help='use this existing trained model as the semantic model')
    parser.add_argument('--semantic_learning_rate', type=float, default=0.01)
    parser.add_argument('--semantic_tensor_n_hidden', type=int, default=50)
    # parser.add_argument('--semantic_block_size', type=int, default=100000)
    # parser.add_argument('--sem_validation_num_nearest', type=int, default=50, help='when running semantic validation after each round, look at the intersection of top N words in wordnet and top N by embedding for a given test word')
    # parser.add_argument('--sem_validation_num_to_test', type=int, default=500, help='in semantic validation after each round, the number of test words to sample')
    parser.add_argument('--semantic_blocks_to_run', type=int, default=1)

    args = vars(parser.parse_args())
    print args

    base_dir = args['base_dir']

    # see if this model's already been run. If it has, load it and get the
    # params
    models = models_in_folder(base_dir)
    if models:
        model_num = max(models.keys())
        print 'loading existing model %s' % models[model_num]
        with gzip.open(models[model_num]) as f:
            model = cPickle.load(f)

        model_loaded = True
        args = model.other_params
        # rewrite in case we've copied the model file into this folder
        args['base_dir'] = base_dir
    else:
        model_loaded = False
        # dump the params
        with open(os.path.join(args['base_dir'], 'params.json'), 'w') as f:
            json.dump(args, f)
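Because the full argument dict is dumped to params.json at the start of a fresh run, the configuration can later be inspected without unpickling a model; a sketch of reading it back:

import json
import os

with open(os.path.join(base_dir, 'params.json')) as f:
    saved_args = json.load(f)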
Code Example #6
def load_models(base_path, indices=range(0, 1050, 50)):
    model_paths = utils.models_in_folder(base_path)
    # keep only the checkpoints whose numbers appear in indices
    return {index: load_model(path)
            for index, path in model_paths.iteritems()
            if index in indices}
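load_model is not defined in this excerpt; given the gzip-plus-cPickle pattern used in every other example, a plausible sketch:

import gzip
import cPickle

def load_model(path):
    # checkpoints are stored as gzipped cPickle files, matching the
    # loading code in the examples above
    with gzip.open(path, 'rb') as f:
        return cPickle.load(f)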