예제 #1
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="Wide residual network")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu",
                            action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
        parser.add_argument("--model",
                            choices=['resnet', 'wrn', 'preact', 'addbn'],
                            default='wrn')
        parser.add_argument("--depth",
                            type=int,
                            choices=[18, 34, 50, 101, 152, 200],
                            default='50')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.model, args.depth, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
예제 #2
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="VGG inference")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu",
                            action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="")
        parser.add_argument("--depth",
                            type=int,
                            choices=[11, 13, 16, 19],
                            default='11')
        parser.add_argument("--batchnorm",
                            action='store_true',
                            help='use batchnorm or not')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.depth, 1000, args.batchnorm, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
예제 #3
0
def load_model(embed_map=None, pickle_table=None):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl' % path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling encoder...'
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    if embed_map == None:
        print 'Loading word2vec embeddings...'
        embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    if pickle_table:
        t = numpy.load(pickle_table)
        table = OrderedDict()
        for k, v in t:
            table[k] = v
    else:
        print 'Creating word lookup tables...'
        table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    return model
예제 #4
0
def heart_disease(data_file, iterations=3000, learning_rate=0.1, reg_param=0.1, plot_learning_curves=False):
    dataset = dproc.read_dataset(data_file)
    X, y = dproc.preprocess(dataset)
    X_train, X_cv, y_train, y_cv = train_test_split(X, y, test_size=0.3)
    
    # Standardize data
    X_train_std, X_cv_std = dproc.standardize(X_train, X_cv)

    activation_layers = (X.shape[1], 25, 1)
    parameters = model.init_params(activation_layers)

    model.train_model(X_train_std.T, y_train.T, parameters, iterations, learning_rate, reg_param)




    if plot_learning_curves:
        costs_train, costs_cv, m_examples = model.train_various_sizes(X_train_std.T, X_cv_std.T, y_train.T, y_cv.T, parameters, activation_layers, 3000, 0.01, reg_param)
        dataplot.plot_learning_curves(costs_train, costs_cv, m_examples)
        

    
    train_accuracy = model.compute_accuracy(X_train_std.T, y_train.T, parameters)
    cv_accuracy = model.compute_accuracy(X_cv_std.T, y_cv.T, parameters)
    print(f"Train accuracy: {train_accuracy}")
    print(f"CV accuracy: {cv_accuracy}")
예제 #5
0
def load_model(
        embed_map=None,
        path_to_model=PATH_TO_MODEL,            # model opts (.pkl)
        path_to_params=PATH_TO_PARAMS,          # model params (.npz)
        path_to_dictionary=PATH_TO_DICTIONARY,
        path_to_word2vec=PATH_TO_WORD2VEC
        ):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open(path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_params, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling encoder...'
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    if embed_map == None:
        print 'Loading word2vec embeddings...'
        embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    print 'Creating word lookup tables...'
    table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    return model
예제 #6
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description='DenseNet inference')

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu",
                            action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="densenet-121.pickle")
        parser.add_argument("--depth",
                            type=int,
                            choices=[121, 169, 201, 161],
                            default=121)

        parser.add_argument('--nb_classes', default=1000, type=int)

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.depth, args.nb_classes, 0, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        print('start to load parameter_file')
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)
        # wait the agent finish handling http request
        agent.stop()

    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
예제 #7
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="Wide residual network")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu", action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
        parser.add_argument("--model", choices=['resnet', 'wrn', 'preact',
                                                'addbn'], default='wrn')
        parser.add_argument("--depth", type=int, choices=[18, 34, 50, 101,
                                                          152, 200],
                            default='50')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.model, args.depth, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
예제 #8
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="VGG inference")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu", action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="")
        parser.add_argument("--depth", type=int, choices=[11, 13, 16, 19],
                            default='11')
        parser.add_argument("--batchnorm", action='store_true',
                            help='use batchnorm or not')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.depth, 1000, args.batchnorm, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
예제 #9
0
def load_model(
        path_to_model='/home/shunan/Code/skip-thoughts/experiments/amazon/amazon_model_bi.npz',
        path_to_dictionary='/home/shunan/Code/skip-thoughts/experiments/amazon/word_dicts.pkl',
        embed_map=None):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl' % path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling encoder...'
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    # if embed_map == None:
    #     print 'Loading word2vec embeddings...'
    #     embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    print 'Creating word lookup tables...'
    table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    # model is just a dict.
    return model
예제 #10
0
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict,
              sampling):
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)
    # this is zero indicate we are not using dropout in the graph
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see model.py for more detailed explanations
    print "Starting to build sampler ..."
    f_init, f_next = build_sampler(tparams,
                                   options,
                                   use_noise,
                                   trng,
                                   sampling=sampling)

    def _gencap(cc0):
        sample, score = gen_sample(tparams,
                                   f_init,
                                   f_next,
                                   cc0,
                                   options,
                                   trng=trng,
                                   k=k,
                                   maxlen=200,
                                   stochastic=False)
        # adjust for length bias
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    while True:
        req = queue.get()
        # exit signal
        if req is None:
            break

        idx, context = req[0], req[1]
        print pid, '-', idx
        seq = _gencap(context)
        rqueue.put((idx, seq))

    return
예제 #11
0
def main(model, dictionary, dictionary_tag, source_file, target_file, saveto):

    # load model model_options
    with open('%s.pkl' % model, 'rb') as f:
        options = pkl.load(f)

    # load source dictionary and invert
    with open(dictionary, 'rb') as f:
        word_dict = pkl.load(f)
    word_idict = dict()
    for kk, vv in word_dict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # load tag dictionary and invert
    with open(dictionary_tag, 'rb') as f:
        tag_dict = pkl.load(f)
    tag_idict = dict()
    for kk, vv in tag_dict.iteritems():
        tag_idict[vv] = kk

    # allocate model parameters
    params = init_params(options)

    # load model parameters and set theano shared variables
    params = load_params(model, params)
    tparams = init_tparams(params)

    trng, use_noise, \
        x, x_mask, y, y_mask, \
        opt_ret, \
        cost, predicts = \
        build_model(tparams, options)

    print 'Building f_predicts...',
    f_predicts = theano.function([x, x_mask], predicts)
    print 'Done'

    use_noise.set_value(0.)
    valid_err = evaluation(f_predicts,
                           options,
                           tag_idict,
                           word_dict,
                           source_file,
                           saveto,
                           target_file,
                           0,
                           options['n_words_src'],
                           back_file=target_file + ".back")

    print 'Test ', valid_err
def load_model(path_to_model=default_model):
    """
    Load all model components
    """
    print path_to_model

    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_model + '.dictionary.pkl', 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open(path_to_model + '.pkl', 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions

    print 'Compiling image encoder...'
    trng, [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    print 'Compiling sentence encoder...'
    trng = RandomStreams(1234)
    trng, [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['worddict'] = worddict
    model['word_idict'] = word_idict
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc
    return model
예제 #13
0
def load_model():
    """
    Load all model components
    """
    print path_to_model

    # Load the worddict
    print "Loading dictionary..."
    with open("%s.dictionary.pkl" % path_to_model, "rb") as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print "Creating inverted dictionary..."
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = "<eos>"
    word_idict[1] = "UNK"

    # Load model options
    print "Loading model options..."
    with open("%s.pkl" % path_to_model, "rb") as f:
        options = pkl.load(f)

    # Load parameters
    print "Loading model parameters..."
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print "Compiling sentence encoder..."
    trng = RandomStreams(1234)
    trng, [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name="f_senc")

    print "Compiling image encoder..."
    trng, [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name="f_ienc")

    # Store everything we need in a dictionary
    print "Packing up..."
    model = {}
    model["options"] = options
    model["worddict"] = worddict
    model["word_idict"] = word_idict
    model["f_senc"] = f_senc
    model["f_ienc"] = f_ienc
    return model
예제 #14
0
def load_model(path_to_model=default_model):
    """
    Load all model components
    """
    print path_to_model

    # Load the worddict
    print 'Loading dictionary...'
    with open('%s.dictionary.pkl'%path_to_model, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl'%path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling sentence encoder...'
    trng = RandomStreams(1234)
    trng, [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    print 'Compiling image encoder...'
    trng, [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['worddict'] = worddict
    model['word_idict'] = word_idict
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc
    return model
예제 #15
0
def load_model(
        path_to_model=PATH_TO_MODEL,            # model opts (.pkl)
        path_to_params=PATH_TO_PARAMS,          # model params (.npz)
        path_to_dictionary=PATH_TO_DICTIONARY
        ):
    """
    Load a trained model for decoding
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl'%path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_params, params)
    tparams = init_tparams(params)

    # Sampler.
    trng = RandomStreams(1234)
    f_init, f_next = build_sampler(tparams, options, trng)

    # Pack everything up
    dec = dict()
    dec['options'] = options
    dec['trng'] = trng
    dec['worddict'] = worddict
    dec['word_idict'] = word_idict
    dec['tparams'] = tparams
    dec['f_init'] = f_init
    dec['f_next'] = f_next
    return dec
예제 #16
0
def load_model(
        path_to_model=PATH_TO_MODEL,  # model opts (.pkl)
        path_to_params=PATH_TO_PARAMS,  # model params (.npz)
        path_to_dictionary=PATH_TO_DICTIONARY):
    """
    Load a trained model for decoding
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl' % path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_params, params)
    tparams = init_tparams(params)

    # Sampler.
    trng = RandomStreams(1234)
    f_init, f_next = build_sampler(tparams, options, trng)

    # Pack everything up
    dec = dict()
    dec['options'] = options
    dec['trng'] = trng
    dec['worddict'] = worddict
    dec['word_idict'] = word_idict
    dec['tparams'] = tparams
    dec['f_init'] = f_init
    dec['f_next'] = f_next
    return dec
예제 #17
0
def load_model(save_dir, model_name, best=True):
    """
    Load all model components
    Input are only save_dir and model_name since it is assumed that filenames keep convention
    """

    model_options = {}
    model_options['save_dir'] = save_dir
    model_options['model_name'] = model_name

    # Load model
    print 'Loading model'
    opt_filename_reload = get_opt_filename(model_options, previous=True)
    with open(opt_filename_reload, 'rb') as f:
        model = pkl.load(f)

    options = model['options']

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params_filename = get_npz_filename(model_options, best=best, previous=True)
    params = load_params(params_filename, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling sentence encoder...'
    [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    print 'Compiling image encoder...'
    [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    print 'Compiling error computation...'
    [s, im], errs = build_errors(options)
    f_err = theano.function([s, im], errs, name='f_err')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc
    model['f_err'] = f_err
    return model
예제 #18
0
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict, sampling):
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)
    # this is zero indicate we are not using dropout in the graph
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see model.py for more detailed explanations
    print "Starting to build sampler ..."
    f_init, f_next = build_sampler(tparams, options, use_noise, trng, sampling=sampling)

    def _gencap(cc0):
        sample, score = gen_sample(tparams, f_init, f_next, cc0, options,
                                   trng=trng, k=k, maxlen=200, stochastic=False)
        # adjust for length bias
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    while True:
        req = queue.get()
        # exit signal
        if req is None:
            break

        idx, context = req[0], req[1]
        print pid, '-', idx
        seq = _gencap(context)
        rqueue.put((idx, seq))

    return 
예제 #19
0
def load_model_path(path_to_model):
    """
    Load all model components
    """
    print path_to_model

    # Load model
    print 'Loading model'
    with open(path_to_model + '.pkl', 'rb') as f:
        model = pkl.load(f)

    options = model['options']

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)

    params = load_params(path_to_model + '.npz', params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling sentence encoder...'
    [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    print 'Compiling image encoder...'
    [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    print 'Compiling error computation...'
    [s, im], errs = build_errors(options)
    f_err = theano.function([s, im], errs, name='f_err')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc
    model['f_err'] = f_err
    return model
예제 #20
0
def load_model(path_to_model):
    """
    Load all model components
    """
    print path_to_model

    # Load model
    print 'Loading model'
    with open(path_to_model + '.pkl', 'rb') as f:
        model = pkl.load(f)

    options = model['options']

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model + '.npz', params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling sentence encoder...'
    [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    print 'Compiling image encoder...'
    [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    print 'Compiling error computation...'
    [s, im], errs = build_errors(options)
    f_err = theano.function([s,im], errs, name='f_err')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc
    model['f_err'] = f_err
    return model
예제 #21
0
def trainer(X, C, stmodel,
            dimctx=4800, #vector dimensionality
            dim_word=620, # word vector dimensionality
            dim=1600, # the number of GRU units
            encoder='gru',
            decoder='gru',
            doutput=False,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=40000,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 16,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            embeddings=None,
            saveFreq=1000,
            sampleFreq=100,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dimctx'] = dimctx
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['doutput'] = doutput
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['sampleFreq'] = sampleFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings != None:
        print 'Loading embeddings...'
        with open(embeddings, 'rb') as f:
            embed_map = pkl.load(f)
        dim_word = len(embed_map.values()[0])
        model_options['dim_word'] = dim_word
        preemb = norm_weight(n_words, dim_word)
        pz = defaultdict(lambda : 0)
        for w in embed_map.keys():
            pz[w] = 1
        for w in worddict.keys()[:n_words-2]:
            if pz[w] > 0:
                preemb[worddict[w]] = embed_map[w]
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([X,C], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, c in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, ctx = homogeneous_data.prepare_data(x, c, worddict, stmodel, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                x_s = x
                mask_s = mask
                ctx_s = ctx
                for jj in xrange(numpy.minimum(10, len(ctx_s))):
                    sample, score = gen_sample(tparams, f_init, f_next, ctx_s[jj].reshape(1, model_options['dimctx']), model_options,
                                               trng=trng, k=1, maxlen=100, stochastic=False, use_unk=False)
                    print 'Truth ',jj,': ',
                    for vv in x_s[:,jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk,') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                    print

        print 'Seen %d samples'%n_samples
예제 #22
0
def trainer(X, 
            dim_word=620, # word vector dimensionality
            dim=2400, # the number of GRU units
            encoder='gru',
            num_neg=4,
            gamma=1.0,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size = 64,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['num_neg'] = num_neg
    model_options['gamma'] = gamma
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, x, x_mask, p_f, p_f_mask, p_b, p_b_mask, \
          ns_list, ns_masks, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, p_f, p_f_mask, p_b, p_b_mask] + ns_list + ns_masks

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX, batch_size=batch_size, num_neg=num_neg, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, p_f, p_b, ns in train_iter:
            n_samples += len(x)
            uidx += 1

            # ns input is list of num_neg negative sentences, 
            # output ns is list of num_neg (batchsize, neg_len) negative sentences 
            x, x_mask, p_f, p_f_mask, p_b, p_b_mask, ns_list, ns_masks = homogeneous_data.prepare_data(x, p_f, p_b, ns, 
                                                                                worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            args = [x, x_mask, p_f, p_f_mask, p_b, p_b_mask] + ns_list + ns_masks
            cost = f_grad_shared(*args)
            f_update(lrate)

            ud = time.time() - ud_start

            if numpy.isnan(cost_p1) or numpy.isinf(cost_p1) or numpy.isnan(cost_p2) or numpy.isinf(cost_p2):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost_p1 + cost_p2, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                saveto_idx = saveto.format(uidx)

                params = unzip(tparams)
                numpy.savez(saveto_idx, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto_idx, 'wb'))
                print 'Done'

        print 'Seen %d samples'%n_samples
예제 #23
0
def trainer(
        X,
        dim_word=620,  # word vector dimensionality
        dim=2400,  # the number of GRU units
        encoder='gru',
        decoder='gru',
        max_epochs=5,
        dispFreq=1,
        decay_c=0.,
        grad_clip=5.,
        n_words=20000,
        maxlen_w=30,
        optimizer='adam',
        batch_size=512,
        saveto='/u/rkiros/research/semhash/models/toy.npz',
        dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
        saveFreq=5000,
        reload_=False,
        reload_path='output_books_full/model_ae_full_bsz_64_iter_313000.npz',
        SICK_eval=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_
    model_options['reload_path'] = reload_path

    print model_options

    # reload options
    if reload_ and os.path.exists(reload_path):
        print 'reloading...' + reload_path
        with open('%s.pkl' % reload_path, 'rb') as f:
            models_options = pkl.load(f)

        reload_idx = int(reload_path.split('_')[-1].split('.')[0])

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(reload_path):
        params = load_params(reload_path, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum()
                                         for k, t in tparams.iteritems()],
                                    profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    if not reload_:
        uidx = 0
    else:
        uidx = reload_idx
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(
                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                saveto_iternum = saveto.format(uidx)

                params = unzip(tparams)
                numpy.savez(saveto_iternum, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto_iternum, 'wb'))
                print 'Done'

                if SICK_eval:
                    print "Evaluating SICK Test performance"
                    embed_map = tools.load_googlenews_vectors()
                    model = tools.load_model(path_to_model=saveto_iternum,
                                             embed_map=embed_map)
                    yhat, pr, sr, mse = eval_sick.evaluate(model,
                                                           evaltest=True)

                    del (model)
                    del (embed_map)
                    print pr, sr, mse

                    res_save_file = saveto.format('ALL').split(
                        '.')[0] + '_SICK_EVAL.txt'
                    with open(res_save_file, 'a') as rsf:
                        cur_time = strftime("%a, %d %b %Y %H:%M:%S +0000",
                                            gmtime())
                        rsf.write('\n \n {}'.format(cur_time))
                        rsf.write('\n{}, {}, {}, {}'.format(uidx, pr, se, mse))
                    print "Done"

        print 'Seen %d samples' % n_samples
예제 #24
0
def trainer(X, C, stmodel,
            dimctx=4800, #vector dimensionality
            dim_word=620, # word vector dimensionality
            dim=1600, # the number of GRU units
            encoder='gru',
            decoder='gru',
            doutput=False,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=40000,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 16,
            saveto='adventuremodel.npz',
            dictionary='/home/jm7432/tell-tall-tales/decoding/adventure_dict_final.pkl',
            embeddings=None,
            saveFreq=1000,
            sampleFreq=100,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dimctx'] = dimctx
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['doutput'] = doutput
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['sampleFreq'] = sampleFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings != None:
        print 'Loading embeddings...'
        with open(embeddings, 'rb') as f:
            embed_map = pkl.load(f)
        dim_word = len(embed_map.values()[0])
        model_options['dim_word'] = dim_word
        preemb = norm_weight(n_words, dim_word)
        pz = defaultdict(lambda : 0)
        for w in embed_map.keys():
            pz[w] = 1
        for w in worddict.keys()[:n_words-2]:
            if pz[w] > 0:
                preemb[worddict[w]] = embed_map[w]
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([X,C], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, c in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, ctx = homogeneous_data.prepare_data(x, c, worddict, stmodel, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                x_s = x
                mask_s = mask
                ctx_s = ctx
                for jj in xrange(numpy.minimum(10, len(ctx_s))):
                    sample, score = gen_sample(tparams, f_init, f_next, ctx_s[jj].reshape(1, model_options['dimctx']), model_options,
                                               trng=trng, k=1, maxlen=100, stochastic=False, use_unk=False)
                    print 'Truth ',jj,': ',
                    for vv in x_s[:,jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk,') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                    print

        print 'Seen %d samples'%n_samples
예제 #25
0
def trainer(
        data='coco',  #f8k, f30k, coco
        margin=0.2,
        dim=1024,
        dim_image=4096,
        dim_word=300,
        encoder='gru',  # gru OR bow
        max_epochs=15,
        dispFreq=10,
        decay_c=0.,
        grad_clip=2.,
        maxlen_w=100,
        optimizer='adam',
        batch_size=128,
        saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz',
        validFreq=100,
        lrate=0.0002,
        reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            models_options = pkl.load(f)

    # Load training and development sets
    print('Loading dataset')
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print('Creating dictionary')
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print('Dictionary size: ' + str(n_words))
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print('Building model')
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print('Building f_log_probs...', )
    f_log_probs = theano.function(inps, cost, profile=False)
    print('Done')

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print('Building f_cost...', )
    f_cost = theano.function(inps, cost, profile=False)
    print('Done')

    print('Building sentence encoder')
    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...', )
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum()
                                         for k, t in tparams.iteritems()],
                                    profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print('Building optimizers...', )
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print('Optimization')

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0
    curr = 0.
    n_samples = 0

    for eidx in xrange(max_epochs):

        print('Epoch ', eidx)

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x,
                                                        im,
                                                        worddict,
                                                        maxlen=maxlen_w,
                                                        n_words=n_words)

            if x == None:
                print('Minibatch with zero sample under length ', maxlen_w)
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)

            if numpy.mod(uidx, validFreq) == 0:

                print('Computing results...')
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print("Image to text: %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr))
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print("Text to image: %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri))

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print('Saving...', )
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                    print('Done')

        print('Seen %d samples' % n_samples)
예제 #26
0
def trainer(load_from=None, save_dir="snapshots", name="anon", **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print "reloading..." + load_from
        with open("%s.pkl" % load_from, "rb") as f:
            curr_model = pkl.load(f)
    else:
        curr_model["options"] = {}

    for k, v in kwargs.iteritems():
        curr_model["options"][k] = v

    model_options = curr_model["options"]

    # initialize logger
    import datetime

    timestampedName = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + "_" + name

    from logger import Log

    log = Log(
        name=timestampedName, hyperparams=model_options, saveDir="vis/training", xLabel="Examples Seen", saveFrequency=1
    )

    print curr_model["options"]

    # Load training and development sets
    print "Loading dataset"
    dataset = load_dataset(model_options["data"], cnn=model_options["cnn"], load_train=True)
    train = dataset["train"]
    dev = dataset["dev"]

    # Create dictionary
    print "Creating dictionary"
    worddict = build_dictionary(train["caps"] + dev["caps"])
    print "Dictionary size: " + str(len(worddict))
    curr_model["worddict"] = worddict
    curr_model["options"]["n_words"] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open("%s/%s.pkl" % (save_dir, name), "wb"))

    print "Loading data"
    train_iter = datasource.Datasource(train, batch_size=model_options["batch_size"], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print "Building model"
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print "Building sentence encoder"
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print "Building image encoder"
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print "Building f_grad...",
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print "Building errors.."
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model["f_senc"] = f_senc
    curr_model["f_ienc"] = f_ienc
    curr_model["f_err"] = f_err

    if model_options["grad_clip"] > 0.0:
        grads = [maxnorm(g, model_options["grad_clip"]) for g in grads]

    lr = tensor.scalar(name="lr")
    print "Building optimizers...",
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options["optimizer"])(lr, tparams, grads, inps, cost)

    print "Optimization"

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in xrange(model_options["max_epochs"]):

        print "Epoch ", eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options["lrate"])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print "NaN detected"
                return 1.0, 1.0, 1.0

            if numpy.mod(uidx, model_options["dispFreq"]) == 0:
                print "Epoch ", eidx, "Update ", uidx, "Cost ", cost, "UD ", ud
                log.update({"Error": float(cost)}, n_samples)

            if numpy.mod(uidx, model_options["validFreq"]) == 0:

                print "Computing results..."

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options["batch_size"])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)
                log.update({"R@1": r1, "R@5": r5, "R@10": r10, "median_rank": medr, "mean_rank": meanr}, n_samples)
                print "Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)
                log.update(
                    {
                        "Image2Caption_R@1": r1i,
                        "Image2Caption_R@5": r5i,
                        "Image2CaptionR@10": r10i,
                        "Image2Caption_median_rank": medri,
                        "Image2Caption_mean_rank": meanri,
                    },
                    n_samples,
                )

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print "Saving...",
                    numpy.savez("%s/%s" % (save_dir, name), **unzip(tparams))
                    print "Done"
                    vis_details["hyperparams"] = model_options
                    # Save visualization details
                    with open("vis/roc/%s/%s.json" % (model_options["data"], timestampedName), "w") as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        index = json.load(open("vis/roc/index.json", "r"))
                    except IOError:
                        index = {model_options["data"]: []}

                    models = index[model_options["data"]]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open("vis/roc/index.json", "w") as f:
                        json.dump(index, f)

        print "Seen %d samples" % n_samples
예제 #27
0
def train(
        dim_word=100,  # word vector dimensionality
        dim=1000,  # the number of LSTM units
        encoder='gru',
        decoder='gru_cond',
        patience=10,
        max_epochs=5000,
        dispFreq=100,
        decay_c=0.,
        alpha_c=0.,
        diag_c=0.,
        clip_c=-1.,
        lrate=0.01,
        n_words_src=100000,
        n_words=100000,
        maxlen=100,  # maximum length of the description
        optimizer='rmsprop',
        batch_size=16,
        valid_batch_size=16,
        saveto='model.npz',
        validFreq=1000,
        saveFreq=1000,  # save the parameters after every saveFreq updates
        sampleFreq=100,  # generate some samples after every sampleFreq updates
        datasets=[
            '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok',
            '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok'
        ],
        valid_datasets=[
            '../data/dev/newstest2011.en.tok',
            '../data/dev/newstest2011.fr.tok'
        ],
        dictionaries=[
            '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok.pkl',
            '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok.pkl'
        ],
        use_dropout=False,
        reload_=False):

    # Model options
    model_options = locals().copy()

    worddicts = [None] * len(dictionaries)
    worddicts_r = [None] * len(dictionaries)
    for ii, dd in enumerate(dictionaries):
        with open(dd, 'rb') as f:
            worddicts[ii] = pkl.load(f)
        worddicts_r[ii] = dict()
        for kk, vv in worddicts[ii].iteritems():
            worddicts_r[ii][vv] = kk

    # reload options
    if reload_ and os.path.exists(saveto):
        with open('%s.pkl' % saveto, 'rb') as f:
            models_options = pkl.load(f)

    print 'Loading data'
    train = TextIterator(datasets[0],
                         datasets[1],
                         dictionaries[0],
                         dictionaries[1],
                         n_words_source=n_words_src,
                         n_words_target=n_words,
                         batch_size=batch_size,
                         maxlen=maxlen)
    valid = TextIterator(valid_datasets[0],
                         valid_datasets[1],
                         dictionaries[0],
                         dictionaries[1],
                         n_words_source=n_words_src,
                         n_words_target=n_words,
                         batch_size=valid_batch_size,
                         maxlen=maxlen)

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, use_noise, \
          x, x_mask, y, y_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask]

    print 'Buliding sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=profile)
    print 'Done'

    cost = cost.mean()

    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    if alpha_c > 0. and not model_options['decoder'].endswith('simple'):
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = alpha_c * (
            (tensor.cast(y_mask.sum(0) // x_mask.sum(0), 'float32')[:, None] -
             opt_ret['dec_alphas'].sum(0))**2).sum(1).mean()
        cost += alpha_reg

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=profile)
    print 'Done'

    print 'Computing gradient...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    print 'Done'
    print 'Building f_grad...',
    f_grad = theano.function(inps, grads, profile=profile)
    print 'Done'

    if clip_c > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (clip_c**2), g / tensor.sqrt(g2) * clip_c,
                              g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
    print 'Done'

    print 'Optimization'

    history_errs = []
    # reload history
    if reload_ and os.path.exists(saveto):
        history_errs = list(numpy.load(saveto)['history_errs'])
    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0]) / batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        for x, y in train:
            n_samples += len(x)
            uidx += 1
            use_noise.set_value(1.)

            x, x_mask, y, y_mask = prepare_data(x,
                                                y,
                                                maxlen=maxlen,
                                                n_words_src=n_words_src,
                                                n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)

                numpy.savez(saveto, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                for jj in xrange(numpy.minimum(5, x.shape[1])):
                    stochastic = True
                    sample, score = gen_sample(tparams,
                                               f_init,
                                               f_next,
                                               x[:, jj][:, None],
                                               model_options,
                                               trng=trng,
                                               k=1,
                                               maxlen=30,
                                               stochastic=stochastic,
                                               argmax=False)
                    print 'Source ', jj, ': ',
                    for vv in x[:, jj]:
                        if vv == 0:
                            break
                        if vv in worddicts_r[0]:
                            print worddicts_r[0][vv],
                        else:
                            print 'UNK',
                    print
                    print 'Truth ', jj, ' : ',
                    for vv in y[:, jj]:
                        if vv == 0:
                            break
                        if vv in worddicts_r[1]:
                            print worddicts_r[1][vv],
                        else:
                            print 'UNK',
                    print
                    print 'Sample ', jj, ': ',
                    if stochastic:
                        ss = sample
                    else:
                        score = score / numpy.array([len(s) for s in sample])
                        ss = sample[score.argmin()]
                    for vv in ss:
                        if vv == 0:
                            break
                        if vv in worddicts_r[1]:
                            print worddicts_r[1][vv],
                        else:
                            print 'UNK',
                    print

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                valid_errs = pred_probs(f_log_probs, prepare_data,
                                        model_options, valid)
                valid_err = valid_errs.mean()
                history_errs.append(valid_err)

                if uidx == 0 or valid_err <= numpy.array(history_errs).min():
                    best_p = unzip(tparams)
                    bad_counter = 0
                if len(history_errs) > patience and valid_err >= numpy.array(
                        history_errs)[:-patience].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if numpy.isnan(valid_err):
                    import ipdb
                    ipdb.set_trace()

                print 'Valid ', valid_err

        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid_err = pred_probs(f_log_probs, prepare_data, model_options,
                           valid).mean()
    print 'Valid ', valid_err
    params = copy.copy(best_p)
    numpy.savez(saveto,
                zipped_params=best_p,
                history_errs=history_errs,
                **params)

    return valid_err
def trainer(**kwargs):
    """
    Train the model according to input params
    Info about input params is available in parameters.py
    """
    # Timing
    print('Starting time:', datetime.now())
    sys.stdout.flush()
    t_start_train = time.time()

    # Model options
    # load old model, including parameters, but overwrite with new options

    # Extract model options from arguments
    model_options = {}
    for k, v in kwargs.iteritems():
        model_options[k] = v

    # Print input options
    print('PARAMETERS BEFORE LOADING:')
    for k, v in model_options.items():
        print('{:>26}: {}'.format(k, v))
    sys.stdout.flush()

    # Reload options if required
    curr_model = dict()
    if model_options['reload_']:
        # Reload model parameters
        opt_filename_reload = get_opt_filename(model_options, previous=True)
        print('reloading...', opt_filename_reload)
        sys.stdout.flush()
        try:
            with open(opt_filename_reload, 'rb') as f:
                curr_model = pkl.load(f)
        except:
            print(
                'Failed to reload parameters, try to use only feeded parameters'
            )
            curr_model['options'] = {}

        # Check if we reload from best model or last model
        if model_options['load_from'] in ['Best', 'best', 'B', 'b']:
            load_from_best = True
            print('Loading from Best saved model in validation results')
        elif model_options['load_from'] in ['Last', 'last', 'L', 'l']:
            load_from_best = False
            print('Loading from Last saved model')
        else:
            print('Unkown choice for "load_from" parameter',
                  model_options['load_from'])
            print('Please choose one of:', ['Best', 'best', 'B', 'b'],
                  ['Last', 'last', 'L', 'l'])
            print('Using Last as default')
            load_from_best = False

        # Reload end-point parameters
        state_filename = get_sol_filename(model_options,
                                          best=load_from_best,
                                          previous=True)
        print('reloading...', state_filename)
        sys.stdout.flush()
        try:
            with open(state_filename, 'rb') as f:
                state_params = pkl.load(f)
            if load_from_best:
                init_epoch = state_params['epoch']
                solution = state_params
            else:
                init_epoch = state_params['epoch_done'] + 1
                solution = state_params['solution']
            best_val_score = solution['best_val_score']
            n_samples = solution['samples_seen']
        except:
            print('Failed to reload state parameters, starting from 0')
            init_epoch = 0
            best_val_score = 0
            n_samples = 0

    else:
        curr_model['options'] = {}
        init_epoch = 0
        best_val_score = 0
        n_samples = 0

    # Overwrite loaded options with input options
    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v
    model_options = curr_model['options']

    # Print final options loaded
    if model_options['reload_']:
        print('PARAMETERS AFTER LOADING:')
        for k, v in model_options.items():
            print('{:>26}: {}'.format(k, v))
        sys.stdout.flush()

    # Load training and development sets
    print('Loading dataset')
    sys.stdout.flush()

    dataset = load_dataset(dataset_name=model_options['data'],
                           embedding=model_options['embedding'],
                           path_to_data=model_options['data_path'],
                           test_subset=model_options['test_subset'],
                           load_train=True,
                           fold=0)
    train = dataset['train']
    dev = dataset['val']

    # Create word dictionary
    print('Creating dictionary')
    sys.stdout.flush()
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print('Dictionary size: ' + str(len(worddict)))
    sys.stdout.flush()
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    opt_filename_save = get_opt_filename(model_options, previous=False)
    print('Saving model parameters in', opt_filename_save)
    sys.stdout.flush()
    try:
        os.makedirs(os.path.dirname(opt_filename_save))
    except:
        pass
    pkl.dump(curr_model, open(opt_filename_save, 'wb'))

    # Load data from dataset
    print('Loading data')
    sys.stdout.flush()
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print('Building model')
    sys.stdout.flush()
    params = init_params(model_options)

    # reload network parameters, ie. weights
    if model_options['reload_']:
        params_filename = get_npz_filename(model_options,
                                           best=load_from_best,
                                           previous=True)
        params = load_params(params_filename, params)

    tparams = init_tparams(params)
    inps, cost = build_model(tparams, model_options)

    print('Building sentence encoder')
    sys.stdout.flush()
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    sys.stdout.flush()
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...')
    sys.stdout.flush()
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print('Building errors...')
    sys.stdout.flush()
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print('Building optimizers...')
    sys.stdout.flush()
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    # Get names for the files to save model and solution
    sol_filename_best = get_sol_filename(model_options,
                                         best=True,
                                         previous=False)
    sol_filename_last = get_sol_filename(model_options,
                                         best=False,
                                         previous=False)
    params_filename_best = get_npz_filename(model_options,
                                            best=True,
                                            previous=False)
    params_filename_last = get_npz_filename(model_options,
                                            best=False,
                                            previous=False)

    print('PATHS TO MODELS:')
    for filename in [
            sol_filename_best, sol_filename_last, params_filename_best,
            params_filename_last
    ]:
        print(filename)
        sys.stdout.flush()
        try:
            os.makedirs(os.path.dirname(filename))
        except:
            pass

    # Start optimization
    print('Optimization')
    sys.stdout.flush()

    uidx = 0

    # Timing
    t_start = time.time()
    print('Starting time:', datetime.now())

    for eidx in range(init_epoch, model_options['max_epochs']):
        t_start_epoch = time.time()
        print('Epoch ', eidx)
        sys.stdout.flush()

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                sys.stdout.flush()
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)
                sys.stdout.flush()

            if numpy.mod(uidx, model_options['validFreq']) == 0:
                print('Computing results...')
                sys.stdout.flush()

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr) = i2t(dev_errs)
                (r1i, r5i, r10i, medri, meanri) = t2i(dev_errs)
                print("Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri, meanri))
                sys.stdout.flush()
                print("Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr, meanr))
                sys.stdout.flush()

                # Score
                val_score = r1 + r5 + r10 + r1i + r5i + r10i
                if val_score > best_val_score:

                    print('BEST MODEL FOUND')
                    print('Score:', val_score)
                    print('Previous best score:', best_val_score)
                    best_val_score = val_score
                    # Join in a results dict
                    results_dict = build_results_dict(r1, r5, r10, medr, r1i,
                                                      r5i, r10i, medri)

                    # Save parameters
                    print('Saving...', end=' ')
                    sys.stdout.flush()
                    numpy.savez(params_filename_best, **unzip(tparams))
                    print('Done')
                    sys.stdout.flush()

                    # Update solution
                    solution = OrderedDict([
                        ('epoch', eidx), ('update', uidx),
                        ('samples_seen', n_samples),
                        ('best_val_score', best_val_score),
                        ('best_val_res', results_dict),
                        ('time_until_results',
                         str(timedelta(seconds=(time.time() - t_start_train))))
                    ])
                    pkl.dump(solution, open(sol_filename_best, 'wb'))

        print('Seen %d samples' % n_samples)
        sys.stdout.flush()

        # Timing
        t_epoch = time.time() - t_start_epoch
        t_epoch_avg = (time.time() - t_start) / (eidx + 1 - (init_epoch))
        print('Time for this epoch:', str(timedelta(seconds=t_epoch)),
              'Average:', str(timedelta(seconds=t_epoch_avg)))
        t_2_complete = t_epoch_avg * (model_options['max_epochs'] - (eidx + 1))
        print('Time since start session:',
              str(timedelta(seconds=time.time() - t_start)),
              'Estimated time to complete training:',
              str(timedelta(seconds=t_2_complete)))
        print('Current time:', datetime.now())
        sys.stdout.flush()

        # Save current model
        try:
            state_params = OrderedDict([('epoch_done', eidx),
                                        ('solution', solution)])
        except:
            solution = OrderedDict([
                ('epoch', eidx), ('update', uidx), ('samples_seen', n_samples),
                ('best_val_score', best_val_score),
                ('time_until_results',
                 str(timedelta(seconds=(time.time() - t_start_train))))
            ])
            state_params = OrderedDict([('epoch_done', eidx),
                                        ('solution', solution)])
        pkl.dump(state_params, open(sol_filename_last, 'wb'))

        # Save parameters
        print('Saving LAST npz...', end=' ')
        sys.stdout.flush()
        numpy.savez(params_filename_last, **unzip(tparams))
        print('Done')
        sys.stdout.flush()

    return solution
예제 #29
0
def trainer(load_from=None,
            save_dir='snapshots',
            name='anon',
            **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print 'reloading...' + load_from
        with open('%s.pkl'%load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v

    model_options = curr_model['options']

    # initialize logger
    import datetime
    timestampedName = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training',
              xLabel='Examples Seen', saveFrequency=1)


    print curr_model['options']




    # Load training and development sets
    print 'Loading dataset'
    dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train['caps']+dev['caps'])
    print 'Dictionary size: ' + str(len(worddict))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb'))


    print 'Loading data'
    train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print 'Building sentence encoder'
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print 'Building errors..'
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err



    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost)

    print 'Optimization'

    uidx = 0
    curr = 0
    n_samples = 0


    
    for eidx in xrange(model_options['max_epochs']):

        print 'Epoch ', eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                log.update({'Error': float(cost)}, n_samples)


            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print 'Computing results...'

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)


                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)
                log.update({'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr}, n_samples)
                print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)
                log.update({'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2CaptionR@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri}, n_samples)

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print 'Saving...',
                    numpy.savez('%s/%s'%(save_dir, name), **unzip(tparams))
                    print 'Done'
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open('vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    index = json.load(open('vis/roc/index.json', 'r'))
                    models = index[model_options['data']]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)






        print 'Seen %d samples'%n_samples
예제 #30
0
파일: char.py 프로젝트: mmuehl/715_project
def main(data_path,save_path,num_epochs=NUM_EPOCHS):

    print("Preparing Data...")

    # Training data
    with open(data_path,'r') as f:
	X = f.read().splitlines()

    # Build dictionary
    chardict, charcount = batched_tweets.build_dictionary(X)
    batched_tweets.save_dictionary(chardict,charcount,'%s/dict.pkl' % save_path)
    trainX = batched_tweets.grouper(X)
    train_iter = batched_tweets.BatchedTweets(trainX, validation_size=N_VAL, batch_size=N_BATCH, maxlen=MAX_LENGTH)

    # Validation set
    t_val, tp_val, tn_val = train_iter.validation_set()
    t_val, t_val_m, tp_val, tp_val_m, tn_val, tn_val_m = batched_tweets.prepare_data(t_val, tp_val, tn_val, chardict, maxlen=MAX_LENGTH)

    print("Building network...")

    # params
    n_char = len(chardict.keys()) + 1
    params = init_params(n_chars=n_char)

    # Tweet variables
    tweet = T.itensor3()
    ptweet = T.itensor3()
    ntweet = T.itensor3()

    # masks
    t_mask = T.fmatrix()
    tp_mask = T.fmatrix()
    tn_mask = T.fmatrix()

    # Embeddings
    emb_t = tweet2vec(tweet, t_mask, params)
    emb_tp = tweet2vec(ptweet, tp_mask, params)
    emb_tn = tweet2vec(ntweet, tn_mask, params)

    # batch loss
    D1 = 1 - T.batched_dot(emb_t, emb_tp)/(tnorm(emb_t)*tnorm(emb_tp))
    D2 = 1 - T.batched_dot(emb_t, emb_tn)/(tnorm(emb_t)*tnorm(emb_tn))
    gap = D1-D2+M
    loss = gap*(gap>0)
    cost = T.sum(loss)

    # params and updates
    print("Computing updates...")
    updates = lasagne.updates.adagrad(cost, params.values(), LEARNING_RATE)

    # Theano function
    print("Compiling theano functions...")
    inps = [tweet,t_mask,ptweet,tp_mask,ntweet,tn_mask]
    dist = theano.function(inps,[D1,D2])
    l = theano.function(inps,loss)
    t2v = theano.function(inps,[emb_t,emb_tp,emb_tn])
    cost_val = theano.function(inps,cost)
    train = theano.function(inps,cost,updates=updates)

    # Training
    print("Training...")
    uidx = 0
    try:
	for epoch in range(num_epochs):
	    n_samples = 0
	    print("Epoch {}".format(epoch))

	    for x,y,z in train_iter:
		n_samples +=len(x)
		uidx += 1

		x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data(x, y, z, chardict, maxlen=MAX_LENGTH)

		if x==None:
		    print("Minibatch with zero samples under maxlength.")
		    uidx -= 1
		    continue

		ud_start = time.time()
		curr_cost = train(x,x_m,y,y_m,z,z_m)
		ud = time.time() - ud_start

		if np.isnan(curr_cost) or np.isinf(curr_cost):
		    print("Nan detected.")
		    return

		if np.mod(uidx, DISPF) == 0:
		    print("Epoch {} Update {} Cost {} Time {}".format(epoch,uidx,curr_cost,ud))

		if np.mod(uidx,SAVEF) == 0:
		    print("Saving...")
		    saveparams = OrderedDict()
		    for kk,vv in params.iteritems():
			saveparams[kk] = vv.get_value()
		    np.savez('%s/model.npz' % save_path,**saveparams)
		    print("Done.")

	    validation_cost = cost_val(t_val,t_val_m,tp_val,tp_val_m,tn_val,tn_val_m)
	    print("Epoch {} Validation Cost {}".format(epoch, validation_cost))
	    print("Seen {} samples.".format(n_samples))

    except KeyboardInterrupt:
	pass
예제 #31
0
def main(train_path,val_path,save_path,num_epochs=NUM_EPOCHS):

    shutil.copyfile('settings.py','%s/settings.txt'%save_path)

    print("Preparing Data...")

    # Training data
    if not RELOAD_DATA:
        print("Creating Pairs...")
        trainX = batched_tweets.create_pairs(train_path)
        valX = batched_tweets.create_pairs(val_path)
        print("Number of training pairs = {}".format(len(trainX[0])))
        print("Number of validation pairs = {}".format(len(valX[0])))
        with open('%s/train_pairs.pkl'%(save_path),'w') as f:
            pkl.dump(trainX, f)
        with open('%s/val_pairs.pkl'%(save_path),'w') as f:
            pkl.dump(valX, f)
    else:
        print("Loading Pairs...")
        with open(train_path,'r') as f:
            trainX = pkl.load(f)
        with open(val_path,'r') as f:
            valX = pkl.load(f)

    if not RELOAD_MODEL:
        # Build dictionary
        chardict, charcount = batched_tweets.build_dictionary(trainX[0] + trainX[1])
        n_char = len(chardict.keys()) + 1
        batched_tweets.save_dictionary(chardict,charcount,'%s/dict.pkl' % save_path)

        # params
        n_char = len(chardict.keys()) + 1
        params = init_params(n_chars=n_char)

    else:
        print("Loading model params...")
        params = load_params_shared('%s/model.npz' % save_path)

        print("Loading dictionary...")
        with open('%s/dict.pkl' % save_path, 'rb') as f:
            chardict = pkl.load(f)
        n_char = len(chardict.keys()) + 1

    train_iter = batched_tweets.BatchedTweets(trainX, batch_size=N_BATCH, maxlen=MAX_LENGTH)
    val_iter = batched_tweets.BatchedTweets(valX, batch_size=512, maxlen=MAX_LENGTH)

    print("Building network...")

    # Tweet variables
    tweet = T.itensor3()
    ptweet = T.itensor3()
    ntweet = T.itensor3()

    # masks
    t_mask = T.fmatrix()
    tp_mask = T.fmatrix()
    tn_mask = T.fmatrix()

    # Embeddings
    emb_t, net = tweet2vec(tweet, t_mask, params, n_char)
    emb_tp, net = tweet2vec(ptweet, tp_mask, params, n_char)
    emb_tn, net = tweet2vec(ntweet, tn_mask, params, n_char)

    # batch loss
    D1 = 1 - T.batched_dot(emb_t, emb_tp)/(tnorm(emb_t)*tnorm(emb_tp))
    D2 = 1 - T.batched_dot(emb_t, emb_tn)/(tnorm(emb_t)*tnorm(emb_tn))
    gap = D1-D2+M
    loss = gap*(gap>0)
    cost = T.mean(loss)
    cost_only = T.mean(loss)
 
    # params and updates
    print("Computing updates...")
    lr = LEARNING_RATE
    mu = MOMENTUM
    updates = lasagne.updates.nesterov_momentum(cost, lasagne.layers.get_all_params(net, trainable=True), lr, momentum=mu)

    # Theano function
    print("Compiling theano functions...")
    inps = [tweet,t_mask,ptweet,tp_mask,ntweet,tn_mask]
    #dist = theano.function(inps,[D1,D2])
    #l = theano.function(inps,loss)
    #t2v = theano.function(inps,[emb_t,emb_tp,emb_tn])
    cost_val = theano.function(inps,[cost_only, emb_t, emb_tp, emb_tn])
    train = theano.function(inps,cost,updates=updates)

    # Training
    print("Training...")
    uidx = 0
    try:
        for epoch in range(num_epochs):
            n_samples = 0
            train_cost = 0.
            print("Epoch {}".format(epoch))

            if USE_SCHEDULE:
                # schedule
                if epoch > 0 and (epoch+1) % 10 == 0:
                    print("Updating Schedule...")
                    lr = max(1e-5,lr/10)
                    mu = mu - 0.1
                    updates = lasagne.updates.nesterov_momentum(cost, lasagne.layers.get_all_params(net, trainable=True), lr, momentum=mu)
                    train = theano.function(inps,cost,updates=updates)

            if epoch >= 10:
                cost = T.mean(loss) + REGULARIZATION*lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2)
                reg_only = REGULARIZATION*lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2)
                reg_val = theano.function([],reg_only)
                train = theano.function(inps,cost,updates=updates)

            ud_start = time.time()
            for x,y,z in train_iter:
                if not x:
                    print("Minibatch with no valid triples")
                    continue

                n_samples +=len(x)
                uidx += 1

                if DEBUG and uidx > 3:
                    sys.exit()

                if DEBUG:
                    print("Tweets = {}".format(x[:5]))

                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data(x, y, z, chardict, maxlen=MAX_LENGTH, n_chars=n_char)

                if x==None:
                    print("Minibatch with zero samples under maxlength.")
                    uidx -= 1
                    continue

                if DEBUG:
                    print("Params before update...")
                    print_params(params)
                    display_actv(x,x_m,y,y_m,z,z_m,inps,net,'before')
                    cb, embb, embb_p, embb_n = cost_val(x,x_m,y,y_m,z,z_m)

                curr_cost = train(x,x_m,y,y_m,z,z_m)
                ud = time.time() - ud_start
                train_cost += curr_cost*len(x)

                if DEBUG:
                    print("Params after update...")
                    print_params(params)
                    display_actv(x,x_m,y,y_m,z,z_m,inps,net,'after')
                    ca, emba, emba_p, emba_n = cost_val(x,x_m,y,y_m,z,z_m)
                    print("Embeddings before = {}".format(embb[:5]))
                    print("Embeddings after = {}".format(emba[:5]))
                    print("Cost before update = {} \nCost after update = {}".format(cb, ca))

                if np.isnan(curr_cost) or np.isinf(curr_cost):
                    print("Nan detected.")
                    return

                if np.mod(uidx, DISPF) == 0:
                    print("Epoch {} Update {} Cost {} Time {} Samples {}".format(epoch,uidx,curr_cost,ud,len(x)))

                if np.mod(uidx,SAVEF) == 0:
                    print("Saving...")
                    saveparams = OrderedDict()
                    for kk,vv in params.iteritems():
                        saveparams[kk] = vv.get_value()
                        np.savez('%s/model.npz' % save_path,**saveparams)
                    print("Done.")

            print("Computing Validation Cost...")
            validation_cost = 0.
            n_val_samples = 0
            for x,y,z in val_iter:
                if not x:
                    print("Validation: Minibatch with no valid triples")
                    continue

                n_val_samples += len(x)
                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data(x, y, z, chardict, maxlen=MAX_LENGTH, n_chars=n_char)

                if x==None:
                    print("Validation: Minibatch with zero samples under maxlength")
                    continue

                curr_cost, _, _, _ = cost_val(x,x_m,y,y_m,z,z_m)
                validation_cost += curr_cost*len(x)

            if epoch >= 10:
                regularization_cost = reg_val()
            else:
                regularization_cost = 0
            print("Epoch {} Training Cost {} Validation Cost {} Regularization Cost {}".format(epoch, train_cost/n_samples, validation_cost/n_val_samples, regularization_cost))
            print("Seen {} samples.".format(n_samples))

            for kk,vv in params.iteritems():
                print("Param {} Epoch {} Max {} Min {}".format(kk, epoch, np.max(vv.get_value()), np.min(vv.get_value())))

            print("Saving...")
            saveparams = OrderedDict()
            for kk,vv in params.iteritems():
                saveparams[kk] = vv.get_value()
                np.savez('%s/model_%d.npz' % (save_path,epoch),**saveparams)
            print("Done.")
            
            if False:
                # store embeddings and data
                features = np.zeros((len(train_iter.data[0]),3*WDIM))
                distances = np.zeros((len(train_iter.data[0]),2))
                for idx, triple in enumerate(zip(train_iter.data[0],train_iter.data[1],train_iter.data[2])):
                    x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data([triple[0]], [triple[1]], [triple[2]], chardict, maxlen=MAX_LENGTH, n_chars=n_char)
                    if x==None:
                        continue
                    emb1, emb2, emb3 = t2v(x,x_m,y,y_m,z,z_m)
                    emb1 = np.reshape(emb1, (WDIM))
                    emb2 = np.reshape(emb2, (WDIM))
                    emb3 = np.reshape(emb3, (WDIM))
                    features[idx,:] = np.concatenate((emb1,emb2,emb3),axis=0)
                    distances[idx,0] = 1-np.dot(emb1,emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
                    distances[idx,1] = 1-np.dot(emb1,emb3)/(np.linalg.norm(emb1)*np.linalg.norm(emb3))
                with open('debug/feat_%d.npy'%epoch,'w') as df:
                    np.save(df,features)
                with open('debug/dist_%d.npy'%epoch,'w') as ds:
                    np.save(ds,distances)
        if False:
            with open('debug/data.txt','w') as dd:
                for triple in zip(train_iter.data[0],train_iter.data[1],train_iter.data[2]):
                    dd.write('%s\t%s\t%s\n' % (triple[0],triple[1],triple[2]))

    except KeyboardInterrupt:
        pass
예제 #32
0
파일: main.py 프로젝트: yikangli/ConvLSTM
def train_lstm(
    imshape=(3, 224, 224),
    nCategories=51,
    batch=10,
    max_epochs=3,  # The maximum number of epoch to run.
    lrate=[0.001, 0.001, 0.001],
    decay_c=0.,  # Weight decay for the classifier applied to the U weights.
    maxlen=100,  # Maximum length of captions 
    optimizer='rmsprop',  # sgd, adadelta and rmsprop available
    saveto='recognition_model',  # The best model will be saved there.
    dispFreq=10000,  # Display to stdout the training progress every N updates.
    saveFreq=20000,  #9210,  # Save the parameters after every saveFreq updates.
    validFreq=140000,  #2*9210,
    testFreq=5 * 140000,
    margin=0.55,

    # Parameter for extra option.
    reload_model=None,  # Path to a saved model we want to start from.
    test_size=-1,  # If >0, we keep only this number of test example.
    load_from_old=False,
):

    # Model options
    model_options = {}
    model_options['image_shape'] = imshape
    model_options['nCategories'] = nCategories

    model_options['batch'] = batch
    model_options['lrate'] = lrate
    model_options['max_epochs'] = max_epochs
    model_options['optimizer'] = optimizer

    model_options['dispFreq'] = dispFreq
    model_options['validFreq'] = validFreq
    model_options['saveto'] = saveto
    model_options['saveFreq'] = saveFreq

    ########################################################
    # Load training and development sets

    print('\nBuilding Forward Pass')
    # Read from Previous Training Result
    if load_from_old:
        print('Loading Previously Trained Weight')
        start = 14
        with open('../save/weights_14.pkl', 'rb') as W:
            tparams = pickle.load(W)

    else:
        start = 0
        params = init_params(model_options)
        pdb.set_trace()
        tparams = init_tparams(params)
    pdb.set_trace()
    trng, use_noise, x, mask, cost, pred = build_model(params, model_options)
    print('Done')

    pdb.set_trace()

    print('\nBuilding Backward Pass')
    if decay_c > 0.:
        decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function(inputs=[mask, x, y],
                             outputs=[cost],
                             name='f_cost',
                             allow_input_downcast=True)

    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad = theano.function(inputs=[mask, x, y],
                             outputs=grads,
                             name='f_grad',
                             allow_input_downcast=True)
    print('Done')
    ########################################################
    print('Optimization')
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, x, mask,
                                              cost)
    ########################################################
    '''
예제 #33
0
def main(train_path,val_path,save_path,num_epochs=NUM_EPOCHS):

    shutil.copyfile('settings.py','%s/settings.txt'%save_path)

    print("Preparing Data...")

    # Training data
    if not RELOAD_DATA:
        print("Creating Pairs...")
        trainX = batched_tweets.create_pairs(train_path)
        valX = batched_tweets.create_pairs(val_path)
        print("Number of training pairs = {}".format(len(trainX[0])))
        print("Number of validation pairs = {}".format(len(valX[0])))
        with open('%s/train_pairs.pkl'%(save_path),'w') as f:
            pkl.dump(trainX, f)
        with open('%s/val_pairs.pkl'%(save_path),'w') as f:
            pkl.dump(valX, f)
    else:
        print("Loading Pairs...")
        with open(train_path,'r') as f:
            trainX = pkl.load(f)
        with open(val_path,'r') as f:
            valX = pkl.load(f)

    if not RELOAD_MODEL:
        # Build dictionary
        chardict, charcount = batched_tweets.build_dictionary(trainX[0] + trainX[1])
        n_char = len(chardict.keys()) + 1
        batched_tweets.save_dictionary(chardict,charcount,'%s/dict.pkl' % save_path)

        # params
        n_char = len(chardict.keys()) + 1
        params = init_params(n_chars=n_char)

    else:
        print("Loading model params...")
        params = load_params_shared('%s/model.npz' % save_path)

        print("Loading dictionary...")
        with open('%s/dict.pkl' % save_path, 'rb') as f:
            chardict = pkl.load(f)
        n_char = len(chardict.keys()) + 1

    train_iter = batched_tweets.BatchedTweets(trainX, batch_size=N_BATCH, maxlen=MAX_LENGTH)
    val_iter = batched_tweets.BatchedTweets(valX, batch_size=512, maxlen=MAX_LENGTH)

    print("Building network...")

    # Tweet variables
    tweet = T.itensor3()
    ptweet = T.itensor3()
    ntweet = T.itensor3()

    # masks
    t_mask = T.fmatrix()
    tp_mask = T.fmatrix()
    tn_mask = T.fmatrix()

    # Embeddings
    emb_t, net = tweet2vec(tweet, t_mask, params, n_char)
    emb_tp, net = tweet2vec(ptweet, tp_mask, params, n_char)
    emb_tn, net = tweet2vec(ntweet, tn_mask, params, n_char)

    # batch loss
    D1 = 1 - T.batched_dot(emb_t, emb_tp)/(tnorm(emb_t)*tnorm(emb_tp))
    D2 = 1 - T.batched_dot(emb_t, emb_tn)/(tnorm(emb_t)*tnorm(emb_tn))
    gap = D1-D2+M
    loss = gap*(gap>0)
    cost = T.mean(loss) + REGULARIZATION*lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2)
    cost_only = T.mean(loss)
    reg_only = REGULARIZATION*lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2)

    # params and updates
    print("Computing updates...")
    lr = LEARNING_RATE
    mu = MOMENTUM
    updates = lasagne.updates.nesterov_momentum(cost, lasagne.layers.get_all_params(net, trainable=True), lr, momentum=mu)

    # Theano function
    print("Compiling theano functions...")
    inps = [tweet,t_mask,ptweet,tp_mask,ntweet,tn_mask]
    #dist = theano.function(inps,[D1,D2])
    #l = theano.function(inps,loss)
    #t2v = theano.function(inps,[emb_t,emb_tp,emb_tn])
    cost_val = theano.function(inps,[cost_only, emb_t, emb_tp, emb_tn])
    reg_val = theano.function([],reg_only)
    train = theano.function(inps,cost,updates=updates)

    # Training
    print("Training...")
    uidx = 0
    try:
        for epoch in range(num_epochs):
            n_samples = 0
            train_cost = 0.
            print("Epoch {}".format(epoch))

            if USE_SCHEDULE:
                # schedule
                if epoch > 0 and epoch % 5 == 0:
                    print("Updating Schedule...")
                    lr = max(1e-5,lr/2)
                    mu = mu - 0.05
                    updates = lasagne.updates.nesterov_momentum(cost, lasagne.layers.get_all_params(net), lr, momentum=mu)
                    train = theano.function(inps,cost,updates=updates)

            ud_start = time.time()
            for x,y,z in train_iter:
                if not x:
                    print("Minibatch with no valid triples")
                    continue

                n_samples +=len(x)
                uidx += 1

                if DEBUG and uidx > 3:
                    sys.exit()

                if DEBUG:
                    print("Tweets = {}".format(x[:5]))

                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data(x, y, z, chardict, maxlen=MAX_LENGTH, n_chars=n_char)

                if x==None:
                    print("Minibatch with zero samples under maxlength.")
                    uidx -= 1
                    continue

                if DEBUG:
                    print("Params before update...")
                    print_params(params)
                    display_actv(x,x_m,y,y_m,z,z_m,inps,net,'before')
                    cb, embb, embb_p, embb_n = cost_val(x,x_m,y,y_m,z,z_m)

                curr_cost = train(x,x_m,y,y_m,z,z_m)
                ud = time.time() - ud_start
                train_cost += curr_cost*len(x)

                if DEBUG:
                    print("Params after update...")
                    print_params(params)
                    display_actv(x,x_m,y,y_m,z,z_m,inps,net,'after')
                    ca, emba, emba_p, emba_n = cost_val(x,x_m,y,y_m,z,z_m)
                    print("Embeddings before = {}".format(embb[:5]))
                    print("Embeddings after = {}".format(emba[:5]))
                    print("Cost before update = {} \nCost after update = {}".format(cb, ca))

                if np.isnan(curr_cost) or np.isinf(curr_cost):
                    print("Nan detected.")
                    return

                if np.mod(uidx, DISPF) == 0:
                    print("Epoch {} Update {} Cost {} Time {} Samples {}".format(epoch,uidx,curr_cost,ud,len(x)))

                if np.mod(uidx,SAVEF) == 0:
                    print("Saving...")
                    saveparams = OrderedDict()
                    for kk,vv in params.iteritems():
                        saveparams[kk] = vv.get_value()
                        np.savez('%s/model.npz' % save_path,**saveparams)
                    print("Done.")

            print("Computing Validation Cost...")
            validation_cost = 0.
            n_val_samples = 0
            for x,y,z in val_iter:
                if not x:
                    print("Validation: Minibatch with no valid triples")
                    continue

                n_val_samples += len(x)
                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data(x, y, z, chardict, maxlen=MAX_LENGTH, n_chars=n_char)

                if x==None:
                    print("Validation: Minibatch with zero samples under maxlength")
                    continue

                curr_cost, _, _, _ = cost_val(x,x_m,y,y_m,z,z_m)
                validation_cost += curr_cost*len(x)

            regularization_cost = reg_val()
            print("Epoch {} Training Cost {} Validation Cost {} Regularization Cost {}".format(epoch, train_cost/n_samples, validation_cost/n_val_samples, regularization_cost))
            print("Seen {} samples.".format(n_samples))

            for kk,vv in params.iteritems():
                print("Param {} Epoch {} Max {} Min {}".format(kk, epoch, np.max(vv.get_value()), np.min(vv.get_value())))

            print("Saving...")
            saveparams = OrderedDict()
            for kk,vv in params.iteritems():
                saveparams[kk] = vv.get_value()
                np.savez('%s/model_%d.npz' % (save_path,epoch),**saveparams)
            print("Done.")
            
            if False:
                # store embeddings and data
                features = np.zeros((len(train_iter.data[0]),3*WDIM))
                distances = np.zeros((len(train_iter.data[0]),2))
                for idx, triple in enumerate(zip(train_iter.data[0],train_iter.data[1],train_iter.data[2])):
                    x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data([triple[0]], [triple[1]], [triple[2]], chardict, maxlen=MAX_LENGTH, n_chars=n_char)
                    if x==None:
                        continue
                    emb1, emb2, emb3 = t2v(x,x_m,y,y_m,z,z_m)
                    emb1 = np.reshape(emb1, (WDIM))
                    emb2 = np.reshape(emb2, (WDIM))
                    emb3 = np.reshape(emb3, (WDIM))
                    features[idx,:] = np.concatenate((emb1,emb2,emb3),axis=0)
                    distances[idx,0] = 1-np.dot(emb1,emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
                    distances[idx,1] = 1-np.dot(emb1,emb3)/(np.linalg.norm(emb1)*np.linalg.norm(emb3))
                with open('debug/feat_%d.npy'%epoch,'w') as df:
                    np.save(df,features)
                with open('debug/dist_%d.npy'%epoch,'w') as ds:
                    np.save(ds,distances)
        if False:
            with open('debug/data.txt','w') as dd:
                for triple in zip(train_iter.data[0],train_iter.data[1],train_iter.data[2]):
                    dd.write('%s\t%s\t%s\n' % (triple[0],triple[1],triple[2]))

    except KeyboardInterrupt:
        pass
예제 #34
0
def load_model(path_to_model):
    """
    Load all model components
    """
    print path_to_model

    # Load model
    print 'Loading model'
    with open(path_to_model + '.pkl', 'rb') as f:
        model = pkl.load(f)

    options = model['options']
    options['use_topic'] = True
    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model + '.npz', params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling sentence encoder...'
    [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    #print 'Compiling sentence encoder with topics...'
    #[x, x_mask, topics], sentences = build_sentence_encoder_with_topicvector(tparams, options)
    #f_senc_t = theano.function([x, x_mask, topics], sentences, name='f_senc_t')

    print 'Compiling image encoder...'
    [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    print 'Compiling topic encoder...'
    [t], topics = build_topic_encoder(tparams, options)
    f_tenc = theano.function([t], topics, name='f_tenc')
    '''
    print 'Compiling topic_vector1 encoder...'
    [t], topics = build_topic_vector1_encoder(tparams, options)
    f_tv1enc = theano.function([t], topics, name='f_tv1enc')

    print 'Compiling topic_vector2 encoder...'
    [t], topics = build_topic_encoder(tparams, options)
    f_tv2enc = theano.function([t], topics, name='f_tv2enc')
    '''
    print 'Compiling error computation...'
    [s, im], errs = build_errors(options)
    f_err = theano.function([s, im], errs, name='f_err')
    '''
    [s, im, t], errs_t1 = build_errors_3level(options)
    f_err_t1 = theano.function([s,im,t], errs_t1, name='f_err_t1')
    
    [s_t, im_t], errs_t2 = build_errors_t2(options)
    f_err_t2 = theano.function([s_t, im_t], errs_t2, name='f_err_t2')
    '''
    # Store everything we need in a dictionary
    print 'Packing up...'
    model['f_senc'] = f_senc
    #model['f_senc_t'] = f_senc_t
    model['f_ienc'] = f_ienc
    model['f_tenc'] = f_tenc
    #model['f_tv1enc'] = f_tv1enc
    #model['f_tv2enc'] = f_tv2enc
    model['f_err'] = f_err
    #model['f_err_t1'] = f_err_t1
    #model['f_err_t2'] = f_err_t2

    return model
예제 #35
0
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print 'reloading...' + load_from
        with open('%s.pkl' % load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v

    model_options = curr_model['options']

    # initialize logger
    import datetime
    timestampedName = datetime.datetime.now().strftime(
        '%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName,
              hyperparams=model_options,
              saveDir='vis/training',
              xLabel='Examples Seen',
              saveFrequency=1)

    print curr_model['options']

    # Load training and development sets
    print 'Loading dataset'
    dataset = load_dataset(model_options['data'],
                           cnn=model_options['cnn'],
                           load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print 'Dictionary size: ' + str(len(worddict))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb'))

    print 'Loading data'
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print 'Building sentence encoder'
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print 'Building errors..'
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    print 'Optimization'

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in xrange(model_options['max_epochs']):

        print 'Epoch ', eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                log.update({'Error': float(cost)}, n_samples)

            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print 'Computing results...'

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs,
                                                              vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (
                    r1, r5, r10, medr, meanr)
                log.update(
                    {
                        'R@1': r1,
                        'R@5': r5,
                        'R@10': r10,
                        'median_rank': medr,
                        'mean_rank': meanr
                    }, n_samples)
                print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (
                    r1i, r5i, r10i, medri, meanri)
                log.update(
                    {
                        'Image2Caption_R@1': r1i,
                        'Image2Caption_R@5': r5i,
                        'Image2CaptionR@10': r10i,
                        'Image2Caption_median_rank': medri,
                        'Image2Caption_mean_rank': meanri
                    }, n_samples)

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print 'Saving...',
                    numpy.savez('%s/%s' % (save_dir, name), **unzip(tparams))
                    print 'Done'
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open(
                            'vis/roc/%s/%s.json' %
                        (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        index = json.load(open('vis/roc/index.json', 'r'))
                    except IOError:
                        index = {model_options['data']: []}

                    models = index[model_options['data']]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)

        print 'Seen %d samples' % n_samples
예제 #36
0
def train(
        dim_word_desc=400,  # word vector dimensionality
        dim_word_q=400,
        dim_word_ans=600,
        dim_proj=300,
        dim=400,  # the number of LSTM units
        encoder_desc='lstm',
        encoder_desc_word='lstm',
        encoder_desc_sent='lstm',
        use_dq_sims=False,
        eyem=None,
        learn_h0=False,
        use_desc_skip_c_g=False,
        debug=False,
        encoder_q='lstm',
        patience=10,
        max_epochs=5000,
        dispFreq=100,
        decay_c=0.,
        alpha_c=0.,
        clip_c=-1.,
        lrate=0.01,
        n_words_q=49145,
        n_words_desc=115425,
        n_words_ans=409,
        pkl_train_files=None,
        pkl_valid_files=None,
        maxlen=2000,  # maximum length of the description
        optimizer='rmsprop',
        batch_size=2,
        vocab=None,
        valid_batch_size=16,
        use_elu_g=False,
        saveto='model.npz',
        model_dir=None,
        ms_nlayers=3,
        validFreq=1000,
        saveFreq=1000,  # save the parameters after every saveFreq updates
        datasets=[None],
        truncate=400,
        momentum=0.9,
        use_bidir=False,
        cost_mask=None,
        valid_datasets=[
            '/u/yyu/stor/caglar/rc-data/cnn/cnn_test_data.h5',
            '/u/yyu/stor/caglar/rc-data/cnn/cnn_valid_data.h5'
        ],
        dropout_rate=0.5,
        use_dropout=True,
        reload_=True,
        **opt_ds):

    ensure_dir_exists(model_dir)
    mpath = os.path.join(model_dir, saveto)
    mpath_best = os.path.join(model_dir, prfx("best", saveto))
    mpath_last = os.path.join(model_dir, prfx("last", saveto))
    mpath_stats = os.path.join(model_dir, prfx("stats", saveto))

    # Model options
    model_options = locals().copy()
    model_options['use_sent_reps'] = opt_ds['use_sent_reps']
    stats = defaultdict(list)

    del model_options['eyem']
    del model_options['cost_mask']

    if cost_mask is not None:
        cost_mask = sharedX(cost_mask)

    # reload options and parameters
    if reload_:
        print "Reloading the model."
        if os.path.exists(mpath_best):
            print "Reloading the best model from %s." % mpath_best
            with open(os.path.join(mpath_best, '%s.pkl' % mpath_best),
                      'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath_best, params)
        elif os.path.exists(mpath):
            print "Reloading the model from %s." % mpath
            with open(os.path.join(mpath, '%s.pkl' % mpath), 'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath, params)
        else:
            raise IOError("Couldn't open the file.")
    else:
        print "Couldn't reload the models initializing from scratch."
        params = init_params(model_options)

    if datasets[0]:
        print "Short dataset", datasets[0]

    print 'Loading data'
    print 'Building model'
    if pkl_train_files is None or pkl_valid_files is None:
        train, valid, test = load_data(path=datasets[0],
                                       valid_path=valid_datasets[0],
                                       test_path=valid_datasets[1],
                                       batch_size=batch_size,
                                       **opt_ds)
    else:
        train, valid, test = load_pkl_data(train_file_paths=pkl_train_files,
                                           valid_file_paths=pkl_valid_files,
                                           batch_size=batch_size,
                                           vocab=vocab,
                                           eyem=eyem,
                                           **opt_ds)

    tparams = init_tparams(params)
    trng, use_noise, inps_d, \
                     opt_ret, \
                     cost, errors, ent_errors, ent_derrors, probs = \
                        build_model(tparams,
                                    model_options,
                                    prepare_data if not opt_ds['use_sent_reps'] \
                                            else prepare_data_sents,
                                    valid,
                                    cost_mask=cost_mask)

    alphas = opt_ret['dec_alphas']

    if opt_ds['use_sent_reps']:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'],
                inps_d['slen'], inps_d['qlen'],\
                inps_d['ent_mask']
                ]
    else:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'], \
                inps_d['qlen'], \
                inps_d['ent_mask']]

    outs = [cost, errors, probs, alphas]
    if ent_errors:
        outs += [ent_errors]

    if ent_derrors:
        outs += [ent_derrors]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, outs, profile=profile)
    print 'Done'

    # Apply weight decay on the feed-forward connections
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.

        for kk, vv in tparams.iteritems():
            if "logit" in kk or "ff" in kk:
                weight_decay += (vv**2).sum()

        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Computing gradient...',
    grads = safe_grad(cost, itemlist(tparams))
    print 'Done'

    # Gradient clipping:
    if clip_c > 0.:
        g2 = get_norms(grads)
        for p, g in grads.iteritems():
            grads[p] = tensor.switch(g2 > (clip_c**2),
                                     (g / tensor.sqrt(g2 + 1e-8)) * clip_c, g)
    inps.pop()
    if optimizer.lower() == "adasecant":
        learning_rule = Adasecant(delta_clip=25.0,
                                  use_adagrad=True,
                                  grad_clip=0.25,
                                  gamma_clip=0.)
    elif optimizer.lower() == "rmsprop":
        learning_rule = RMSPropMomentum(init_momentum=momentum)
    elif optimizer.lower() == "adam":
        learning_rule = Adam()
    elif optimizer.lower() == "adadelta":
        learning_rule = AdaDelta()

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    learning_rule = None

    if learning_rule:
        f_grad_shared, f_update = learning_rule.get_funcs(learning_rate=lr,
                                                          grads=grads,
                                                          inp=inps,
                                                          cost=cost,
                                                          errors=errors)
    else:
        f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps,
                                                  cost, errors)

    print 'Done'
    print 'Optimization'
    history_errs = []
    # reload history
    if reload_ and os.path.exists(mpath):
        history_errs = list(numpy.load(mpath)['history_errs'])

    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size

    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    best_found = False
    uidx = 0
    estop = False

    train_cost_ave, train_err_ave, \
            train_gnorm_ave = reset_train_vals()

    for eidx in xrange(max_epochs):
        n_samples = 0

        if train.done:
            train.reset()

        for d_, q_, a, em in train:
            n_samples += len(a)
            uidx += 1
            use_noise.set_value(1.)

            if opt_ds['use_sent_reps']:
                # To mask the description and the question.
                d, d_mask, q, q_mask, dlen, slen, qlen = prepare_data_sents(
                    d_, q_)

                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(
                    d, d_mask, q, q_mask, a, dlen, slen, qlen)
            else:
                d, d_mask, q, q_mask, dlen, qlen = prepare_data(d_, q_)

                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(
                    d, d_mask, q, q_mask, a, dlen, qlen)

            upnorm = f_update(lrate)
            ud = time.time() - ud_start

            # Collect the running ave train stats.
            train_cost_ave = running_ave(train_cost_ave, cost)
            train_err_ave = running_ave(train_err_ave, errors)
            train_gnorm_ave = running_ave(train_gnorm_ave, gnorm)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                import ipdb
                ipdb.set_trace()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, ' Update ', uidx, \
                        ' Cost ', cost, ' UD ', ud, \
                        ' UpNorm ', upnorm[0].tolist(), \
                        ' GNorm ', gnorm, \
                        ' Pnorm ', pnorm, 'Terrors ', errors

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                if best_p is not None and best_found:
                    numpy.savez(mpath_best,
                                history_errs=history_errs,
                                **best_p)
                    pkl.dump(model_options, open('%s.pkl' % mpath_best, 'wb'))
                else:
                    params = unzip(tparams)

                numpy.savez(mpath, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl' % mpath, 'wb'))
                pkl.dump(stats, open("%s.pkl" % mpath_stats, 'wb'))

                print 'Done'
                print_param_norms(tparams)

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                if valid.done:
                    valid.reset()

                valid_costs, valid_errs, valid_probs, \
                        valid_alphas, error_ent, error_dent = eval_model(f_log_probs,
                                                  prepare_data if not opt_ds['use_sent_reps'] \
                                                    else prepare_data_sents,
                                                  model_options,
                                                  valid,
                                                  use_sent_rep=opt_ds['use_sent_reps'])

                valid_alphas_ = numpy.concatenate(
                    [va.argmax(0) for va in valid_alphas.tolist()], axis=0)
                valid_err = valid_errs.mean()
                valid_cost = valid_costs.mean()
                valid_alpha_ent = -negentropy(valid_alphas)

                mean_valid_alphas = valid_alphas_.mean()
                std_valid_alphas = valid_alphas_.std()

                mean_valid_probs = valid_probs.argmax(1).mean()
                std_valid_probs = valid_probs.argmax(1).std()

                history_errs.append([valid_cost, valid_err])

                stats['train_err_ave'].append(train_err_ave)
                stats['train_cost_ave'].append(train_cost_ave)
                stats['train_gnorm_ave'].append(train_gnorm_ave)

                stats['valid_errs'].append(valid_err)
                stats['valid_costs'].append(valid_cost)
                stats['valid_err_ent'].append(error_ent)
                stats['valid_err_desc_ent'].append(error_dent)

                stats['valid_alphas_mean'].append(mean_valid_alphas)
                stats['valid_alphas_std'].append(std_valid_alphas)
                stats['valid_alphas_ent'].append(valid_alpha_ent)

                stats['valid_probs_mean'].append(mean_valid_probs)
                stats['valid_probs_std'].append(std_valid_probs)

                if uidx == 0 or valid_err <= numpy.array(
                        history_errs)[:, 1].min():
                    best_p = unzip(tparams)
                    bad_counter = 0
                    best_found = True
                else:
                    bst_found = False

                if numpy.isnan(valid_err):
                    import ipdb
                    ipdb.set_trace()

                print "============================"
                print '\t>>>Valid error: ', valid_err, \
                        ' Valid cost: ', valid_cost
                print '\t>>>Valid pred mean: ', mean_valid_probs, \
                        ' Valid pred std: ', std_valid_probs
                print '\t>>>Valid alphas mean: ', mean_valid_alphas, \
                        ' Valid alphas std: ', std_valid_alphas, \
                        ' Valid alpha negent: ', valid_alpha_ent, \
                        ' Valid error ent: ', error_ent, \
                        ' Valid error desc ent: ', error_dent

                print "============================"
                print "Running average train stats "
                print '\t>>>Train error: ', train_err_ave, \
                        ' Train cost: ', train_cost_ave, \
                        ' Train grad norm: ', train_gnorm_ave
                print "============================"


                train_cost_ave, train_err_ave, \
                    train_gnorm_ave = reset_train_vals()

        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid.reset()
    valid_cost, valid_error, valid_probs, \
            valid_alphas, error_ent = eval_model(f_log_probs,
                                      prepare_data if not opt_ds['use_sent_reps'] \
                                           else prepare_data_sents,
                                      model_options, valid,
                                      use_sent_rep=opt_ds['use_sent_rep'])

    print " Final eval resuts: "
    print 'Valid error: ', valid_error.mean()
    print 'Valid cost: ', valid_cost.mean()
    print '\t>>>Valid pred mean: ', valid_probs.mean(), \
            ' Valid pred std: ', valid_probs.std(), \
            ' Valid error ent: ', error_ent

    params = copy.copy(best_p)

    numpy.savez(mpath_last,
                zipped_params=best_p,
                history_errs=history_errs,
                **params)

    return valid_err, valid_cost
예제 #37
0
def trainer(data='coco',  #f8k, f30k, coco
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            encoder='gru',  # gru OR bow
            max_epochs=15,
            dispFreq=10,
            decay_c=0.,
            grad_clip=2.,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 128,
            saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz',
            validFreq=100,
            lrate=0.0002,
            reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # Load training and development sets
    print 'Loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train[0]+dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl'%saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building sentence encoder'
    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    curr = 0.
    n_samples = 0
    
    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                    print 'Done'

        print 'Seen %d samples'%n_samples
예제 #38
0
def trainer(X, 
            dim_word=620, # word vector dimensionality
            dim=2400, # the number of GRU units
            encoder='gru',
            decoder='gru',
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size = 64,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder 
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    # TODO: if loading old parameters you need to make sure you are using them
    #  in the rest of the code
    # if reload_ and os.path.exists(saveto):
    #     print 'reloading...' + saveto
    #     with open('%s.pkl'%saveto, 'rb') as f:
    #         model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto + '.npz', params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX, batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

        print 'Seen %d samples'%n_samples
예제 #39
0
def trainer(
        X,
        dim_word=620,  # word vector dimensionality
        dim=2400,  # the number of GRU units
        encoder='gru',
        decoder='gru',
        max_epochs=5,
        dispFreq=1,
        decay_c=0.,
        grad_clip=5.,
        n_words=474000,
        maxlen_w=30,
        optimizer='adam',
        batch_size=64,
        saveto='/data/embeddingModel.npz',
        dictionary='dictionary.pkl',
        saveFreq=1000,
        reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    print(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            models_options = pkl.load(f)

    # load dictionary
    print('Loading dictionary...')
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.items():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print('Building model')
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print('Building f_log_probs...')
    f_log_probs = theano.function(inps, cost, profile=False)
    print('Done')

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.items():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print('Building f_cost...')
    f_cost = theano.function(inps, cost, profile=False)
    print('Done')

    print('Done')
    print('Building f_grad...')
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([],
                                    [(t**2).sum() for k, t in tparams.items()],
                                    profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print('Building optimizers...', end='')
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print('Optimization')

    # Each sentence in the minibatch have same length (for encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in range(max_epochs):
        n_samples = 0

        print('Epoch ', eidx)

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(
                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print('Minibatch with zero sample under length ', maxlen_w)
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)

            if numpy.mod(uidx, saveFreq) == 0:
                print('Saving...', end='')

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print('Done')

        print('Seen %d samples' % n_samples)
예제 #40
0
파일: nmt.py 프로젝트: caglar/nmt
def train(dim_word=100, # word vector dimensionality
          dim=1000, # the number of LSTM units
          encoder='gru',
          decoder='gru_cond',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,
          alpha_c=0.,
          diag_c=0.,
          clip_c=-1.,
          lrate=0.01,
          n_words_src=100000,
          n_words=100000,
          maxlen=100, # maximum length of the description
          optimizer='rmsprop',
          batch_size = 16,
          valid_batch_size = 16,
          saveto='model.npz',
          validFreq=1000,
          saveFreq=1000, # save the parameters after every saveFreq updates
          sampleFreq=100, # generate some samples after every sampleFreq updates
          datasets=['/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok',
                    '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok'],
          valid_datasets=['../data/dev/newstest2011.en.tok', '../data/dev/newstest2011.fr.tok'],
          dictionaries=['/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok.pkl',
                        '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok.pkl'],
          use_dropout=False,
          reload_=False):

    # Model options
    model_options = locals().copy()

    worddicts = [None] * len(dictionaries)
    worddicts_r = [None] * len(dictionaries)
    for ii, dd in enumerate(dictionaries):
        with open(dd, 'rb') as f:
            worddicts[ii] = pkl.load(f)
        worddicts_r[ii] = dict()
        for kk, vv in worddicts[ii].iteritems():
            worddicts_r[ii][vv] = kk

    # reload options
    if reload_ and os.path.exists(saveto):
        with open('%s.pkl' % saveto, 'rb') as f:
            models_options = pkl.load(f)

    print 'Loading data'
    train = TextIterator(datasets[0], datasets[1],
                         dictionaries[0], dictionaries[1],
                         n_words_source=n_words_src, n_words_target=n_words,
                         batch_size=batch_size,
                         maxlen=maxlen)
    valid = TextIterator(valid_datasets[0], valid_datasets[1],
                         dictionaries[0], dictionaries[1],
                         n_words_source=n_words_src, n_words_target=n_words,
                         batch_size=valid_batch_size,
                         maxlen=maxlen)

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, use_noise, \
          x, x_mask, y, y_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask]

    print 'Buliding sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=profile)
    print 'Done'

    cost = cost.mean()

    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    if alpha_c > 0. and not model_options['decoder'].endswith('simple'):
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = alpha_c * ((tensor.cast(y_mask.sum(0)//x_mask.sum(0), 'float32')[:,None]-
                                opt_ret['dec_alphas'].sum(0))**2).sum(1).mean()
        cost += alpha_reg

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=profile)
    print 'Done'

    print 'Computing gradient...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    print 'Done'
    print 'Building f_grad...',
    f_grad = theano.function(inps, grads, profile=profile)
    print 'Done'

    if clip_c > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (clip_c**2),
                                           g / tensor.sqrt(g2) * clip_c,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
    print 'Done'

    print 'Optimization'

    history_errs = []
    # reload history
    if reload_ and os.path.exists(saveto):
        history_errs = list(numpy.load(saveto)['history_errs'])
    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0])/batch_size
    if saveFreq == -1:
        saveFreq = len(train[0])/batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0])/batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        for x, y in train:
            n_samples += len(x)
            uidx += 1
            use_noise.set_value(1.)

            x, x_mask, y, y_mask = prepare_data(x, y, maxlen=maxlen,
                                                n_words_src=n_words_src,
                                                n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)

                numpy.savez(saveto, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                for jj in xrange(numpy.minimum(5,x.shape[1])):
                    stochastic = True
                    sample, score = gen_sample(tparams, f_init, f_next, x[:,jj][:,None],
                                               model_options, trng=trng, k=1, maxlen=30,
                                               stochastic=stochastic, argmax=False)
                    print 'Source ', jj, ': ',
                    for vv in x[:, jj]:
                        if vv == 0:
                            break
                        if vv in worddicts_r[0]:
                            print worddicts_r[0][vv],
                        else:
                            print 'UNK',
                    print
                    print 'Truth ', jj, ' : ',
                    for vv in y[:, jj]:
                        if vv == 0:
                            break
                        if vv in worddicts_r[1]:
                            print worddicts_r[1][vv],
                        else:
                            print 'UNK',
                    print
                    print 'Sample ', jj, ': ',
                    if stochastic:
                        ss = sample
                    else:
                        score = score / numpy.array([len(s) for s in sample])
                        ss = sample[score.argmin()]
                    for vv in ss:
                        if vv == 0:
                            break
                        if vv in worddicts_r[1]:
                            print worddicts_r[1][vv],
                        else:
                            print 'UNK',
                    print

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                valid_errs = pred_probs(f_log_probs, prepare_data, model_options, valid)
                valid_err = valid_errs.mean()
                history_errs.append(valid_err)

                if uidx == 0 or valid_err <= numpy.array(history_errs).min():
                    best_p = unzip(tparams)
                    bad_counter = 0
                if len(history_errs) > patience and valid_err >= numpy.array(history_errs)[:-patience].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if numpy.isnan(valid_err):
                    import ipdb; ipdb.set_trace()

                print 'Valid ', valid_err

        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid_err = pred_probs(f_log_probs, prepare_data, model_options, valid).mean()
    print 'Valid ', valid_err
    params = copy.copy(best_p)
    numpy.savez(saveto, zipped_params=best_p,
                history_errs=history_errs,
                **params)

    return valid_err
def train(dim_word_desc=400,# word vector dimensionality
          dim_word_q=400,
          dim_word_ans=600,
          dim_proj=300,
          dim=400,# the number of LSTM units
          encoder_desc='lstm',
          encoder_desc_word='lstm',
          encoder_desc_sent='lstm',
          use_dq_sims=False,
          eyem=None,
          learn_h0=False,
          use_desc_skip_c_g=False,
          debug=False,
          encoder_q='lstm',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,
          alpha_c=0.,
          clip_c=-1.,
          lrate=0.01,
          n_words_q=49145,
          n_words_desc=115425,
          n_words_ans=409,
          pkl_train_files=None,
          pkl_valid_files=None,
          maxlen=2000, # maximum length of the description
          optimizer='rmsprop',
          batch_size=2,
          vocab=None,
          valid_batch_size=16,
          use_elu_g=False,
          saveto='model.npz',
          model_dir=None,
          ms_nlayers=3,
          validFreq=1000,
          saveFreq=1000, # save the parameters after every saveFreq updates
          datasets=[None],
          truncate=400,
          momentum=0.9,
          use_bidir=False,
          cost_mask=None,
          valid_datasets=['/u/yyu/stor/caglar/rc-data/cnn/cnn_test_data.h5',
                          '/u/yyu/stor/caglar/rc-data/cnn/cnn_valid_data.h5'],
          dropout_rate=0.5,
          use_dropout=True,
          reload_=True,
          **opt_ds):

    ensure_dir_exists(model_dir)
    mpath = os.path.join(model_dir, saveto)
    mpath_best = os.path.join(model_dir, prfx("best", saveto))
    mpath_last = os.path.join(model_dir, prfx("last", saveto))
    mpath_stats = os.path.join(model_dir, prfx("stats", saveto))

    # Model options
    model_options = locals().copy()
    model_options['use_sent_reps'] = opt_ds['use_sent_reps']
    stats = defaultdict(list)

    del model_options['eyem']
    del model_options['cost_mask']

    if cost_mask is not None:
        cost_mask = sharedX(cost_mask)

    # reload options and parameters
    if reload_:
        print "Reloading the model."
        if os.path.exists(mpath_best):
            print "Reloading the best model from %s." % mpath_best
            with open(os.path.join(mpath_best, '%s.pkl' % mpath_best), 'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath_best, params)
        elif os.path.exists(mpath):
            print "Reloading the model from %s." % mpath
            with open(os.path.join(mpath, '%s.pkl' % mpath), 'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath, params)
        else:
            raise IOError("Couldn't open the file.")
    else:
        print "Couldn't reload the models initializing from scratch."
        params = init_params(model_options)

    if datasets[0]:
        print "Short dataset", datasets[0]

    print 'Loading data'
    print 'Building model'
    if pkl_train_files is None or pkl_valid_files is None:
        train, valid, test = load_data(path=datasets[0],
                                       valid_path=valid_datasets[0],
                                       test_path=valid_datasets[1],
                                       batch_size=batch_size,
                                       **opt_ds)
    else:
        train, valid, test = load_pkl_data(train_file_paths=pkl_train_files,
                                           valid_file_paths=pkl_valid_files,
                                           batch_size=batch_size,
                                           vocab=vocab,
                                           eyem=eyem,
                                           **opt_ds)

    tparams = init_tparams(params)
    trng, use_noise, inps_d, \
                     opt_ret, \
                     cost, errors, ent_errors, ent_derrors, probs = \
                        build_model(tparams,
                                    model_options,
                                    prepare_data if not opt_ds['use_sent_reps'] \
                                            else prepare_data_sents,
                                    valid,
                                    cost_mask=cost_mask)

    alphas = opt_ret['dec_alphas']

    if opt_ds['use_sent_reps']:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'],
                inps_d['slen'], inps_d['qlen'],\
                inps_d['ent_mask']
                ]
    else:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'], \
                inps_d['qlen'], \
                inps_d['ent_mask']]

    outs = [cost, errors, probs, alphas]
    if ent_errors:
        outs += [ent_errors]

    if ent_derrors:
        outs += [ent_derrors]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, outs, profile=profile)
    print 'Done'

    # Apply weight decay on the feed-forward connections
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.

        for kk, vv in tparams.iteritems():
            if "logit" in kk or "ff" in kk:
                weight_decay += (vv ** 2).sum()

        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Computing gradient...',
    grads = safe_grad(cost, itemlist(tparams))
    print 'Done'

    # Gradient clipping:
    if clip_c > 0.:
        g2 = get_norms(grads)
        for p, g in grads.iteritems():
            grads[p] = tensor.switch(g2 > (clip_c**2),
                                     (g / tensor.sqrt(g2 + 1e-8)) * clip_c,
                                     g)
    inps.pop()
    if optimizer.lower() == "adasecant":
        learning_rule = Adasecant(delta_clip=25.0,
                                  use_adagrad=True,
                                  grad_clip=0.25,
                                  gamma_clip=0.)
    elif optimizer.lower() == "rmsprop":
        learning_rule = RMSPropMomentum(init_momentum=momentum)
    elif optimizer.lower() == "adam":
        learning_rule = Adam()
    elif optimizer.lower() == "adadelta":
        learning_rule = AdaDelta()

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    learning_rule = None

    if learning_rule:
        f_grad_shared, f_update = learning_rule.get_funcs(learning_rate=lr,
                                                          grads=grads,
                                                          inp=inps,
                                                          cost=cost,
                                                          errors=errors)
    else:
        f_grad_shared, f_update = eval(optimizer)(lr,
                                                  tparams,
                                                  grads,
                                                  inps,
                                                  cost,
                                                  errors)

    print 'Done'
    print 'Optimization'
    history_errs = []
    # reload history
    if reload_ and os.path.exists(mpath):
        history_errs = list(numpy.load(mpath)['history_errs'])

    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size

    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    best_found = False
    uidx = 0
    estop = False

    train_cost_ave, train_err_ave, \
            train_gnorm_ave = reset_train_vals()

    for eidx in xrange(max_epochs):
        n_samples = 0

        if train.done:
            train.reset()

        for d_, q_, a, em in train:
            n_samples += len(a)
            uidx += 1
            use_noise.set_value(1.)

            if opt_ds['use_sent_reps']:
                # To mask the description and the question.
                d, d_mask, q, q_mask, dlen, slen, qlen = prepare_data_sents(d_,
                                                                            q_)

                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(d,
                                                           d_mask,
                                                           q,
                                                           q_mask,
                                                           a,
                                                           dlen,
                                                           slen,
                                                           qlen)
            else:
                d, d_mask, q, q_mask, dlen, qlen = prepare_data(d_, q_)

                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(d, d_mask,
                                                           q, q_mask,
                                                           a,
                                                           dlen,
                                                           qlen)

            upnorm = f_update(lrate)
            ud = time.time() - ud_start

            # Collect the running ave train stats.
            train_cost_ave = running_ave(train_cost_ave,
                                         cost)
            train_err_ave = running_ave(train_err_ave,
                                        errors)
            train_gnorm_ave = running_ave(train_gnorm_ave,
                                          gnorm)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                import ipdb; ipdb.set_trace()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, ' Update ', uidx, \
                        ' Cost ', cost, ' UD ', ud, \
                        ' UpNorm ', upnorm[0].tolist(), \
                        ' GNorm ', gnorm, \
                        ' Pnorm ', pnorm, 'Terrors ', errors

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                if best_p is not None and best_found:
                    numpy.savez(mpath_best, history_errs=history_errs, **best_p)
                    pkl.dump(model_options, open('%s.pkl' % mpath_best, 'wb'))
                else:
                    params = unzip(tparams)

                numpy.savez(mpath, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl' % mpath, 'wb'))
                pkl.dump(stats, open("%s.pkl" % mpath_stats, 'wb'))

                print 'Done'
                print_param_norms(tparams)

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                if valid.done:
                    valid.reset()

                valid_costs, valid_errs, valid_probs, \
                        valid_alphas, error_ent, error_dent = eval_model(f_log_probs,
                                                  prepare_data if not opt_ds['use_sent_reps'] \
                                                    else prepare_data_sents,
                                                  model_options,
                                                  valid,
                                                  use_sent_rep=opt_ds['use_sent_reps'])

                valid_alphas_ = numpy.concatenate([va.argmax(0) for va  in valid_alphas.tolist()], axis=0)
                valid_err = valid_errs.mean()
                valid_cost = valid_costs.mean()
                valid_alpha_ent = -negentropy(valid_alphas)

                mean_valid_alphas = valid_alphas_.mean()
                std_valid_alphas = valid_alphas_.std()

                mean_valid_probs = valid_probs.argmax(1).mean()
                std_valid_probs = valid_probs.argmax(1).std()

                history_errs.append([valid_cost, valid_err])

                stats['train_err_ave'].append(train_err_ave)
                stats['train_cost_ave'].append(train_cost_ave)
                stats['train_gnorm_ave'].append(train_gnorm_ave)

                stats['valid_errs'].append(valid_err)
                stats['valid_costs'].append(valid_cost)
                stats['valid_err_ent'].append(error_ent)
                stats['valid_err_desc_ent'].append(error_dent)

                stats['valid_alphas_mean'].append(mean_valid_alphas)
                stats['valid_alphas_std'].append(std_valid_alphas)
                stats['valid_alphas_ent'].append(valid_alpha_ent)

                stats['valid_probs_mean'].append(mean_valid_probs)
                stats['valid_probs_std'].append(std_valid_probs)

                if uidx == 0 or valid_err <= numpy.array(history_errs)[:, 1].min():
                    best_p = unzip(tparams)
                    bad_counter = 0
                    best_found = True
                else:
                    bst_found = False

                if numpy.isnan(valid_err):
                    import ipdb; ipdb.set_trace()


                print "============================"
                print '\t>>>Valid error: ', valid_err, \
                        ' Valid cost: ', valid_cost
                print '\t>>>Valid pred mean: ', mean_valid_probs, \
                        ' Valid pred std: ', std_valid_probs
                print '\t>>>Valid alphas mean: ', mean_valid_alphas, \
                        ' Valid alphas std: ', std_valid_alphas, \
                        ' Valid alpha negent: ', valid_alpha_ent, \
                        ' Valid error ent: ', error_ent, \
                        ' Valid error desc ent: ', error_dent

                print "============================"
                print "Running average train stats "
                print '\t>>>Train error: ', train_err_ave, \
                        ' Train cost: ', train_cost_ave, \
                        ' Train grad norm: ', train_gnorm_ave
                print "============================"


                train_cost_ave, train_err_ave, \
                    train_gnorm_ave = reset_train_vals()


        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid.reset()
    valid_cost, valid_error, valid_probs, \
            valid_alphas, error_ent = eval_model(f_log_probs,
                                      prepare_data if not opt_ds['use_sent_reps'] \
                                           else prepare_data_sents,
                                      model_options, valid,
                                      use_sent_rep=opt_ds['use_sent_rep'])

    print " Final eval resuts: "
    print 'Valid error: ', valid_error.mean()
    print 'Valid cost: ', valid_cost.mean()
    print '\t>>>Valid pred mean: ', valid_probs.mean(), \
            ' Valid pred std: ', valid_probs.std(), \
            ' Valid error ent: ', error_ent

    params = copy.copy(best_p)

    numpy.savez(mpath_last,
                zipped_params=best_p,
                history_errs=history_errs,
                **params)

    return valid_err, valid_cost
예제 #42
0
def train(options, data, load_params=False, start_epoc=0):
    print "OPTIONS: ", options
    print 'Setting up model with options:'
    options = set_defaults(options)
    for kk, vv in options.iteritems():
        print kk, vv
    print "model seed: ", options['model_seed']
    print "fold: ", options['fold']
    print 'seed: ', options['seed']
    rng = numpy.random.RandomState(options['model_seed'] +
                                   100 * options.get('fold', 99) +
                                   options.get('seed', 99))
    params, operators = init_params(options, rng)
    print 'done...'

    if load_params:
        loaded = load_par(options)
        start_epoc = resume_epoc(options)
        # Check that we've loaded the correct parameters...
        for kk, vv in loaded.iteritems():
            assert params[kk].shape == vv.shape
            assert type(params[kk]) == type(vv)
        params = loaded

    tparams = init_tparams(params)

    trng, use_noise, inps, out = build_model(tparams, options, rng)
    y = tensor.imatrix('y')
    cost = nll(out, y)

    f_eval = theano.function([inps, y],
                             cost,
                             givens={use_noise: numpy.float32(0.)},
                             on_unused_input='ignore')

    reg = 0.
    for k, v in tparams.iteritems():
        if k[:6] == 'hidden' or k[-3:] == 'W_h':
            reg += options['l1'] * tensor.sum(abs(v))
            reg += options['l2'] * tensor.sum((v)**2)

    cost += reg

    grads = tensor.grad(cost, wrt=itemlist(tparams))
    lr = tensor.scalar(name='lr', dtype=theano.config.floatX)
    opt = get_optim(options['opt'])
    print 'Compiling functions'
    f_grad_shared, f_update, gshared = opt(lr, tparams, grads, [inps, y], cost,
                                           use_noise)
    f_out = theano.function([inps],
                            out,
                            givens={use_noise: numpy.float32(0.)},
                            on_unused_input='ignore',
                            allow_input_downcast=True)

    best = numpy.inf
    print 'Starting training'

    train = list_update(data[0], f_eval, options['batch_size'], rng=rng)
    test = list_update(data[-1], f_eval, options['batch_size'], rng=rng)
    starting = (train, test)
    print 'Pre-training. test: %f, train: %f' % (test, train)
    print 'Training'
    lr = options['lr']
    max_itr = options['max_itr']
    grad_norm = 0.
    train_scores = 50 * [0.]
    try:
        for epoch in xrange(max_itr):
            start_time = time.time()
            for g in gshared:
                # manually set gradients to 0 because we accumulate in list update
                g.set_value(0.0 * g.get_value())
            use_noise.set_value(1.)
            train_cost, n_obs = list_update(data[0],
                                            f_grad_shared,
                                            batchsize=options['batch_size'],
                                            rng=rng,
                                            return_n_obs=True)
            use_noise.set_value(0.)
            for g in gshared:
                g.set_value(floatx(g.get_value() / float(n_obs)))
            f_update(lr)
            apply_proximity(tparams, operators)
            train = list_update(data[0],
                                f_eval,
                                options['batch_size'],
                                rng=rng)
            elapsed_time = time.time() - start_time

            if train < best:
                # early stopping on training set
                test = list_update(data[-1], f_eval)
                best_par = unzip(tparams)
                best_perf = (train, test)
                best = train

            test = list_update(data[-1], f_eval)

            if (epoch % 50) == 0:
                # Save progress....
                save_progress(options, tparams, epoch, best_perf)
                print 'Epoch: %d, cost: %f, train: %f, test: %f, lr:%f, time: %f' % (
                    epoch, train_cost, train, test, lr, elapsed_time)

            # Check if we're diverging...
            train_ave = running_ave(train_scores, train, epoch)

            if epoch > 1000:
                # Only exit if we're diverging after 1000 iterations
                if train_ave > 1.03 * best_perf[0]:
                    print "Diverged..."
                    break
    except KeyboardInterrupt:
        print "Interrupted"
    # check that we're outputing prob distributions
    X = data[0][(3, 3)][0]
    assert abs(
        f_out(X.reshape(X.shape[0], 2, 3, 3)).sum() - float(X.shape[0])) < 1e-4
    print "Best performance:"
    print "train, test"
    print "%f,%f" % best_perf
    return best_perf, best_par