Beispiel #1
0
    def getTokenizedStream(X, Y, sources,
            batch_size=128, embedding_dim=300):
        """Build a data stream of CNN image features paired with captions
        as tokenized word ids (NOT glove vectors).

        Used for the end2end implementation — see Google's paper
        "Show and Tell: Generating Image Captions".  No contrastive
        examples are produced here.
        """
        trX, trY = X, Y

        # Transforms: images pass through as float arrays; captions are
        # sampled, vectorized to token ids, and back-padded.
        def image_transform(x):
            return floatX(x)

        def caption_transform(y):
            return intX(SeqPadded(vect.transform(sampleCaptions(y)), 'back'))

        # Foxhound iterator (no shuffling).
        # RCL: Write own iterator to sample positive examples/captions, since there are 5 for each image.
        train_iterator = iterators.Linear(
            trXt=image_transform, trYt=caption_transform,
            size=batch_size, shuffle=False)

        # Wrap in a FoxyDataStream with a matching iteration scheme.
        train_stream = FoxyDataStream(
            (trX, trY), sources, train_iterator,
            FoxyIterationScheme(len(trX), batch_size))
        train_stream.iteration_scheme = FoxyIterationScheme(len(trX), batch_size)
        return train_stream
Beispiel #2
0
    def getMLBStream(X, Y, sources, batch_size=128, embedding_dim=300,
            shuffle=False):
        """Build a stream yielding sources in the format
        (sources[0], sources[1]--MultiLabelBinarized).
        """
        trX, trY = X, Y

        # Transforms: image features pass through as floats; captions are
        # concatenated, vectorized, then multi-label binarized to ints.
        def x_transform(x):
            return floatX(x)

        def y_transform(y):
            return intX(mlb.transform(vect.transform(concatCaptions(y))))

        # Foxhound iterator.
        train_iterator = iterators.Linear(
            trXt=x_transform, trYt=y_transform,
            size=batch_size, shuffle=shuffle)

        # FoxyDataStream with a matching iteration scheme.
        train_stream = FoxyDataStream(
            (trX, trY), sources, train_iterator,
            FoxyIterationScheme(len(trX), batch_size))
        train_stream.iteration_scheme = FoxyIterationScheme(len(trX), batch_size)
        return train_stream
Beispiel #3
0
 def __call__(self, params, cost, consider_constant=None):
     """Build Adam update rules for `params` minimizing `cost`.

     Returns a list of (shared_variable, new_value_expression) pairs
     suitable for theano.function's `updates` argument.  Implements Adam
     (Kingma & Ba) with a decaying first-moment rate
     b1_t = b1 * l**(t-1).
     """
     updates = []
     # if self.clipnorm > 0:
         # print('clipping grads', self.clipnorm)
         # grads = T.grad(theano.gradient.grad_clip(cost, 0, self.clipnorm), params)
     grads = T.grad(cost, params, consider_constant=consider_constant)
     grads = clip_norms(grads, self.clipnorm)
     # t counts update steps, starting at 1; b1_t decays toward self.b1
     # as t grows (self.l is the decay factor).
     t = theano.shared(floatX(1.))
     b1_t = self.b1*self.l**(t-1)

     for p, g in zip(params, grads):
         g = self.regularizer.gradient_regularize(p, g)
         # First (m) and second (v) moment accumulators, zero-initialized
         # with the same shape as the parameter.
         m = theano.shared(p.get_value() * 0.)
         v = theano.shared(p.get_value() * 0.)

         m_t = b1_t*m + (1 - b1_t)*g
         v_t = self.b2*v + (1 - self.b2)*g**2
         # Bias-corrected moment estimates.
         m_c = m_t / (1-self.b1**t)
         v_c = v_t / (1-self.b2**t)
         # Parameter step, then weight regularization (e.g. decay/maxnorm).
         p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
         p_t = self.regularizer.weight_regularize(p_t)
         updates.append((m, m_t))
         updates.append((v, v_t))
         updates.append((p, p_t) )
     updates.append((t, t + 1.))
     return updates
Beispiel #4
0
    def getFinalStream(X, Y, sources, sources_k, batch_size=128, embedding_dim=300,
        shuffle=False):
        """Build the final merged train/test stream with contrastive examples.

        The same (X, Y) data feeds two sub-streams: the primary one and a
        "_k" copy whose batches are shuffled relative to the primary, so
        the merged stream pairs each batch with mismatched examples.

        batch_size: minibatch size for both sub-streams
        embedding_dim: glove vector size (selects the glove.6B file)
        shuffle: whether both underlying iterators shuffle

        Returns
        -------
        merged stream with sources = sources + sources_k
        """
        trX, trY = (X, Y)
        # Second copy of the same data for the contrastive ("_k") stream.
        trX_k, trY_k = (X, Y)

        # Transforms: images -> floats; captions -> sampled, vectorized,
        # back-padded int token sequences.
        trXt=lambda x: floatX(x)
        Yt=lambda y: intX(SeqPadded(vect.transform(sampleCaptions(y)), 'back'))

        # Foxhound Iterators
        # RCL: Write own iterator to sample positive examples/captions, since there are 5 for each image.
        train_iterator = iterators.Linear(
            trXt=trXt, trYt=Yt, size=batch_size, shuffle=shuffle
            )
        train_iterator_k = iterators.Linear(
            trXt=trXt, trYt=Yt, size=batch_size, shuffle=shuffle
            )

        # FoxyDataStreams
        train_stream = FoxyDataStream(
              (trX, trY)
            , sources
            , train_iterator
            , FoxyIterationScheme(len(trX), batch_size)
            )

        train_stream_k = FoxyDataStream(
              (trX_k, trY_k)
            , sources_k
            , train_iterator_k
            , FoxyIterationScheme(len(trX), batch_size)
            )
        # Map token ids to glove word vectors on the fly.
        glove_version = "glove.6B.%sd.txt.gz" % embedding_dim
        train_transformer = GloveTransformer(
            glove_version, data_stream=train_stream, vectorizer=vect
            )
        train_transformer_k = GloveTransformer(
            glove_version, data_stream=train_stream_k, vectorizer=vect
            )

        # Final Data Streams w/ contrastive examples: the "_k" half is
        # batch-shuffled so its items are mismatched with the primary half.
        final_train_stream = Merge(
              (train_transformer, ShuffleBatch(train_transformer_k))
            , sources + sources_k
            )
        final_train_stream.iteration_scheme = FoxyIterationScheme(len(trX), batch_size)

        return final_train_stream
    def getFinalStream(X,
                       Y,
                       sources,
                       sources_k,
                       batch_size=128,
                       embedding_dim=300,
                       shuffle=False):
        """Build the final merged stream with contrastive ("_k") examples.

        Same data (X, Y) feeds both sub-streams; the "_k" copy is
        batch-shuffled before merging so it serves as mismatched pairs.

        Returns
        -------
        merged stream with sources = sources + sources_k
        """
        trX, trY = (X, Y)
        # Second copy of the same data for the contrastive stream.
        trX_k, trY_k = (X, Y)

        # Transforms: images -> floats; captions -> sampled, vectorized,
        # back-padded int token sequences.
        trXt = lambda x: floatX(x)
        Yt = lambda y: intX(
            SeqPadded(vect.transform(sampleCaptions(y)), 'back'))

        # Foxhound Iterators
        # RCL: Write own iterator to sample positive examples/captions, since there are 5 for each image.
        train_iterator = iterators.Linear(trXt=trXt,
                                          trYt=Yt,
                                          size=batch_size,
                                          shuffle=shuffle)
        # NOTE(review): unlike the primary iterator this one hard-codes
        # shuffle=True, ignoring the `shuffle` parameter — confirm this is
        # intentional (it differs from the otherwise-identical variant of
        # this function elsewhere in the file).
        train_iterator_k = iterators.Linear(trXt=trXt,
                                            trYt=Yt,
                                            size=batch_size,
                                            shuffle=True)

        # FoxyDataStreams
        train_stream = FoxyDataStream(
            (trX, trY), sources, train_iterator,
            FoxyIterationScheme(len(trX), batch_size))

        train_stream_k = FoxyDataStream(
            (trX_k, trY_k), sources_k, train_iterator_k,
            FoxyIterationScheme(len(trX), batch_size))
        # Map token ids to glove word vectors on the fly.
        glove_version = "glove.6B.%sd.txt.gz" % embedding_dim
        train_transformer = GloveTransformer(glove_version,
                                             data_stream=train_stream,
                                             vectorizer=vect)
        train_transformer_k = GloveTransformer(glove_version,
                                               data_stream=train_stream_k,
                                               vectorizer=vect)

        # Final Data Streams w/ contrastive examples
        final_train_stream = Merge(
            (train_transformer, ShuffleBatch(train_transformer_k)),
            sources + sources_k)
        final_train_stream.iteration_scheme = FoxyIterationScheme(
            len(trX), batch_size)

        return final_train_stream
    def iterXY(self, X, Y):
        """Yield (xmb, ymb) minibatches; X minibatches are produced by a
        background loader process while Y is sliced in this process."""

        if self.shuffle:
            X, Y = shuffle(X, Y)

        # Loader prepares/transforms X minibatches in a separate process;
        # loader.get() below pulls them in lockstep with the Y batches.
        self.loader = Loader(X, self.train_load, self.train_transform, self.size)
        self.proc = Process(target=self.loader.load)
        self.proc.start()

        for ymb in iter_data(Y, size=self.size):
            xmb = self.loader.get()
            yield xmb, floatX(ymb)
Beispiel #7
0
    def iterXY(self, X, Y):
        """Iterate minibatches of (x, y): x batches come from a background
        loader process, y batches are sliced here and cast to floats."""
        if self.shuffle:
            X, Y = shuffle(X, Y)

        # Kick off asynchronous loading/transforming of X in a child process.
        loader = Loader(X, self.train_load, self.train_transform, self.size)
        self.loader = loader
        self.proc = Process(target=loader.load)
        self.proc.start()

        # Pull one prepared x batch for every y batch, in lockstep.
        for y_batch in iter_data(Y, size=self.size):
            x_batch = loader.get()
            yield x_batch, floatX(y_batch)
    # Kernel-density estimate over `gs` — per the legend below this is the
    # G(z) curve, so gs presumably holds generator samples (TODO confirm
    # in the caller; the enclosing function's header is not visible here).
    kde = gaussian_kde(gs)

    plt.clf()
    # Per the legend: ps = P(data) (dashed), kde(xs) = G(z) sample
    # density, preal = D(x) discriminator scores.
    plt.plot(xs, ps, '--', lw=2)
    plt.plot(xs, kde(xs), lw=2)
    plt.plot(xs, preal, lw=2)
    plt.xlim([-5., 5.])
    plt.ylim([0., 1.])
    plt.ylabel('Prob')
    plt.xlabel('x')
    plt.legend(['P(data)', 'G(z)', 'D(x)'])
    plt.title('GAN learning guassian')
    fig.canvas.draw()
    # Non-blocking show, then an extra show() call — kept as-is.
    plt.show(block=False)
    show()


# Train both networks: every 10th step updates the generator (and
# reports/visualizes); all other steps update the discriminator.
for i in range(100):
    # Uniform noise batch for G and gaussian "real" batch for D.
    zmb = np.random.uniform(-1, 1, size=(batch_size, 1)).astype('float32')
    xmb = np.random.normal(1., 1, size=(batch_size, 1)).astype('float32')
    if i % 10 == 0:
        _train_g(xmb, zmb)
        print(i)
        vis(i)
    else:
        _train_d(xmb, zmb)
    # Gentle per-step learning-rate decay.
    lrt.set_value(floatX(lrt.get_value() * 0.9999))
Beispiel #9
0
    # Discriminator scores on the x grid — the D(x) curve per the legend.
    preal = _score(xs.reshape(-1, 1)).flatten()
    # Density estimate over `gs` — the G(z) curve per the legend
    # (presumably generator samples; the function header is not visible).
    kde = gaussian_kde(gs)

    plt.clf()
    # Per the legend: ps = P(data) (dashed), kde(xs) = G(z), preal = D(x).
    plt.plot(xs, ps, '--', lw=2)
    plt.plot(xs, kde(xs), lw=2)
    plt.plot(xs, preal, lw=2)
    plt.xlim([-5., 5.])
    plt.ylim([0., 1.])
    plt.ylabel('Prob')
    plt.xlabel('x')
    plt.legend(['P(data)', 'G(z)', 'D(x)'])
    plt.title('GAN learning guassian')
    fig.canvas.draw()
    # Non-blocking show, then an extra show() call — kept as-is.
    plt.show(block=False)
    show()

# Train both networks: every 10th iteration updates the generator (and
# reports/visualizes); the rest update the discriminator.
for i in range(100):
    # Uniform noise batch for G and gaussian "real" batch for D.
    zmb = np.random.uniform(-1, 1, size=(batch_size, 1)).astype('float32')
    xmb = np.random.normal(1., 1, size=(batch_size, 1)).astype('float32')
    if i % 10 == 0:
        _train_g(xmb, zmb)
    else:
        _train_d(xmb, zmb)
    if i % 10 == 0:
        # Fixed: was Python-2 `print i` (a syntax error on Python 3, and
        # inconsistent with the companion loop that uses print(i)).
        print(i)
        vis(i)
    # Gentle per-iteration learning-rate decay.
    lrt.set_value(floatX(lrt.get_value() * 0.9999))
Beispiel #10
0
which assumes that y_hat is shape (n_examples, n_classes).
This doesn't always work when our y is not one-hotted

# error_rate = MisclassificationRate().apply(y, y_hat)
"""
# Wrap the cost in a computation graph so its parameters can be handed
# to the training algorithm below.
cg = ComputationGraph(cost)

# # # # # # # # # # #
# Modeling Training #
# # # # # # # # # # #

# Transforms: train text is length-clipped to 1000 before vectorizing and
# padding; test text is vectorized as-is; targets become float column
# vectors of shape (n, 1).
trXt=lambda x: intX(SeqPadded(vect.transform(LenClip(x, 1000))))
teXt=lambda x: intX(SeqPadded(vect.transform(x)))
Yt=lambda y: floatX(y).reshape(-1, 1)

# Foxhound Iterators
train_iterator = iterators.Linear(trXt=trXt, trYt=Yt)
test_iterator = iterators.Linear(trXt=teXt, trYt=Yt)

# DataStreams (trX/trY/teX/teY are defined outside this fragment).
train_stream = FoxyDataStream(trX, trY, train_iterator)
test_stream = FoxyDataStream(teX, teY, test_iterator)

# import ipdb
# ipdb.set_trace()
# Train
algorithm = GradientDescent(
      cost=cost
    , parameters=cg.parameters
Beispiel #11
0
        # Hidden stack: 500 -> 75 -> 75 -> 75 units with tanh activations,
        # ending in a single sigmoid unit (binary-probability output).
        ops.Project(dim=500),
        ops.Activation('tanh'),
        ops.Project(dim=75),
        ops.Activation('tanh'),
        ops.Project(dim=75),
        ops.Activation('tanh'),
        ops.Project(dim=75),
        ops.Activation('tanh'),
        ops.Project(dim=1),
        ops.Activation('sigmoid')
    ]
    # Return the op list; the list head is defined above this fragment.
    return model


# Learn and Predict
# Inputs pass through unchanged as float arrays, for train and test alike.
trXt = lambda x: floatX(np.asarray(x))
teXt = trXt
# Targets: raw floats when binary_output, else a 2-class one-hot encoding.
trYt = (lambda y: floatX(y)) if binary_output else (lambda y: floatX(OneHot(y, 2)))
iterator = iterators.Linear(size=80, trXt=trXt, teXt=teXt, trYt=trYt)
model = Network(model_MLP(trX.shape[1]), iterator=iterator)

# Bookkeeping for the epoch loop (presumably used below to stop when the
# cost improvement or the cost itself crosses these thresholds).
continue_epochs = True
min_cost_delta = .00001
min_cost = .001
cost0, cost1 = None, None
epoch_count = 0
Beispiel #12
0
# Figure out data source
train = CIFAR10("train")
test = CIFAR10("test")

# Load Data Using Fuel: shuffled minibatches — 128 for train, 1024 for test.
train_stream = DataStream.default_stream(
      dataset=train
    , iteration_scheme=ShuffledScheme(train.num_examples, batch_size=128))
test_stream = DataStream.default_stream(
      dataset=test
    , iteration_scheme=ShuffledScheme(test.num_examples, batch_size=1024))

train_epoch, test_epoch = [stream.get_epoch_iterator() for stream in [train_stream, test_stream]]


# Augmentation: images go CHW -> HWC for the ops and back to CHW after.
# Train applies Patch(..., 28, 28) then Fliplr (presumably random 28x28
# crop + random horizontal flip — confirm in foxhound); test uses a
# deterministic 28x28 center crop.  Labels become 10-class one-hots.
trXt = lambda x: floatX(Fliplr(Patch(np.asarray(x).transpose(0, 2, 3, 1), 28, 28))).transpose(0, 3, 1, 2)
teXt = lambda x: floatX(CenterCrop(np.asarray(x).transpose(0, 2, 3, 1), 28, 28)).transpose(0, 3, 1, 2)
trYt = lambda y: floatX(OneHot(y, 10))
iterator = iterators.Linear(trXt=trXt, teXt=teXt, trYt=trYt)

def get_entire_stream(epoch_iterator):
    """Drain an epoch iterator of (x_batch, y_batch) pairs and return the
    full dataset as one row-stacked X array and one concatenated Y array."""
    batches = list(epoch_iterator)
    X = np.vstack([xmb for xmb, _ in batches])
    Y = np.hstack([ymb for _, ymb in batches])
    return X, Y

Beispiel #13
0
        # Hidden stack: 500 -> 75 -> 75 -> 75 units with tanh activations,
        # ending in a single sigmoid unit (binary-probability output).
        ops.Project(dim=500),
        ops.Activation("tanh"),
        ops.Project(dim=75),
        ops.Activation("tanh"),
        ops.Project(dim=75),
        ops.Activation("tanh"),
        ops.Project(dim=75),
        ops.Activation("tanh"),
        ops.Project(dim=1),
        ops.Activation("sigmoid"),
    ]
    # Return the op list; the list head is defined above this fragment.
    return model


# Learn and Predict
# Inputs pass through unchanged as float arrays, for train and test alike.
trXt = lambda x: floatX((np.asarray(x)))
teXt = trXt
# Targets: raw floats when binary_output, else a 2-class one-hot encoding.
if binary_output:
    trYt = lambda y: floatX(y)
else:
    trYt = lambda y: floatX(OneHot(y, 2))
iterator = iterators.Linear(size=80, trXt=trXt, teXt=teXt, trYt=trYt)
model = model_MLP(trX.shape[1])
model = Network(model, iterator=iterator)

# Bookkeeping for the epoch loop (presumably used below to stop when the
# cost improvement or the cost itself crosses these thresholds —
# the loop is not visible in this fragment).
continue_epochs = True
min_cost_delta = 0.00001
min_cost = 0.001
cost0, cost1 = None, None
epoch_count = 0