def __init__(self, size_vocab, size_embed, size, depth, size_target,
             max_norm=None, lr=0.0002):
    """Create the optimizer, encoder, image projection and symbolic I/O.

    Every argument is stored on ``self`` by ``autoassign``.

    size_vocab, size_embed, size, depth -- forwarded to ``Encoder``.
    size_target -- output size of the ``Dense`` layer ``ToImg``.
    max_norm, lr -- forwarded to ``util.Adam``.
    """
    autoassign(locals())
    # The arguments are used directly below; the attributes set by
    # autoassign carry the same values.
    optimizer = util.Adam(max_norm=max_norm, lr=lr)
    self.updater = optimizer
    encoder = Encoder(size_vocab, size_embed, size, depth)
    self.Encode = encoder
    self.ToImg = Dense(size, size_target)
    # Symbolic placeholders for the task's input and target.
    self.inputs = [T.imatrix()]
    self.target = T.fmatrix()
def __init__(self, size_repr=1024, size_hidden=200, dropout=0.0, lr=0.0002):
    """Build the classification network and compile its Theano functions.

    size_repr -- forwarded to the network as ``size_repr``.
    size_hidden -- hidden size; ``None`` selects ``LinearClassify``
        instead of ``Classify``.
    dropout -- dropout setting forwarded to the network.
    lr -- learning rate passed to ``Adam``.

    Sets ``self.train``, ``self.loss_test`` and ``self.predict``.
    """
    autoassign(locals())
    self.size_classify = 3
    if size_hidden is not None:
        net = Classify(size_repr=size_repr,
                       size_hidden=size_hidden,
                       size_classify=self.size_classify,
                       activation=tanh,
                       dropout=dropout)
    else:
        net = LinearClassify(size_repr=size_repr,
                             size_classify=self.size_classify,
                             dropout=dropout)
    self.network = net

    sym_premise = T.fmatrix()
    sym_hypo = T.fmatrix()
    sym_target = T.fmatrix()  # should be one hot

    # Training-mode graph.
    with context.context(training=True):
        out_train = self.network(sym_premise, sym_hypo)
        loss_train = CrossEntropy(sym_target, out_train)
    # Test-mode graph over the same symbolic inputs.
    with context.context(training=False):
        out_test = self.network(sym_premise, sym_hypo)
        loss_test = CrossEntropy(sym_target, out_test)

    self.updater = Adam(lr=lr)
    step = self.updater.get_updates(self.network.params, loss_train,
                                    disconnected_inputs='error')
    self.train = theano.function([sym_premise, sym_hypo, sym_target],
                                 loss_train, updates=step)
    self.loss_test = theano.function([sym_premise, sym_hypo, sym_target],
                                     loss_test)
    self.predict = theano.function([sym_premise, sym_hypo], out_test)
def __init__(self, size_vocab, _size_embed, size, depth,  # TODO: remove _size_embed from this
             residual=False, activation=clipped_rectify,
             filter_length=6, filter_size=1024, stride=3):  # FIXME use a more reasonable default
    """Build a 1-D convolution followed by a stacked GRU.

    size_vocab, filter_length, filter_size, stride -- forwarded to
        ``Convolution1D``.
    _size_embed -- not used by this constructor (only stored by autoassign).
    size, depth, activation, residual -- forwarded to ``StackedGRUH0``,
        whose input size is ``filter_size``.
    """
    autoassign(locals())
    conv = Convolution1D(size_vocab, filter_length, filter_size, stride=stride)
    self.Conv = conv
    gru_options = dict(activation=activation, residual=residual)
    self.GRU = StackedGRUH0(filter_size, size, depth, **gru_options)
def __init__(self, size_vocab, size_embed, size, depth, residual=False,
             activation=clipped_rectify):
    """Build a one-hot input layer followed by a stacked GRU.

    size_vocab -- size of the ``OneHot`` layer and input size of the GRU.
    size_embed -- not used by this constructor (only stored by autoassign).
    size, depth, activation, residual -- forwarded to ``StackedGRUH0``.
    """
    autoassign(locals())
    self.Embed = OneHot(size_vocab)
    # The GRU is sized by the vocabulary, not by size_embed.
    gru_options = dict(activation=activation, residual=residual)
    self.GRU = StackedGRUH0(size_vocab, size, depth, **gru_options)
def __init__(self, size_vocab, size, depth=1, recur_depth=1, filter_length=6,
             filter_size=64, stride=2, drop_i=0.75 , drop_s=0.25,
             residual=False, seed=1):
    """Build a 1-D convolution followed by a stacked RHN.

    size_vocab, filter_length, filter_size, stride -- forwarded to
        ``Convolution1D``.
    size, depth, recur_depth, drop_i, drop_s, residual, seed -- forwarded
        to ``StackedRHN0``, whose input size is ``filter_size``.
    """
    autoassign(locals())
    self.Conv = Convolution1D(size_vocab, filter_length, filter_size,
                              stride=stride)
    rhn_options = dict(depth=depth,
                       recur_depth=recur_depth,
                       drop_i=drop_i,
                       drop_s=drop_s,
                       residual=residual,
                       seed=seed)
    self.RHN = StackedRHN0(filter_size, size, **rhn_options)
def __init__(self, size_vocab, size_embed, size, size_out, depth, network,
             alpha=0.5, gru_activation=clipped_rectify,
             visual_activation=linear, visual_encoder=StackedGRUH0,
             cost_visual=CosineDistance, max_norm=None, lr=0.0002,
             dropout_prob=0.0):
    """Instantiate the network, symbolic variables and compiled functions.

    network -- callable used to build the underlying network; after this
        constructor runs, ``self.network`` holds the built instance.
    size_vocab, size_embed, size, size_out, depth, gru_activation,
    visual_activation, visual_encoder, dropout_prob -- forwarded to
        ``network``.
    max_norm, lr -- forwarded to ``util.Adam``.
    alpha, cost_visual -- stored on ``self`` only; not used here.
    """
    autoassign(locals())
    net_options = dict(gru_activation=gru_activation,
                       visual_activation=visual_activation,
                       visual_encoder=visual_encoder,
                       dropout_prob=dropout_prob)
    self.network = network(size_vocab, size_embed, size, size_out, depth,
                           **net_options)
    # Symbolic variables kept on self.
    self.input = T.imatrix()
    self.output_t_prev = T.imatrix()
    self.output_t = T.imatrix()
    self.output_v = T.fmatrix()
    self.OH = OneHot(size_in=size_vocab)
    self.output_t_oh = self.OH(self.output_t)
    self.updater = util.Adam(max_norm=max_norm, lr=lr)
    # Compiled last, after all attributes above have been set.
    self.train = self._make_train()
    self.loss_test = self._make_loss_test()
def __init__(self, size_vocab, size_embed, size, size_target):
    """Build a shared embedding, two depth-1 GRU encoders and two outputs.

    size_vocab, size_embed -- sizes of the shared ``Embedding``.
    size -- GRU size and input size of both ``Dense`` output layers.
    size_target -- output size of ``ToImg``; ``ToTxt`` outputs
        ``size_vocab``.
    """
    autoassign(locals())

    def single_layer_gru():
        # Both encoders are configured identically.
        return StackedGRUH0(size_embed, size, depth=1,
                            activation=clipped_rectify)

    self.Shared = Embedding(size_vocab, size_embed)
    self.EncodeV = single_layer_gru()
    self.EncodeLM = single_layer_gru()
    self.ToTxt = Dense(size, size_vocab)
    self.ToImg = Dense(size, size_target)
def __init__(self, provider, tokenize=words, min_df=10, scale=True, scale_input=False, batch_size=64, shuffle=False, limit=None, curriculum=False, val_vocab=False):
    """Load the 'train' and 'val' splits and store them encoded and scaled.

    provider -- object whose ``iterImages(split=...)`` yields the raw data.
    tokenize -- tokenizer forwarded to ``arrange``.
    min_df -- forwarded to ``util.IdMapper``.
    scale -- use ``StandardScaler`` for ``'img'`` (else ``NoScaler``).
    scale_input -- use ``InputScaler`` for ``'audio'`` (else ``NoScaler``).
    limit -- forwarded to ``arrange`` for the training split only.
    val_vocab -- if true, the mapper is fitted on training plus validation
        ``'tokens_in'``.
    batch_size, shuffle, curriculum -- stored on ``self`` only; not used
        in this constructor.

    Results are stored in ``self.data['train']`` and ``self.data['valid']``.
    """
    autoassign(locals())
    self.data = {}
    self.mapper = util.IdMapper(min_df=self.min_df)
    self.scaler = StandardScaler() if scale else NoScaler()
    self.audio_scaler = InputScaler() if scale_input else NoScaler()
    parts = insideout(self.shuffled(arrange(provider.iterImages(split='train'), tokenize=self.tokenize, limit=limit)))
    parts_val = insideout(self.shuffled(arrange(provider.iterImages(split='val'), tokenize=self.tokenize)))
    # TRAINING
    if self.val_vocab:
        # Fit on train + val tokens; the list() call forces the result to be
        # consumed (presumably fit_transform is lazy -- TODO confirm).
        _ = list(self.mapper.fit_transform(parts['tokens_in'] + parts_val['tokens_in']))
        parts['tokens_in'] = self.mapper.transform(parts['tokens_in']) # FIXME UGLY HACK
    else:
        parts['tokens_in'] = self.mapper.fit_transform(parts['tokens_in'])
    parts['tokens_out'] = self.mapper.transform(parts['tokens_out'])
    parts['img'] = self.scaler.fit_transform(parts['img'])
    parts['audio'] = self.audio_scaler.fit_transform(parts['audio'])
    self.data['train'] = outsidein(parts)
    # VALIDATION
    # Validation data only goes through transform(), using what was fitted above.
    parts_val['tokens_in'] = self.mapper.transform(parts_val['tokens_in'])
    parts_val['tokens_out'] = self.mapper.transform(parts_val['tokens_out'])
    parts_val['img'] = self.scaler.transform(parts_val['img'])
    parts_val['audio'] = self.audio_scaler.transform(parts_val['audio'])
    self.data['valid'] = outsidein(parts_val)
    self.batcher = Batcher(self.mapper, pad_end=False)
def __init__(self, size_embed, size, size_out, gru_activation=clipped_rectify,
             visual_activation=linear):
    """Build a bidirectional GRU encoder and a dense image projection.

    size_embed, size, gru_activation -- forwarded to ``BidiGRUH0``.
    size_out -- output size of the ``Dense`` layer ``ToImg``.
    visual_activation -- stored on ``self`` only; not used here.
    """
    autoassign(locals())
    encoder = BidiGRUH0(size_embed, size, activation=gru_activation)
    self.Encode = encoder
    self.ToImg = Dense(size, size_out)
def __init__(
    self,
    size_vocab,
    size_embed,
    size,
    size_out,
    depth,
    gru_activation=clipped_rectify,
    visual_encoder=StackedGRUH0,
    visual_activation=linear,
    dropout_prob=0.0,
):
    """Build the embedding, visual pathway, language-model GRU and output.

    size_vocab, size_embed -- sizes of the ``Embedding``.
    size, size_out, depth -- forwarded to ``Visual``; ``size`` and
        ``depth`` also configure the ``StackedGRUH0`` in ``LM``.
    visual_encoder -- passed to ``Visual`` as ``encoder``.
    gru_activation, visual_activation, dropout_prob -- forwarded to the
        sub-layers as keyword options.
    """
    autoassign(locals())
    self.Embed = Embedding(size_vocab, size_embed)
    visual_options = dict(
        encoder=visual_encoder,
        gru_activation=gru_activation,
        visual_activation=visual_activation,
        dropout_prob=dropout_prob,
    )
    self.Visual = Visual(size_embed, size, size_out, depth, **visual_options)
    lm_options = dict(activation=gru_activation, dropout_prob=dropout_prob)
    self.LM = StackedGRUH0(size_embed, size, depth, **lm_options)
    # map to vocabulary
    self.ToTxt = Dense(size, size_vocab)
def __init__(self, size_in, size, depth=2, residual=False, fixed=False, **kwargs):
    """Build a bottom RHN plus ``depth - 1`` composed upper layers.

    size_in, size -- input and hidden sizes; the bottom ``RHN`` maps
        ``size_in`` to ``size``, upper ``RHN0`` layers map ``size`` to
        ``size``.
    depth -- total depth; ``depth - 1`` upper layers are created.
    residual -- if true, each upper layer is wrapped in ``Residual``.
    fixed -- forwarded to the upper ``RHN0`` layers only.
    **kwargs -- forwarded to both ``RHN0`` and ``RHN``.

    ``self.stack`` is the composition of the upper layers only (starting
    from ``Identity()``); ``self.bottom`` is kept separately.
    """
    autoassign(locals())

    def maybe_residual(layer):
        if residual:
            return Residual(layer)
        return layer

    upper = []
    for _ in range(1, depth):
        upper.append(maybe_residual(RHN0(size, size, fixed=fixed, **kwargs)))
    self.layers = upper
    self.bottom = RHN(size_in, size, **kwargs)

    # Fold the upper layers: each new layer is composed on top of the
    # stack built so far.
    composed = Identity()
    for layer in self.layers:
        composed = layer.compose(composed)
    self.stack = composed
def __init__(self, size_embed, size, size_out, depth, gru_activation=tanh,
             dropout_prob=0.0):
    """Build a stacked GRU, an image-input layer and a prediction layer.

    size_embed, size, depth, gru_activation, dropout_prob -- forwarded to
        ``StackedGRU``.
    size_out -- input size of ``FromImg``, which outputs ``size``.

    ``Predict`` maps ``size`` to ``size_embed``. ``self.params`` collects
    the parameters of all three sub-layers.
    """
    autoassign(locals())
    gru_options = dict(activation=gru_activation, dropout_prob=dropout_prob)
    self.Encode = StackedGRU(size_embed, size, depth, **gru_options)
    self.FromImg = Dense(size_out, size)
    self.Predict = Dense(size, size_embed)
    sublayers = (self.Encode, self.FromImg, self.Predict)
    self.params = params(*sublayers)
def __init__(self, size_vocab, size_embed, size, size_out, depth, depth_spec=1,
             visual_encoder=StackedGRUH0, gru_activation=clipped_rectify,
             visual_activation=linear, dropout_prob=0.0):
    """Build an embedding, a shared GRU stack and two task-specific heads.

    size_vocab, size_embed -- sizes of the ``Embedding``.
    size, depth -- size and depth of the ``Shared`` ``StackedGRUH0``.
    depth_spec -- depth of the ``Visual`` and ``LM`` pathways, which both
        take ``size`` as input size.
    size_out -- forwarded to ``Visual``.
    visual_encoder -- passed to ``Visual`` as ``encoder``.
    gru_activation, visual_activation, dropout_prob -- keyword options
        forwarded to the sub-layers.
    """
    autoassign(locals())
    recurrent_options = dict(activation=gru_activation,
                             dropout_prob=dropout_prob)
    self.Embed = Embedding(size_vocab, size_embed)
    self.Shared = StackedGRUH0(size_embed, size, depth, **recurrent_options)
    self.Visual = Visual(size, size, size_out, depth_spec,
                         encoder=visual_encoder,
                         gru_activation=gru_activation,
                         visual_activation=visual_activation,
                         dropout_prob=dropout_prob)
    self.LM = StackedGRU(size, size, depth_spec, **recurrent_options)
    # try direct softmax
    self.ToTxt = Dense(size, size_embed)
def __init__(self, provider, tokenize=words, min_df=10, scale=True, batch_size=64, shuffle=False, limit=None):
    """Load the 'train' and 'val' splits and store them encoded and scaled.

    provider -- object whose ``iterImages(split=...)`` yields the raw data.
    tokenize -- tokenizer forwarded to ``arrange``.
    min_df -- forwarded to ``util.IdMapper``.
    scale -- use ``StandardScaler`` for the images (else ``NoScaler``).
    limit -- forwarded to ``arrange`` for the training split only.
    batch_size, shuffle -- stored on ``self`` only; not used here.

    ``self.data['train']`` and ``self.data['valid']`` are lists of
    ``(sent_in, sent_out, img)`` triples.
    """
    autoassign(locals())
    self.data = {}
    self.mapper = util.IdMapper(min_df=self.min_df)
    self.scaler = StandardScaler() if scale else NoScaler()

    # TRAINING: the mapper and scaler are fitted on this split.
    sents_in, sents_out, imgs = zip(*self.shuffled(
        arrange(provider.iterImages(split='train'),
                tokenize=self.tokenize, limit=limit)))
    sents_in = self.mapper.fit_transform(sents_in)
    sents_out = self.mapper.transform(sents_out)
    imgs = self.scaler.fit_transform(imgs)
    # list(...) keeps the dataset reusable: on Python 3 a bare zip() is a
    # one-shot iterator that would be empty after the first pass. On
    # Python 2 this is equivalent to the previous behaviour.
    self.data['train'] = list(zip(sents_in, sents_out, imgs))

    # VALIDATION: only transform(), reusing what was fitted above.
    sents_in, sents_out, imgs = zip(*self.shuffled(
        arrange(provider.iterImages(split='val'), tokenize=self.tokenize)))
    sents_in = self.mapper.transform(sents_in)
    sents_out = self.mapper.transform(sents_out)
    imgs = self.scaler.transform(imgs)
    self.data['valid'] = list(zip(sents_in, sents_out, imgs))

    self.batcher = Batcher(self.mapper, pad_end=False)
def __init__(self, size_vocab, size_embed, size, size_out, depth,
             gru_activation=clipped_rectify, visual_encoder=StackedGRUH0,
             visual_activation=linear, dropout_prob=0.0):
    """Build the embedding, visual pathway, language-model GRU and output.

    size_vocab, size_embed -- sizes of the ``Embedding``.
    size, size_out, depth -- forwarded to ``Visual``; ``size`` and
        ``depth`` also configure the ``StackedGRUH0`` stored as ``LM``.
    visual_encoder -- passed to ``Visual`` as ``encoder``.
    gru_activation, visual_activation, dropout_prob -- keyword options
        forwarded to the sub-layers.
    """
    autoassign(locals())
    embedding = Embedding(size_vocab, size_embed)
    self.Embed = embedding
    visual = Visual(size_embed, size, size_out, depth,
                    encoder=visual_encoder,
                    gru_activation=gru_activation,
                    visual_activation=visual_activation,
                    dropout_prob=dropout_prob)
    self.Visual = visual
    language_model = StackedGRUH0(size_embed, size, depth,
                                  activation=gru_activation,
                                  dropout_prob=dropout_prob)
    self.LM = language_model
    # map to vocabulary
    self.ToTxt = Dense(size, size_vocab)
def __init__(self, size_vocab, size_embed, size, depth):
    """Build an embedding layer followed by a stacked GRU.

    size_vocab, size_embed -- sizes of the ``Embedding``.
    size, depth -- size and depth of the ``StackedGRUH0``, which uses
        ``clipped_rectify`` as activation.
    """
    autoassign(locals())
    embedding = Embedding(size_vocab, size_embed)
    recurrent = StackedGRUH0(size_embed, size, depth,
                             activation=clipped_rectify)
    self.Embed = embedding
    self.GRU = recurrent
def __init__(self, size_embed, size, depth=None, activation=None,
             dropout_prob=0.0):
    """Build a dropout layer and a ``Sum`` layer.

    size_embed, size -- must be equal (checked with ``assert``).
    dropout_prob -- probability passed to ``Dropout``.
    depth, activation -- stored on ``self`` only; not used here.
    """
    autoassign(locals())
    # This layer requires matching input and output sizes.
    assert size_embed == size
    dropout = Dropout(prob=dropout_prob)
    self.Dropout0 = dropout
    self.Sum = Sum(size)
def __init__(self, config):
    """Build the optimizer, decoder and output layer from ``config``.

    config -- mapping read for the keys ``'max_norm'``, ``'lr'``,
        ``'size_vocab'``, ``'size_embed'``, ``'size'`` and ``'depth'``.
        It is also stored as ``self.config`` by ``autoassign``.
    """
    autoassign(locals())
    optimizer = util.Adam(max_norm=config['max_norm'], lr=config['lr'])
    self.updater = optimizer
    # Keys are read in the same order the decoder consumes them.
    size_vocab = config['size_vocab']
    size_embed = config['size_embed']
    size = config['size']
    depth = config['depth']
    self.Decode = Decoder(size_vocab, size_embed, size, depth)
    self.ToTxt = Dense(size, size_vocab)
    self.inputs = [T.imatrix()]
    self.target = T.imatrix()
def __init__(self, size_repr, size_hidden=200, size_classify=3, activation=tanh,
             dropout=0.0):
    """Build three dropout-wrapped dense layers and an output layer.

    size_repr -- the first layer takes ``size_repr * 2`` inputs.
    size_hidden -- output size of all three hidden layers.
    size_classify -- output size of the final ``classify`` layer.
    dropout -- probability used for ``Dropout`` and every ``WithDropout``.
    activation -- stored on ``self`` only; not used here.
    """
    autoassign(locals())

    def hidden_layer(n_in):
        # A dense layer to size_hidden, wrapped with dropout.
        return WithDropout(Dense(n_in, size_hidden), prob=dropout)

    self.Dropout = Dropout(prob=dropout)
    self.L1 = hidden_layer(size_repr * 2)
    self.L2 = hidden_layer(size_hidden)
    self.L3 = hidden_layer(size_hidden)
    self.classify = Dense(size_hidden, size_classify)
    self.params = util.params(self.Dropout, self.L1, self.L2, self.L3,
                              self.classify)
def __init__(self, size_vocab, size_embed, size, depth=1):
    """Build an embedding layer followed by a stacked GRU encoder.

    size_vocab, size_embed -- sizes of ``layer.Embedding``.
    size, depth -- size and depth of ``layer.StackedGRUH0``, which uses
        ``util.clipped_rectify`` as activation.
    """
    autoassign(locals())
    embedding = layer.Embedding(size_vocab, size_embed)
    encoder = layer.StackedGRUH0(size_embed, size, depth,
                                 activation=util.clipped_rectify)
    self.Embed = embedding
    self.Encode = encoder
def __init__(self, Shared, config):
    """Build the optimizer, text encoder/decoder and output layer.

    Shared -- stored as ``self.Shared`` by ``autoassign``; not used here.
    config -- mapping read for ``'max_norm'``, ``'lr'``, ``'EncodeT'``,
        ``'DecodeT'`` and ``'size_vocab'``; ``config['DecodeT']`` is also
        read for ``'size'``.
    """
    autoassign(locals())
    self.updater = util.Adam(max_norm=config['max_norm'], lr=config['lr'])
    self.EncodeT = RHNFromConfig(config['EncodeT'], conditional=False)
    self.DecodeT = RHNFromConfig(config['DecodeT'], conditional=True)
    # The original referenced a bare name `size`, which is not defined in
    # this scope and would raise NameError.
    # NOTE(review): the decoder's own 'size' entry is assumed to be the
    # intended input size of the output layer -- confirm against the
    # config schema.
    self.ToTxt = Dense(config['DecodeT']['size'], config['size_vocab'])
    self.inputs = [T.imatrix(), T.imatrix()]
    self.target = T.matrix()
def __init__(self, Shared, config):
    """Build the optimizer, visual encoder and image projection.

    Shared -- stored as ``self.Shared`` by ``autoassign``; not used here.
    config -- mapping read for ``'margin_size'``, ``'max_norm'``,
        ``'lr'``, ``'size_target'``, ``'size'`` and ``'init_img'``; the
        whole mapping is also passed to ``RHNFromConfig``.
    """
    autoassign(locals())
    self.margin_size = config['margin_size']
    optimizer = util.Adam(max_norm=config['max_norm'], lr=config['lr'])
    self.updater = optimizer
    self.EncodeV = RHNFromConfig(config, conditional=False)
    # NOTE(review): eval() executes the 'init_img' string as Python code;
    # only safe while the config comes from a trusted source.
    self.ImgEncode = Dense(
        config['size_target'],
        config['size'],
        init=eval(config['init_img']),
    )
    self.inputs = [T.imatrix()]
    self.target = T.fmatrix()
def __init__(self, size_embed, size, size_out, depth,
             gru_activation=clipped_rectify, visual_activation=linear,
             dropout_prob=0.0):
    """Build a stacked GRU encoder and a dense image projection.

    size_embed, size, depth, gru_activation, dropout_prob -- forwarded to
        ``StackedGRUH0``.
    size_out -- output size of the ``Dense`` layer ``ToImg``.
    visual_activation -- stored on ``self`` only; not used here.
    """
    autoassign(locals())
    encoder_options = dict(activation=gru_activation,
                           dropout_prob=dropout_prob)
    self.Encode = StackedGRUH0(size_embed, size, depth, **encoder_options)
    self.ToImg = Dense(size, size_out)
def __init__(self, size_in, size, depth=2, dropout_prob=0.0, activation=tanh):
    """Compose ``depth`` dense layers with activation and dropout between.

    size_in, size -- the bottom ``Dense`` maps ``size_in`` to ``size``;
        the ``depth - 1`` further layers map ``size`` to ``size``.
    dropout_prob -- second argument of each ``WithDropout`` wrapper.
    activation -- wrapped in ``Activation`` between consecutive layers.

    ``self.params`` is taken from the composed stack.
    """
    autoassign(locals())
    self.bottom = Dense(size_in, size)
    layers = [Dense(size, size) for _ in range(1, depth)]

    # Each further layer is composed on top of
    # dropout(activation(stack so far)).
    composed = self.bottom
    for dense in layers:
        activated = Activation(activation).compose(composed)
        composed = dense.compose(WithDropout(activated, dropout_prob))
    self.stack = composed
    self.params = self.stack.params
def __init__(self, config):
    """Build the optimizer, encoder and image encoder from ``config``.

    config -- mapping read for ``'max_norm'``, ``'lr'``, ``'size_vocab'``,
        ``'size_embed'`` and ``'size_target'``, with defaults for
        ``'margin_size'`` (0.2), ``'init_img'`` ('orthogonal') and
        ``'margin'`` (False). The mapping is modified in place so that
        ``'margin'`` is always present afterwards.
    """
    autoassign(locals())
    self.margin_size = config.get('margin_size', 0.2)
    self.updater = util.Adam(max_norm=config['max_norm'], lr=config['lr'])
    self.Encode = Encoder(config['size_vocab'], config['size_embed'])
    # NOTE(review): eval() executes the 'init_img' string as Python code;
    # only safe while the config comes from a trusted source.
    self.ImgEncoder = Dense(
        config['size_target'],
        config['size_embed'],
        init=eval(config.get('init_img', 'orthogonal')),
    )
    self.inputs = [T.imatrix()]
    self.target = T.fmatrix()
    # Fill in the default for 'margin' on the stored config.
    self.config.setdefault('margin', False)
def __init__(self, data_c, data_w, size_embed_c, size_embed_w, size, depth_c,
             depth_w, size_target):
    """Build two encoders sharing one image projection and compile both tasks.

    data_c, data_w -- objects whose ``mapper.size()`` gives the first
        argument of the respective ``Encoder``.
    size_embed_c, size_embed_w, depth_c, depth_w -- per-encoder settings.
    size -- encoder size and input size of the shared ``ToImg`` layer.
    size_target -- output size of ``ToImg``.

    Both ``EncoderTask`` objects share ``self.updater`` and ``self.ToImg``.
    """
    autoassign(locals())
    self.updater = util.Adam()
    vocab_c = data_c.mapper.size()
    self.Encoder_c = Encoder(vocab_c, size_embed_c, size, depth=depth_c)
    vocab_w = data_w.mapper.size()
    self.Encoder_w = Encoder(vocab_w, size_embed_w, size, depth=depth_w)
    self.ToImg = layer.Dense(size, size_target)

    task_c = EncoderTask(self.updater, self.Encoder_c, self.ToImg)
    self.Task_c = task_c
    task_c.compile()

    task_w = EncoderTask(self.updater, self.Encoder_w, self.ToImg)
    self.Task_w = task_w
    task_w.compile()
def __init__(self, config):
    """Build the optimizer, decoder and output layer from ``config``.

    config -- mapping read for ``'max_norm'``, ``'lr'``, ``'size_vocab'``,
        ``'size_embed'``, ``'size'`` and ``'depth'``, with defaults for
        ``'activation'`` ('clipped_rectify') and ``'residual'`` (False).
    """
    autoassign(locals())
    self.updater = util.Adam(max_norm=config['max_norm'], lr=config['lr'])
    # NOTE(review): eval() executes the 'activation' string as Python
    # code; only safe while the config comes from a trusted source.
    self.Decode = Decoder(
        config['size_vocab'],
        config['size_embed'],
        config['size'],
        config['depth'],
        activation=eval(config.get('activation','clipped_rectify')),
        residual=config.get('residual', False),
    )
    self.ToTxt = Dense(config['size'], config['size_vocab'])
    self.inputs = [T.imatrix()]
    self.target = T.imatrix()
def __init__(self, size_vocab, _size_embed, size, depth,  # TODO: remove _size_embed from this
             residual=False, fixed=False, activation=clipped_rectify,
             gate_activation=steeper_sigmoid, init_in=orthogonal,
             init_recur=orthogonal, filter_length=6, filter_size=1024,
             stride=3, dropout_prob=0.0):  # FIXME use a more reasonable default
    """Build a 1-D convolution followed by a configurable stacked GRU.

    size_vocab, filter_length, filter_size, stride -- forwarded to
        ``Convolution1D``.
    size, depth -- size and depth of ``StackedGRUH0``, whose input size
        is ``filter_size``.
    activation, residual, gate_activation, init_in, init_recur,
    dropout_prob -- keyword options forwarded to ``StackedGRUH0``.
    _size_embed, fixed -- stored on ``self`` only; not used here.
    """
    autoassign(locals())
    self.Conv = Convolution1D(size_vocab, filter_length, filter_size,
                              stride=stride)
    gru_options = dict(activation=activation,
                       residual=residual,
                       gate_activation=gate_activation,
                       init_in=init_in,
                       init_recur=init_recur,
                       dropout_prob=dropout_prob)
    self.GRU = StackedGRUH0(filter_size, size, depth, **gru_options)
def __init__(self, size_vocab, size_embed, size, depth=1, recur_depth=1,
             drop_i=0.75 , drop_s=0.25, residual=False, seed=1):
    """Build an input layer (one-hot or embedding) followed by a stacked RHN.

    size_vocab -- vocabulary size of the input layer.
    size_embed -- embedding size, or the string ``'onehot'`` to use a
        ``OneHot`` layer; the RHN input size is then ``size_vocab``.
    size, depth, recur_depth, drop_i, drop_s, residual, seed -- forwarded
        to ``StackedRHN0``.
    """
    autoassign(locals())
    # Only the input layer and the RHN input size differ between modes.
    if size_embed == 'onehot':
        self.Embed = OneHot(size_vocab)
        rhn_input_size = size_vocab
    else:
        self.Embed = Embedding(size_vocab, size_embed)
        rhn_input_size = size_embed
    self.RHN = StackedRHN0(rhn_input_size, size,
                           depth=depth,
                           recur_depth=recur_depth,
                           drop_i=drop_i,
                           drop_s=drop_s,
                           residual=residual,
                           seed=seed)
def __init__(self, size_vocab, size_embed, size, size_out, depth,
             gru_activation=clipped_rectify, visual_encoder=StackedGRUH0,
             visual_activation=linear, dropout_prob=0.0):
    """Build the embedding, the visual pathway and a one-hot layer.

    size_vocab, size_embed -- sizes of the ``Embedding``; ``size_vocab``
        also sizes the ``OneHot`` layer.
    size, size_out, depth -- forwarded to ``Visual``.
    visual_encoder -- passed to ``Visual`` as ``encoder``.
    gru_activation, visual_activation, dropout_prob -- keyword options
        forwarded to ``Visual``.
    """
    autoassign(locals())
    self.Embed = Embedding(size_vocab, size_embed)
    visual_options = dict(encoder=visual_encoder,
                          gru_activation=gru_activation,
                          visual_activation=visual_activation,
                          dropout_prob=dropout_prob)
    self.Visual = Visual(size_embed, size, size_out, depth, **visual_options)
    self.OH = OneHot(size_vocab)
def __init__(self, size_vocab, size_embed, size, size_target, depth):
    """Build a shared depth-1 encoder, two residual GRU stacks and two outputs.

    size_vocab, size_embed, size -- forwarded to ``visual.Encoder``.
    depth -- the visual stack ``EncodeV`` gets ``depth - 1`` layers; the
        shared encoder and ``EncodeLM`` have depth 1.
    size_target -- output size of ``ToImg``; ``ToTxt`` outputs
        ``size_vocab``.
    """
    autoassign(locals())
    # All recurrent parts are residual and use clipped_rectify.
    recurrent_options = dict(residual=True, activation=clipped_rectify)
    self.Shared = visual.Encoder(size_vocab, size_embed, size, 1,
                                 **recurrent_options)
    self.EncodeV = StackedGRUH0(size, size, depth=depth - 1,
                                **recurrent_options)
    self.EncodeLM = StackedGRUH0(size, size, depth=1, **recurrent_options)
    self.ToTxt = Dense(size, size_vocab)
    self.ToImg = Dense(size, size_target)
def __init__(self, size_vocab, size_embed, size, size_out, depth,
             out_depth=1,  # FIXME USE THIS PARAM
             gru_activation=tanh, visual_activation=linear, dropout_prob=0.0):
    """Build the embedding, encoder, textual decoder and visual decoder.

    size_vocab, size_embed -- sizes of the ``Embedding``.
    size, depth, gru_activation, dropout_prob -- configure both the
        ``StackedGRUH0`` encoder and the ``StackedGRU`` text decoder.
    size_out -- output size of the ``Dense`` layer ``DecodeV``.
    out_depth, visual_activation -- stored on ``self`` only; not used here.

    ``self.params`` collects the parameters of every sub-layer.
    """
    autoassign(locals())
    self.Embed = Embedding(self.size_vocab, self.size_embed)
    self.Encode = StackedGRUH0(self.size_embed, self.size, self.depth,
                               activation=self.gru_activation,
                               dropout_prob=self.dropout_prob)
    self.DecodeT = StackedGRU(self.size_embed, self.size, self.depth,
                              activation=self.gru_activation,
                              dropout_prob=self.dropout_prob)
    self.PredictT = Dense(size_in=self.size, size_out=self.size_embed)
    self.DecodeV = Dense(self.size, self.size_out)
    # self.Encode was previously missing from this list, so the encoder's
    # parameters were not exposed alongside the other sub-layers.
    # NOTE(review): this changes the length and order of self.params;
    # checkpoints saved with the old parameter list may need migrating.
    self.params = params(self.Embed, self.Encode, self.DecodeT,
                         self.PredictT, self.DecodeV)
def __init__(self, size_vocab, size_embed, size, size_out, depth, textual,
             out_depth=1, gru_activation=tanh, visual_activation=linear,
             dropout_prob=0.0):
    """Build the embedding plus a visual and a textual pathway.

    size_vocab, size_embed -- sizes of the ``Embedding``.
    size, size_out, depth -- forwarded to both pathways.
    textual -- callable used to build the textual pathway.
    out_depth -- forwarded to ``Visual`` only.
    gru_activation, dropout_prob -- forwarded to both pathways.
    visual_activation -- stored on ``self`` only; not used here.
    """
    autoassign(locals())
    shared_options = dict(gru_activation=gru_activation,
                          dropout_prob=dropout_prob)
    self.Embed = Embedding(size_vocab, size_embed)
    self.Visual = Visual(size_embed, size, size_out, depth,
                         out_depth=out_depth, **shared_options)
    self.Textual = textual(size_embed, size, size_out, depth,
                           **shared_options)
    self.params = params(self.Embed, self.Visual, self.Textual)
def __init__(self, provider, mapper, scaler, batch_size=64, with_para='auto', shuffle=False, fit=True, pad_end=False, reverse=False, tokenizer='word'):
    """Load the 'train' and 'val' splits and store them encoded and scaled.

    provider -- object whose ``iterImages(split=...)`` yields the raw data.
    mapper, scaler -- objects providing ``fit_transform``/``transform``.
    with_para -- one of ``'para_rand'``, ``'auto'`` or ``'para_all'``;
        selects the arrangement function used for both splits.
    fit -- if true, ``mapper`` and ``scaler`` are fitted on the training
        inputs; otherwise they are only applied.
    pad_end, tokenizer -- forwarded to ``Batcher``; ``tokenizer`` and
        ``reverse`` are also forwarded to the arrangement function.
    batch_size, shuffle -- stored on ``self`` only; not used here.

    Raises ValueError if ``with_para`` is not a recognised value.
    """
    autoassign(locals())
    self.data = {}

    if self.with_para == 'para_rand':
        arrange_fn = arrange_para_rand
    elif self.with_para == 'auto':
        arrange_fn = arrange_auto
    elif self.with_para == 'para_all':
        arrange_fn = arrange_para
    else:
        raise ValueError("Unknown value for keyword argument 'with_para': {}".format(self.with_para))

    def load_split(split):
        # Arrange and shuffle one split, then unzip it into parallel
        # (sents_in, sents_out, imgs) sequences.
        # The training 'para_all' call used to pass `tokenize=` while
        # every other call passed `tokenizer=`; all calls now use the
        # same keyword.
        arranged = arrange_fn(provider.iterImages(split=split),
                              reverse=self.reverse,
                              tokenizer=self.tokenizer)
        return zip(*self.shuffled(arranged))

    # TRAINING
    sents_in, sents_out, imgs = load_split('train')
    if self.fit:
        sents_in = self.mapper.fit_transform(sents_in)
        imgs = self.scaler.fit_transform(imgs)
    else:
        sents_in = self.mapper.transform(sents_in)
        imgs = self.scaler.transform(imgs)
    sents_out = self.mapper.transform(sents_out)
    # list(...) keeps the dataset reusable: on Python 3 a bare zip() is a
    # one-shot iterator. On Python 2 this matches the previous behaviour.
    self.data['train'] = list(zip(sents_in, sents_out, imgs))

    # VALIDATION: only transform(), never fit.
    sents_in, sents_out, imgs = load_split('val')
    sents_in = self.mapper.transform(sents_in)
    sents_out = self.mapper.transform(sents_out)
    imgs = self.scaler.transform(imgs)
    self.data['valid'] = list(zip(sents_in, sents_out, imgs))

    self.batcher = Batcher(self.mapper, self.pad_end, tokenizer=self.tokenizer)
def __init__(self, size_embed, size, size_out, depth, gru_activation=tanh,
             dropout_prob=0.0):
    """Build a GRU encoder-decoder and a prediction layer.

    size_embed -- input size used by both GRU factories and output size
        of ``Predict``.
    size -- passed three times to ``EncoderDecoderGRU`` and used as input
        size of ``Predict``.
    depth, gru_activation, dropout_prob -- forwarded to the GRU stacks.
    size_out -- stored on ``self`` only; not used here.
    """
    autoassign(locals())

    # NOTE(review): both factories ignore their `size_in` argument and
    # always use `size_embed` as the input size -- confirm this is intended.
    def encoder(size_in, size):
        return StackedGRUH0(size_embed, size, self.depth,
                            activation=self.gru_activation,
                            dropout_prob=self.dropout_prob)

    def decoder(size_in, size):
        return StackedGRU(size_embed, size, self.depth,
                          activation=self.gru_activation,
                          dropout_prob=self.dropout_prob)

    self.Encdec = EncoderDecoderGRU(self.size, self.size, self.size,
                                    encoder=encoder, decoder=decoder)
    self.Predict = Dense(size_in=self.size, size_out=self.size_embed)
    self.params = params(self.Encdec, self.Predict)
def __init__(self, size_vocab, size_embed, size, size_out, depth,
             gru_activation=clipped_rectify, visual_activation=linear,
             dropout_prob=0.0):
    """Build the embedding, visual pathway, language-model GRU and output.

    size_vocab, size_embed -- sizes of the ``Embedding``.
    size, size_out, depth -- forwarded to ``Visual``; ``size`` and
        ``depth`` also configure the ``StackedGRU`` stored as ``LM``.
    gru_activation, visual_activation, dropout_prob -- keyword options
        forwarded to the sub-layers.

    ``ToTxt`` maps ``size`` to ``size_embed``.
    """
    autoassign(locals())
    self.Embed = Embedding(size_vocab, size_embed)
    visual_options = dict(gru_activation=gru_activation,
                          visual_activation=visual_activation,
                          dropout_prob=dropout_prob)
    self.Visual = Visual(size_embed, size, size_out, depth, **visual_options)
    lm_options = dict(activation=gru_activation, dropout_prob=dropout_prob)
    self.LM = StackedGRU(size_embed, size, depth, **lm_options)
    # try direct softmax
    self.ToTxt = Dense(size, size_embed)
def __init__(self, size_embed, size, size_out, depth, encoder=StackedGRUH0,
             gru_activation=clipped_rectify, visual_activation=linear,
             dropout_prob=0.0):
    """Build a pluggable encoder and a dense image projection.

    encoder -- callable used to build ``self.Encode``; it receives
        ``size_embed``, ``size``, ``depth`` plus ``activation`` and
        ``dropout_prob`` keywords.
    size_out -- output size of the ``Dense`` layer ``ToImg``.
    visual_activation -- stored on ``self`` only; not used here.
    """
    autoassign(locals())
    encoder_options = dict(activation=gru_activation,
                           dropout_prob=dropout_prob)
    self.Encode = encoder(size_embed, size, depth, **encoder_options)
    self.ToImg = Dense(size, size_out)
def __init__(self, size_in, size, recur_depth=1, drop_i=0.75 , drop_s=0.25,
             init_T_bias=-2.0, init_H_bias='uniform', tied_noise=True,
             init_scale=0.04, seed=1):
    """Create the random stream and the input/recurrent linear maps.

    size_in, size -- input and hidden sizes of the linear maps.
    recur_depth -- number of recurrent ``Linear`` pairs created.
    init_H_bias, init_T_bias -- ``bias_init`` for the H and T maps; the
        first recurrent pair is created without a ``bias_init`` argument.
    seed -- the random stream is seeded with ``seed // 2 + 321``.
    drop_i, drop_s, tied_noise, init_scale -- stored on ``self`` only;
        not used in this constructor.
    """
    autoassign(locals())
    self._theano_rng = RandomStreams(seed // 2 + 321)
    hidden_size = size
    self.LinearH = Linear(in_size=size_in, out_size=hidden_size,
                          bias_init=init_H_bias)
    self.LinearT = Linear(in_size=size_in, out_size=hidden_size,
                          bias_init=init_T_bias)
    self.recurH = []
    self.recurT = []
    for level in range(recur_depth):
        if level == 0:
            # The first recurrent pair relies on Linear's default bias_init.
            extra_h, extra_t = {}, {}
        else:
            extra_h = {'bias_init': init_H_bias}
            extra_t = {'bias_init': init_T_bias}
        self.recurH.append(Linear(in_size=hidden_size, out_size=hidden_size,
                                  **extra_h))
        self.recurT.append(Linear(in_size=hidden_size, out_size=hidden_size,
                                  **extra_t))
def __init__(self, size_vocab, size_embed, size, size_out, depth, network,
             alpha=0.5, out_depth=1, gru_activation=tanh,
             visual_activation=linear, cost_visual=CosineDistance,
             max_norm=None, dropout_prob=0.0):
    """Build the network and compile its training and test-loss functions.

    network -- callable used to build the underlying network; after this
        constructor runs, ``self.network`` holds the built instance.
    alpha -- weight of the textual cost; the visual cost is weighted by
        ``1.0 - alpha``.
    cost_visual -- callable computing the visual cost.
    max_norm -- forwarded to ``util.Adam``.
    Remaining arguments are forwarded to ``network``.

    Sets ``self.train`` and ``self.loss_test``; both return the combined,
    textual and visual costs.
    """
    autoassign(locals())
    self.network = network(size_vocab, size_embed, size, size_out, depth,
                           out_depth=out_depth,
                           gru_activation=gru_activation,
                           visual_activation=visual_activation,
                           dropout_prob=dropout_prob)

    sym_in = T.imatrix()
    sym_t_prev = T.imatrix()
    sym_t = T.imatrix()
    sym_v = T.fmatrix()
    self.OH = OneHot(size_in=size_vocab)
    sym_t_onehot = self.OH(sym_t)

    # TRAINING
    with context.context(training=True):
        v_pred, t_pred = self.network(sym_in, sym_t_prev, sym_v)
        cost_T = CrossEntropy(sym_t_onehot, t_pred)
        cost_V = self.cost_visual(sym_v, v_pred)
        cost = self.alpha * cost_T + (1.0 - self.alpha) * cost_V

    # TESTING
    with context.context(training=False):
        v_pred_test, t_pred_test = self.network(sym_in, sym_t_prev, sym_v)
        cost_T_test = CrossEntropy(sym_t_onehot, t_pred_test)
        cost_V_test = self.cost_visual(sym_v, v_pred_test)
        cost_test = self.alpha * cost_T_test + (1.0 - self.alpha) * cost_V_test

    self.updater = util.Adam(max_norm=max_norm)
    updates = self.updater.get_updates(self.network.params, cost)

    # TODO better way of dealing with needed/unneeded output_t_prev?
    self.train = theano.function(
        [sym_in, sym_v, sym_t_prev, sym_t],
        [cost, cost_T, cost_V],
        updates=updates,
        on_unused_input='warn',
    )
    self.loss_test = theano.function(
        [sym_in, sym_v, sym_t_prev, sym_t],
        [cost_test, cost_T_test, cost_V_test],
        on_unused_input='warn',
    )
def __init__(self, size_repr, size_classify=3, dropout=0.0):
    """Build a dropout layer and a single dense classification layer.

    size_repr -- the dense layer takes ``size_repr * 2`` inputs.
    size_classify -- output size of the dense layer.
    dropout -- probability passed to ``Dropout``.
    """
    autoassign(locals())
    dropout_layer = Dropout(prob=dropout)
    output_layer = Dense(size_repr * 2, size_classify)
    self.Dropout = dropout_layer
    self.classify = output_layer
    self.params = util.params(self.Dropout, self.classify)
def __init__(self, mapper, pad_end=False, tokenizer='word'):
    """Store the arguments and cache the mapper's boundary ids.

    mapper -- object exposing ``BEG_ID`` and ``END_ID``.
    pad_end, tokenizer -- stored on ``self`` only.
    """
    autoassign(locals())
    # Copied from the mapper onto self as BEG / END.
    self.BEG, self.END = mapper.BEG_ID, mapper.END_ID
def __init__(self, activation):
    """Store ``activation`` and declare an empty parameter list.

    activation -- stored on ``self`` by ``autoassign``.
    """
    autoassign(locals())
    # This layer contributes no parameters of its own.
    self.params = []
def __init__(self, size_embed, size, size_out, depth, out_depth=1,
             gru_activation=tanh, dropout_prob=0.0):
    """Build a stacked GRU encoder and a dense projection.

    size_embed, size, depth, gru_activation, dropout_prob -- forwarded to
        ``StackedGRUH0``.
    size_out -- output size of the ``Dense`` layer ``Project``.
    out_depth -- stored on ``self`` only; not used here.

    ``self.params`` collects the parameters of both sub-layers.
    """
    autoassign(locals())
    encoder_options = dict(activation=gru_activation,
                           dropout_prob=dropout_prob)
    self.Encode = StackedGRUH0(size_embed, size, depth, **encoder_options)
    self.Project = Dense(size, size_out)
    sublayers = (self.Encode, self.Project)
    self.params = params(*sublayers)
def __init__(self, provider, mapper, scaler, batch_size=64, with_para='auto', shuffle=False, fit=True, pad_end=False, reverse=False, tokenizer='word'):
    """Load the 'train' and 'val' splits and store them encoded and scaled.

    provider -- object whose ``iterImages(split=...)`` yields the raw data.
    mapper, scaler -- objects providing ``fit_transform``/``transform``.
    with_para -- one of ``'para_rand'``, ``'auto'`` or ``'para_all'``;
        selects the arrangement function used for both splits.
    fit -- if true, ``mapper`` and ``scaler`` are fitted on the training
        inputs; otherwise they are only applied.
    pad_end, tokenizer -- forwarded to ``Batcher``; ``tokenizer`` and
        ``reverse`` are also forwarded to the arrangement function.
    batch_size, shuffle -- stored on ``self`` only; not used here.

    Raises ValueError if ``with_para`` is not a recognised value.
    """
    autoassign(locals())
    self.data = {}

    if self.with_para == 'para_rand':
        arrange_fn = arrange_para_rand
    elif self.with_para == 'auto':
        arrange_fn = arrange_auto
    elif self.with_para == 'para_all':
        arrange_fn = arrange_para
    else:
        raise ValueError(
            "Unknown value for keyword argument 'with_para': {}".format(
                self.with_para))

    def load_split(split):
        # Arrange and shuffle one split, then unzip it into parallel
        # (sents_in, sents_out, imgs) sequences.
        # The training 'para_all' call used to pass `tokenize=` while
        # every other call passed `tokenizer=`; all calls now use the
        # same keyword.
        arranged = arrange_fn(provider.iterImages(split=split),
                              reverse=self.reverse,
                              tokenizer=self.tokenizer)
        return zip(*self.shuffled(arranged))

    # TRAINING
    sents_in, sents_out, imgs = load_split('train')
    if self.fit:
        sents_in = self.mapper.fit_transform(sents_in)
        imgs = self.scaler.fit_transform(imgs)
    else:
        sents_in = self.mapper.transform(sents_in)
        imgs = self.scaler.transform(imgs)
    sents_out = self.mapper.transform(sents_out)
    # list(...) keeps the dataset reusable: on Python 3 a bare zip() is a
    # one-shot iterator. On Python 2 this matches the previous behaviour.
    self.data['train'] = list(zip(sents_in, sents_out, imgs))

    # VALIDATION: only transform(), never fit.
    sents_in, sents_out, imgs = load_split('val')
    sents_in = self.mapper.transform(sents_in)
    sents_out = self.mapper.transform(sents_out)
    imgs = self.scaler.transform(imgs)
    self.data['valid'] = list(zip(sents_in, sents_out, imgs))

    self.batcher = Batcher(self.mapper, self.pad_end,
                           tokenizer=self.tokenizer)
def __init__(self, in_size, out_size, bias_init=None, init_scale=0.04):
    """Create the weight parameter and, optionally, the bias parameter.

    in_size, out_size -- dimensions of the weight ``self.w``, which is
        created via ``make_param`` with the ``'uniform'`` setting.
    bias_init -- if not ``None``, a bias ``self.b`` with ``out_size``
        entries is created from it; otherwise ``self.b`` is not set.
    init_scale -- stored on ``self`` only; not used in this constructor.
    """
    autoassign(locals())
    weight_shape = (in_size, out_size)
    self.w = self.make_param(weight_shape, 'uniform')
    if bias_init is None:
        # No bias requested: leave self.b undefined, as before.
        return
    self.b = self.make_param((out_size,), bias_init)
def __init__(self, updater, encode, project):
    """Store the collaborators and create the symbolic input and target.

    updater, encode, project -- stored on ``self`` by ``autoassign``.
    """
    autoassign(locals())
    symbolic_input = T.imatrix()
    self.inputs = [symbolic_input]
    symbolic_target = T.fmatrix()
    self.target = symbolic_target
def __init__(self, mapper, pad_end=False):
    """Store the arguments and cache the mapper's boundary ids.

    mapper -- object exposing ``BEG_ID`` and ``END_ID``.
    pad_end -- stored on ``self`` only.
    """
    autoassign(locals())
    # Copied from the mapper onto self as BEG / END.
    self.BEG, self.END = mapper.BEG_ID, mapper.END_ID
def __init__(self, activation):
    """Store ``activation`` on the instance.

    activation -- stored on ``self`` by ``autoassign``.
    """
    autoassign(locals())