def build(self, input_shapes):
    """Create the learning-rate weight(s) according to ``self.mode``."""
    # Map each supported mode to the shape of its learning-rate variable.
    shape_by_mode = {
        "lr": (1, 1),
        "lr_per_step": (self.num_steps, 1),
        "lr_per_layer": (self.num_param_groups, 1),
        "lr_per_layer_per_step": (self.num_steps, self.num_param_groups, 1),
    }
    if self.mode not in shape_by_mode:
        raise ValueError("Unsupported mode: %s" % self.mode)
    # All modes share the same init value and non-negativity constraint.
    self.learning_rate = self.add_weight(
        shape=shape_by_mode[self.mode],
        name='learning_rate',
        initializer=Constant(0.001),
        constraint=NonNeg(),
    )
def build(self, input_shape):
    """Create kernel, sigma, mix and temperature weights for the layer."""
    super(DiscretizationLayerLite, self).build(input_shape)
    # Bin centres, drawn uniformly from [-2, 2].
    self.kernel = self.add_weight(
        name='kernel',
        shape=(1, self.output_dim),
        initializer=RandomUniform(minval=-2, maxval=2),
        trainable=True)
    # Non-negative widths, initialised near 1.
    self.sigmas = self.add_weight(
        name='sigma',
        shape=(1, self.output_dim),
        initializer=RandomNormal(mean=1, stddev=0.1),
        constraint=NonNeg(),
        trainable=True)
    # Non-negative mixture weights.
    self.mix = self.add_weight(
        name='mix',
        shape=(1, self.output_dim),
        initializer=RandomNormal(1, 0.1),
        constraint=NonNeg(),
        trainable=True)
    # Scalar temperature (unconstrained).
    self.temperature = self.add_weight(
        name='temperature',
        shape=(1, 1),
        initializer=RandomNormal(1, 0.1),
        trainable=True)
    self.built = True
def build(self, input_shapes):
    """Create optional parameter, learning-rate and KLD-weight variables."""
    if self.train_params:
        self.params = self.add_weight(
            shape=(1, self.num_params),
            name='params',
            initializer='uniform',
        )
    if self.use_lr_per_step:
        # One learning rate per (step, parameter-group) pair.
        self.learning_rate = self.add_weight(
            shape=(self.num_steps, self.num_param_groups,),
            name='learning_rate',
            initializer='zeros',
            constraint=NonNeg(),
        )
    else:
        # A single learning rate per parameter group; kept frozen when
        # there are no steps to train over.
        self.learning_rate = self.add_weight(
            shape=(self.num_param_groups,),
            name='learning_rate',
            initializer='zeros',
            constraint=NonNeg(),
            trainable=self.num_steps > 0,
        )
    if self.use_kld_regularization:
        self.kld_weight = self.add_weight(
            shape=(1,),
            name='kld',
            initializer='zeros',
            constraint=NonNeg(),
            trainable=self.use_kld_regularization,
        )
def regression_chain_end():
    """Build a non-negative MLP regressor (5 -> 20 -> 10 -> 5 -> 1, MAE)."""
    model = Sequential()
    # First layer fixes the input shape; the rest follow the same pattern.
    model.add(Dense(20, input_shape=(5,), activation='relu',
                    kernel_constraint=NonNeg()))
    model.add(Dropout(0.5))
    for units in (10, 5):
        model.add(Dense(units, activation='relu', kernel_constraint=NonNeg()))
        model.add(Dropout(0.5))
    model.add(Dense(1, activation='linear', kernel_constraint=NonNeg()))
    model.compile(loss='mae', optimizer='adam')
    return model
def scalelayer(inputimg):
    """Scale a complex tensor by a shared non-negative per-channel factor.

    The real and imaginary parts are scaled by the same 1x1x1 Conv3D
    kernel and recombined into a complex64 tensor of the original shape.
    """
    real_part = tf.keras.layers.Lambda(lambda x: tf.math.real(x))(inputimg)
    imag_part = tf.keras.layers.Lambda(lambda x: tf.math.imag(x))(inputimg)
    real_part = tf.keras.layers.Lambda(
        lambda x: tf.cast(x, tf.float32))(real_part)
    imag_part = tf.keras.layers.Lambda(
        lambda x: tf.cast(x, tf.float32))(imag_part)
    # One shared Conv3D so both parts are multiplied by the same factor.
    scale = tf.keras.layers.Conv3D(1, 1, use_bias=False,
                                   kernel_constraint=NonNeg())
    real_5d = tf.keras.layers.Lambda(
        lambda x: tf.keras.backend.expand_dims(x, -1))(real_part)
    imag_5d = tf.keras.layers.Lambda(
        lambda x: tf.keras.backend.expand_dims(x, -1))(imag_part)
    scaled = tf.keras.layers.Lambda(lambda x: tf.complex(x[0], x[1]))(
        [scale(real_5d), scale(imag_5d)])
    scaled = tf.keras.layers.Lambda(lambda x: tf.squeeze(x, -1))(scaled)
    return tf.keras.layers.Lambda(lambda x: tf.cast(x, tf.complex64))(scaled)
def build(self, input_shape):
    """Create bin centres, widths and a dense weight for this layer."""
    num_features = input_shape[1]
    # Every feature starts with the same evenly spaced centres in [-3, 3].
    bin_init = np.tile(
        np.linspace(-3, 3, self.output_dim).reshape(1, -1),
        (num_features, 1))
    # Initial width scales with the span (4 * 6) over the feature count.
    width_val = 4 * 6. / num_features
    width_init = np.sqrt(width_val) * np.ones(
        (num_features, self.output_dim))
    super(LaplaceLayerWide, self).build(input_shape)
    self.bins = self.add_weight(name='bins',
                                shape=(num_features, self.output_dim),
                                initializer=Constant(bin_init),
                                trainable=True)
    self.widths = self.add_weight(name='widths',
                                  shape=(num_features, self.output_dim),
                                  initializer=Constant(width_init),
                                  constraint=NonNeg(),
                                  trainable=True)
    self.dense_weight = self.add_weight(name='w',
                                        shape=(num_features,
                                               self.output_dim),
                                        initializer='glorot_uniform',
                                        trainable=True)
    self.built = True
def build(self, input_shape):
    """Create bins, widths and biases for the discretization layer."""
    low, high = -3, 3
    num_features = input_shape[1]
    # Same evenly spaced centres in [low, high] for every input feature.
    bin_init = np.tile(
        np.linspace(low, high, self.output_dim).reshape(1, -1),
        (num_features, 1))
    # Width scale grows with the span and shrinks with feature count.
    width_val = 4. * float(high - low) / num_features
    super(DiscretizationLayer, self).build(input_shape)
    self.bins = self.add_weight(name='bins',
                                shape=(num_features, self.output_dim),
                                initializer=Constant(bin_init),
                                trainable=True)
    self.widths = self.add_weight(name='widths',
                                  shape=(num_features, self.output_dim),
                                  initializer=TruncatedNormal(
                                      width_val, width_val / 4),
                                  constraint=NonNeg(),
                                  trainable=True)
    self.biases = self.add_weight(name='biases',
                                  shape=(num_features, self.output_dim,),
                                  initializer='glorot_uniform',
                                  trainable=True)
    self.built = True
def build(self, input_shape):
    """Create the layer's weights: bins, widths, biases, dense weight/bias.

    Bin-centre initialisation is selected by ``layer_config['bins_init']``:
    'linspace' gives every feature the same evenly spaced centres in
    [-range, range], 'uniform' samples them uniformly; any other value
    raises.
    """
    # Symmetric initialisation range for the bin centres.
    u = self.layer_config['bins_init_range']
    l = -u
    bins_init = self.layer_config['bins_init']
    if bins_init == 'linspace':
        # One identical row of evenly spaced centres per input feature.
        initer = [
            np.linspace(l, u, self.output_dim).reshape(1, -1)
            for _ in range(input_shape[1])
        ]
        initer = np.concatenate(initer, axis=0)
        init = Constant(initer)
    elif bins_init == 'uniform':
        init = RandomUniform(l, u)
    else:
        raise Exception(bins_init)
    bias_initializer = Constant(self.layer_config['bias_init'])
    if self.layer_config['pre_sm_dropout'] > 0.0:
        # Non-trainable mask of large negative constants; presumably
        # added before a softmax to suppress dropped units — TODO confirm
        # against the call() implementation.
        self.dropout_mask = self.add_weight(name='dropout_mask',
                                            shape=(input_shape[1],
                                                   self.output_dim),
                                            initializer=Constant(-10000),
                                            trainable=False)
    # Width scale grows with the init span and shrinks with feature count.
    width_val = 3. * float(u - l) / input_shape[1]
    super(DiscretizationLayerWide, self).build(input_shape)
    self.bins = self.add_weight(name='bins',
                                shape=(input_shape[1], self.output_dim),
                                initializer=init,
                                trainable=True)
    # Widths are kept non-negative by constraint.
    self.widths = self.add_weight(name='widths',
                                  shape=(input_shape[1], self.output_dim),
                                  initializer=TruncatedNormal(
                                      width_val, width_val / 4),
                                  constraint=NonNeg(),
                                  trainable=True)
    self.biases = self.add_weight(name='biases',
                                  shape=(
                                      input_shape[1],
                                      self.output_dim,
                                  ),
                                  initializer=bias_initializer,
                                  trainable=True)
    self.dense_weight = self.add_weight(
        name='w',
        shape=(input_shape[1], self.output_dim),
        initializer='glorot_uniform',
        trainable=True)
    self.dense_bias = self.add_weight(
        name='b',
        shape=(input_shape[1], ),
        initializer=Zeros(),
        trainable=True)
    self.built = True
def build(self, input_shape):
    """Create the convolution kernel, channel selector and optional bias."""
    kernel_shape = self.kernel_size + (input_shape[3], self.filters)
    self.kernel = self.add_weight(name='kernel_smart',
                                  shape=kernel_shape,
                                  initializer=self.kernel_initializer,
                                  constraint=self.kernel_constraint,
                                  regularizer=self.kernel_regularizer,
                                  trainable=True)
    # Per-filter selector values drawn uniformly from [0, filters),
    # kept non-negative during training.
    self.channel_selector = self.add_weight(
        shape=(self.filters,),
        initializer=initializers.RandomUniform(
            minval=0,
            maxval=self.filters,
            seed=self.channel_selector_seed),
        name='selector',
        regularizer=None,
        constraint=NonNeg())
    if self.use_bias:
        self.bias = self.add_weight(shape=(self.filters,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    super(SmartConv2D, self).build(input_shape)
def mlp():
    """Build a non-negative MLP (5-10-20-10 relu, 7 linear) with Adadelta."""
    model = Sequential()
    # Hidden stack: each relu layer is followed by heavy dropout.
    for units in (5, 10, 20, 10):
        model.add(Dense(units, activation='relu',
                        kernel_constraint=NonNeg()))
        model.add(Dropout(0.5))
    model.add(Dense(7, activation='linear', kernel_constraint=NonNeg()))
    model.compile(loss='mae',
                  optimizer=Adadelta(lr=0.001),
                  metrics=["accuracy"])
    return model
def create_model(
        num_cells,
        dropout,
        r_dropout,
        learning_rate,
):
    """Build a stacked bidirectional-LSTM binary classifier over embeddings."""
    model = Sequential()
    # Pre-trained embedding matrix, fine-tuned during training.
    model.add(
        Embedding(input_dim=vocab_size,
                  output_dim=embedding_size,
                  weights=[embedding_matrix],
                  input_length=max_doc_length,
                  trainable=True))
    # First recurrent layer returns the full sequence for the second one.
    model.add(
        Bidirectional(
            LSTM(num_cells,
                 dropout=dropout,
                 recurrent_dropout=r_dropout,
                 return_sequences=True,
                 kernel_constraint=NonNeg())))
    model.add(
        LSTM(num_cells,
             dropout=dropout,
             recurrent_dropout=r_dropout,
             kernel_constraint=NonNeg()))
    model.add(Dense(2, activation='softmax', kernel_constraint=NonNeg()))
    model.compile(optimizer=Adam(lr=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def create_model(num_cells, dropout, r_dropout, learning_rate, num_epochs,
                 seq, train_lab, dev_seq, dev_lab, input_dim, output_dim,
                 input_length):
    """Train a time-distributed LSTM tagger and return its dev accuracy."""
    K.clear_session()
    doc_input = Input(shape=(max_doc_length, ), name='input')
    embedded = Embedding(input_dim=vocab_size,
                         output_dim=embedding_size,
                         weights=[embedding_matrix],
                         input_length=max_doc_length,
                         trainable=True)(doc_input)
    # Per-timestep LSTM states feed a shared sigmoid output.
    lstm_out = LSTM(num_cells,
                    dropout=dropout,
                    recurrent_dropout=r_dropout,
                    return_sequences=True,
                    kernel_constraint=NonNeg())(embedded)
    predictions = TimeDistributed(
        Dense(1, activation='sigmoid',
              kernel_constraint=NonNeg()))(lstm_out)
    model = Model(inputs=doc_input, outputs=predictions)
    model.compile(optimizer=Adam(lr=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit({'input': seq},
              train_lab,
              epochs=num_epochs,
              verbose=2,
              batch_size=num_batch,
              validation_data=(dev_seq, dev_lab))
    dev_score = model.evaluate(dev_seq, dev_lab,
                               batch_size=num_batch,
                               verbose=0)
    # Return dev accuracy only.
    return dev_score[1]
def icnn_model(input_dim,
               output_dim,
               num_layers=3,
               num_units=256,
               hidden_activation="relu",
               output_activation="relu",
               constraint=NonNeg()):
    """Build an input-convex neural network (ICNN).

    Follows Section 3 of Amos et al., "Input Convex Neural Networks"
    (http://proceedings.mlr.press/v70/amos17b/amos17b.pdf): the main-path
    kernels W_i are constrained non-negative with no bias terms, while the
    pass-through layers D_i from the input are unconstrained. All
    activations default to "relu".

    Args:
        input_dim: dimension of the input tensor.
        output_dim: dimension of the output tensor.
        num_layers: number of hidden non-negative layers after the first.
        num_units: hidden units per dense layer.
        hidden_activation: activation used in hidden layers.
        output_activation: activation used in the output layer.
        constraint: kernel constraint applied to the main-path layers.

    Returns:
        A Keras Model mapping u -> z with the ICNN structure.
    """
    u = Input(shape=(input_dim, ), name="u")
    # First non-negative layer applied directly to the input.
    z = Dense(num_units,
              activation=hidden_activation,
              kernel_constraint=constraint,
              use_bias=False,
              name="W_1",
              kernel_initializer='random_uniform')(u)
    # Subsequent non-negative layers, each summed with an unconstrained
    # pass-through branch from the input.
    for n in range(num_layers):
        main = Dense(num_units,
                     activation=hidden_activation,
                     kernel_constraint=constraint,
                     use_bias=False,
                     kernel_initializer='random_uniform',
                     name="W_{}".format(n + 2))(z)
        skip = Dense(num_units,
                     activation=hidden_activation,
                     name="D_{}".format(n + 2))(u)
        z = Add(name="z_{}".format(n + 2))([main, skip])
    # Constrained output layer.
    z = Dense(output_dim,
              activation=output_activation,
              kernel_constraint=constraint,
              use_bias=False,
              kernel_initializer='random_uniform',
              name="output")(z)
    return Model(inputs=u, outputs=z)
def faststage(prex, lastx, v, Phiy, phi):
    """One accelerated reconstruction stage: momentum, residuals, denoise."""
    # Learned momentum from the difference of the last two iterates;
    # each 1x1 depthwise conv is a learned non-negative per-channel step.
    beta = DepthwiseConv2D(1, use_bias=False, depthwise_constraint=NonNeg())(
        Subtract()([lastx, prex]))
    x = Add()([lastx, beta])
    # Data-fidelity residual and proximity-to-denoised residual.
    residual_y = Subtract()([Phiy, TimesPhiPhi(x, phi)])
    residual_v = Subtract()([v, x])
    residual_y = DepthwiseConv2D(
        1, use_bias=False, depthwise_constraint=NonNeg())(residual_y)
    residual_v = DepthwiseConv2D(
        1, use_bias=False, depthwise_constraint=NonNeg())(residual_v)
    x_next = Add()([x, residual_y, residual_v, beta])
    v_next = denoiseblock(x_next)
    return x_next, v_next
def train(self):
    """Fit the regression model on (optionally PCA-reduced) training data."""
    x = self.x_train_pca if self.use_pca else self.x_train
    input_dim = x.shape[1]
    hidden = Dense(6, input_dim=input_dim, kernel_regularizer=l1(0.01))
    # Optionally constrain the output weights to be non-negative.
    output_kwargs = dict(input_dim=input_dim, kernel_regularizer=l1(0.01))
    if self.non_neg:
        output_kwargs['kernel_constraint'] = NonNeg()
    output = Dense(1, **output_kwargs)
    model = Sequential()
    # Hidden layer.
    model.add(hidden)
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(Dropout(0.4))
    # Output layer.
    model.add(output)
    model.add(Activation('relu'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    self.model = model
    callbacks = []
    if self.checkpoint:
        callbacks.append(ModelCheckpoint(self.checkpoint, monitor='loss'))
    self.history = self.model.fit(x,
                                  self.y_train,
                                  epochs=self.epochs,
                                  validation_split=0.15,
                                  verbose=0,
                                  callbacks=callbacks)
    return self.history
def build(self, input_shape):
    """Create a non-negative, L1-regularised per-feature selection kernel."""
    # One weight per input feature: L1 pushes unused features toward zero
    # while NonNeg keeps each weight interpretable as a feature score.
    self.kernel = self.add_weight(
        name='kernel',
        shape=(input_shape[1], ),
        initializer=keras.initializers.he_normal(seed=None),
        regularizer=l1(0.001),
        trainable=True,
        constraint=NonNeg())
    super(FSLayer, self).build(input_shape)
def build(self, input_shape):
    """Create frequency/amplitude/phase weights and validate kernel length.

    Each weight has shape (1, in_channels, filters, waves_per_filter).

    Raises:
        ValueError: when the resulting kernel (number_of_cycles * T_LEN)
            would be longer than the input signal (input_shape[1]).
    """
    length = (1, input_shape[-1], self.filters, self.waves_per_filter)
    # Frequencies are initialised within, and clipped to, the configured band.
    self.frequencies = self.add_weight(
        name='frequencies',
        shape=length,
        trainable=True,
        initializer=RandomUniform(minval=self.frequency_low,
                                  maxval=self.frequency_high),
        constraint=MinMaxClip(self.frequency_low, self.frequency_high),
        dtype=np.float32)
    # Amplitudes start in [1, 2] and stay non-negative.
    self.amplitudes = self.add_weight(name='amplitudes',
                                      shape=length,
                                      trainable=True,
                                      initializer=RandomUniform(
                                          minval=1.0, maxval=2.0),
                                      constraint=NonNeg(),
                                      dtype=np.float32)
    # Phases are clipped to one full period [0, 2*pi].
    self.phases = self.add_weight(name='phases',
                                  shape=length,
                                  trainable=True,
                                  initializer=RandomUniform(minval=0.0,
                                                            maxval=2 * np.pi),
                                  constraint=MinMaxClip(0, 2 * np.pi),
                                  dtype=np.float32)
    self.kernel_shape = (self.number_of_cycles * self.T_LEN, ) + length[1:]
    if self.kernel_shape[0] > input_shape[1]:
        # Fix: corrected the "repition" typo in the error message.
        msg = 'A low and high frequency cutoff of (f_low, f_high) of '
        msg += '({}, {}) '.format(self.frequency_low, self.frequency_high)
        msg += 'combined with a repetition rate of {}'.format(
            self.number_of_cycles)
        msg += ' results in a kernel of length '
        msg += '{}. The maximum length '.format(self.kernel_shape[0])
        msg += 'however must be smaller or equal '
        msg += '{}. (input_shape[1])'.format(input_shape[1])
        raise ValueError(msg)
    #print("Kernel shape: {}".format(self.kernel_shape))
    self.input_spec = InputSpec(ndim=self.rank + 2,
                                axes={-1: input_shape[-1]})
    super(WConv1D, self).build(input_shape)
def build(self, input_shape):
    """Create bins, widths, biases and dense weights for the layer.

    Bug fix: the original assigned ``u = -6`` and ``l = 6`` (upper bound
    below lower bound). That produced *descending* bin centres and a
    negative ``width_val`` — i.e. a negative mean and negative stddev for
    the ``TruncatedNormal`` width initializer, which is invalid. The
    bounds are now ``l = -6``, ``u = 6``, consistent with the sibling
    discretization layers in this file.
    """
    l = -6
    u = 6
    # Same ascending, evenly spaced bin centres for every input feature.
    initer = [
        np.linspace(l, u, self.output_dim).reshape(1, -1)
        for _ in range(input_shape[1])
    ]
    initer = np.concatenate(initer, axis=0)
    init = Constant(initer)
    bias_initializer = Constant(0)
    # Positive width scale: grows with the span, shrinks with feature count.
    width_val = 3. * float(u - l) / input_shape[1]
    super(DiscretizationLayerWide, self).build(input_shape)
    self.bins = self.add_weight(name='bins',
                                shape=(input_shape[1], self.output_dim),
                                initializer=init,
                                trainable=True)
    self.widths = self.add_weight(name='widths',
                                  shape=(input_shape[1], self.output_dim),
                                  initializer=TruncatedNormal(
                                      width_val, width_val / 4),
                                  constraint=NonNeg(),
                                  trainable=True)
    self.biases = self.add_weight(name='biases',
                                  shape=(
                                      input_shape[1],
                                      self.output_dim,
                                  ),
                                  initializer=bias_initializer,
                                  trainable=True)
    self.dense_weight = self.add_weight(name='w',
                                        shape=(input_shape[1],
                                               self.output_dim),
                                        initializer='glorot_uniform',
                                        trainable=True)
    self.dense_bias = self.add_weight(name='b',
                                      shape=(input_shape[1], ),
                                      initializer=Zeros(),
                                      trainable=True)
    self.built = True
def return_norm(name, lstm_config, minimum, maximum, logger):
    """
    Return Norm object to norm weight of neural network.
    """
    log_name = name
    # The config maps the lower-cased parameter name to a constraint name.
    constraint_name = lstm_config[name.lower()]
    if constraint_name == 'maxnorm':
        logger.info("In {} use {} constraint with max={}".format(
            log_name, constraint_name, maximum))
        return MaxNorm(maximum)
    if constraint_name == 'nonnegnorm':
        logger.info("In {} use {} constraint ".format(log_name,
                                                      constraint_name))
        return NonNeg()
    if constraint_name == 'minmaxnorm':
        logger.info("In {} use {} constraint with min={} and max={}".format(
            log_name, constraint_name, minimum, maximum))
        return MinMaxNorm(minimum, maximum)
    # Unknown / 'none' entries mean no constraint at all.
    logger.info("None constraint in {}.".format(log_name))
    return None
def build(self, input_shape):
    """Create the (optionally trainable) non-negative RBF gamma parameter."""
    # Replace the plain attribute with a backend variable so Keras can
    # track and update it; NonNeg keeps the kernel width valid.
    self.gamma = K.variable(self.gamma,
                            dtype='float64',
                            constraint=NonNeg(),
                            trainable=self.train_gamma)
    super(RBFKernel, self).build(input_shape)
def build(self, input_shape):
    """Create the non-negative polynomial-kernel offset parameter cp."""
    # Wrap the initial value in a backend variable; NonNeg keeps cp >= 0.
    self.cp = K.variable(self.cp,
                         dtype='float64',
                         trainable=self.train_cp,
                         constraint=NonNeg())
    super(PolynomialKernel, self).build(input_shape)
def lstm_model_only(seq, test_seq, train_lab, test_lab, embedding_matrix,
                    vocab_size, dropout, r_dropout, num_cells, learning_rate,
                    num_epochs, trainingdata, max_doc_length=100,
                    embedding_size=300, TIMEDISTRIBUTED=False, dev_seq=None,
                    dev_lab=None):
    """Build, compile and fit an LSTM classifier; return (model, history).

    When TIMEDISTRIBUTED is true a per-timestep sigmoid tagger is trained
    with binary cross-entropy; otherwise a bidirectional LSTM with a 2-way
    softmax is trained with categorical cross-entropy. Validation data is
    only used for the "liar" dataset.
    """
    myInput = Input(shape=(max_doc_length,), name='input')
    print(myInput.shape)
    # Module-level flag decides whether pre-trained embeddings are used.
    if use_pretrained_embeddings:
        x = Embedding(input_dim=vocab_size, output_dim=embedding_size,
                      weights=[embedding_matrix],
                      input_length=max_doc_length, trainable=True)(myInput)
    else:
        x = Embedding(input_dim=vocab_size, output_dim=embedding_size,
                      input_length=max_doc_length)(myInput)
    print(x.shape)
    if TIMEDISTRIBUTED:
        lstm_out = LSTM(num_cells, dropout=dropout,
                        recurrent_dropout=r_dropout, return_sequences=True,
                        kernel_constraint=NonNeg())(x)
        predictions = TimeDistributed(
            Dense(1, activation='sigmoid',
                  kernel_constraint=NonNeg()))(lstm_out)
        loss = 'binary_crossentropy'
    else:
        lstm_out = Bidirectional(
            LSTM(num_cells, dropout=dropout,
                 recurrent_dropout=r_dropout))(x)
        predictions = Dense(2, activation='softmax')(lstm_out)
        loss = 'categorical_crossentropy'
    model = Model(inputs=myInput, outputs=predictions)
    model.compile(optimizer=Adam(lr=learning_rate), loss=loss,
                  metrics=['accuracy'])
    print("fitting model..")
    fit_kwargs = dict(epochs=num_epochs, verbose=2, batch_size=num_batch)
    if trainingdata == "liar":
        fit_kwargs['validation_data'] = (dev_seq, dev_lab)
    history = model.fit({'input': seq}, train_lab, **fit_kwargs)
    model.summary()
    return model, history
def train_and_test(datapath="/home/ktj250/thesis/data/", emb_model_path="/home/ktj250/thesis/", TIMEDISTRIBUTED=False, trainingdata="liar", num_cells=32, num_epochs=10, dropout=0.4, r_dropout=0.4, num_batch=64, learning_rate=0.0001):
    """Load a fake-news dataset, train an LSTM classifier and evaluate it.

    Tokenises the chosen corpus, builds a vocabulary from the training
    split, pads sequences, optionally loads Google News word2vec
    embeddings, trains either a time-distributed tagger or a
    bidirectional-LSTM classifier, and prints/returns test and dev scores.

    Returns:
        (test accuracy, dev accuracy, fit history, f1 score).

    NOTE(review): ``f1`` is only assigned inside ``if not TIMEDISTRIBUTED``;
    calling with TIMEDISTRIBUTED=True raises NameError at the return —
    confirm intended usage.
    """
    K.clear_session()
    #colab_directory_path = "/gdrive/My Drive/Thesis/"
    #TIMEDISTRIBUTED = False
    use_pretrained_embeddings = True
    FAKE=1
    #trainingdata = sys.argv[1] #"liar" # kaggle, FNC, BS
    print("trainingdata=",trainingdata)
    # Each loader returns already-split text lists and label lists; only
    # "liar" ships with its own dev split.
    if trainingdata == "liar":
        train, dev, test, train_lab, dev_lab, test_lab = load_liar_data(datapath)
    elif trainingdata == "kaggle":
        train, test, train_lab, test_lab = load_kaggle_data(datapath)
    elif trainingdata == "FNC":
        train, test, train_lab, test_lab = load_FNC_data(datapath)
    elif trainingdata == "BS":
        train, test, train_lab, test_lab = load_BS_data(datapath)
    # Lower-case and tokenise every document.
    train = [nltk.word_tokenize(i.lower()) for i in train]
    test = [nltk.word_tokenize(i.lower()) for i in test]
    if trainingdata == "liar":
        dev = [nltk.word_tokenize(i.lower()) for i in dev]
    else:
        # No dev split: carve the last third of training data off as dev.
        dev = train[int(abs((len(train_lab)/3)*2)):]
        dev_lab = train_lab[int(abs((len(train_lab)/3)*2)):]
        train = train[:int(abs((len(train_lab)/3)*2))]
        train_lab = train_lab[:int(abs((len(train_lab)/3)*2))]
    print(len(train), len(dev))
    # Vocabulary is built from training tokens only.
    all_train_tokens = []
    for i in train:
        for word in i:
            all_train_tokens.append(word)
    vocab = set(all_train_tokens)
    word2id = {word: i+1 for i, word in enumerate(vocab)}# making the first id is 1, so that I can pad with zeroes.
    word2id["UNK"] = len(word2id)+1
    id2word = {v: k for k, v in word2id.items()}
    #trainTextsSeq: List of input sequence for each document (A matrix with size num_samples * max_doc_length)
    trainTextsSeq = np.array([[word2id[w] for w in sent] for sent in train])
    # Unknown test/dev tokens fall back to the UNK id.
    testTextsSeq = np.array([[word2id.get(w, word2id["UNK"]) for w in sent] for sent in test])
    #if trainingdata == "liar":
    devTextsSeq = np.array([[word2id.get(w, word2id["UNK"]) for w in sent] for sent in dev])
    # PARAMETERS
    # vocab_size: number of tokens in vocabulary
    vocab_size = len(word2id)+1
    # max_doc_length: length of documents after padding (in Keras, the length of documents are usually padded to be of the same size)
    max_doc_length = 100 # LIAR 100 (like Wang), Kaggle 3391, FakeNewsCorpus 2669
    # num_samples: number of training/testing data samples
    num_samples = len(train_lab)
    # num_time_steps: number of time steps in LSTM cells, usually equals to the size of input, i.e., max_doc_length
    num_time_steps = max_doc_length
    embedding_size = 300 # also just for now..
    # padding with max doc lentgh
    seq = sequence.pad_sequences(trainTextsSeq, maxlen=max_doc_length, dtype='int32', padding='post', truncating='post', value=0.0)
    print("train seq shape",seq.shape)
    test_seq = sequence.pad_sequences(testTextsSeq, maxlen=max_doc_length, dtype='int32', padding='post', truncating='post', value=0.0)
    #if trainingdata == "liar":
    dev_seq = sequence.pad_sequences(devTextsSeq, maxlen=max_doc_length, dtype='int32', padding='post', truncating='post', value=0.0)
    # Labels: per-timestep copies for the tagger, one-hot for the classifier.
    if TIMEDISTRIBUTED:
        train_lab = tile_reshape(train_lab, num_time_steps)
        test_lab = tile_reshape(test_lab, num_time_steps)
        print(train_lab.shape)
        #if trainingdata == "liar":
        dev_lab = tile_reshape(dev_lab, num_time_steps)
    else:
        train_lab = to_categorical(train_lab, 2)
        test_lab = to_categorical(test_lab, 2)
        print(train_lab.shape)
        #if trainingdata == "liar":
        dev_lab = to_categorical(dev_lab, 2)
    print("Parameters:: num_cells: "+str(num_cells)+" num_samples: "+str(num_samples)+" embedding_size: "+str(embedding_size)+" epochs: "+str(num_epochs)+" batch_size: "+str(num_batch))
    if use_pretrained_embeddings:
        # https://blog.keras.io/using-pre-trained-word-embeddings-in-a-keras-model.html
        # Load Google's pre-trained Word2Vec model.
        model = gensim.models.KeyedVectors.load_word2vec_format(emb_model_path+'GoogleNews-vectors-negative300.bin', binary=True)
        embedding_matrix = np.zeros((len(word2id) + 1, 300))
        for word, i in word2id.items():
            try:
                embedding_vector = model.wv[word]
            except:
                # NOTE(review): bare except — any lookup failure (not just
                # missing words) falls back to the "UNK" vector.
                embedding_vector = model.wv["UNK"]
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
    myInput = Input(shape=(max_doc_length,), name='input')
    print(myInput.shape)
    if use_pretrained_embeddings:
        x = Embedding(input_dim=vocab_size, output_dim=embedding_size, weights=[embedding_matrix],input_length=max_doc_length,trainable=True)(myInput)
    else:
        x = Embedding(input_dim=vocab_size, output_dim=embedding_size, input_length=max_doc_length)(myInput)
    print(x.shape)
    if TIMEDISTRIBUTED:
        lstm_out = LSTM(num_cells, dropout=dropout, recurrent_dropout=r_dropout, return_sequences=True, kernel_constraint=NonNeg())(x)
        predictions = TimeDistributed(Dense(1, activation='sigmoid', kernel_constraint=NonNeg()))(lstm_out)
    else:
        lstm_out = Bidirectional(LSTM(num_cells, dropout=dropout, recurrent_dropout=r_dropout))(x)
        predictions = Dense(2, activation='softmax')(lstm_out)
    model = Model(inputs=myInput, outputs=predictions)
    opt = Adam(lr=learning_rate)
    if TIMEDISTRIBUTED:
        model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    else:
        model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    print("fitting model..")
    #if trainingdata == "liar":
    history = model.fit({'input': seq}, train_lab, epochs=num_epochs, verbose=2, batch_size=num_batch, validation_data=(dev_seq,dev_lab))
    #else:
    #    history = model.fit({'input': seq}, train_lab, epochs=num_epochs, verbose=2, batch_size=num_batch)
    print("Testing...")
    test_score = model.evaluate(test_seq, test_lab, batch_size=num_batch, verbose=0)
    #if trainingdata == "liar":
    dev_score = model.evaluate(dev_seq, dev_lab, batch_size=num_batch, verbose=0)
    print("Test loss:", test_score[0])
    print("Test accuracy:", test_score[1])
    #if trainingdata == "liar":
    print("Valid loss:", dev_score[0])
    print("Valid accuracy:", dev_score[1])
    if not TIMEDISTRIBUTED:
        # Binary F1 and confusion matrix from argmax predictions.
        preds = model.predict(test_seq)
        f1 = f1_score(np.argmax(test_lab,axis=1), np.argmax(preds, axis=1))
        tn, fp, fn, tp = confusion_matrix(np.argmax(test_lab,axis=1), np.argmax(preds, axis=1)).ravel()
        print("tn, fp, fn, tp")
        print(tn, fp, fn, tp)
    model.summary()
    #if trainingdata=="liar":
    #    return dev_score[1], history
    #else:
    return test_score[1], dev_score[1], history, f1
def main(model, params):
    """Train, evaluate and optionally export a triple-embedding model.

    Loads entity/relation mappings, builds true and corrupted (false)
    training triples, fits the supplied ``model`` factory's network for
    ``params['t']`` passes with label smoothing, periodically evaluates on
    the validation file, and optionally stores embeddings and writes
    test-set predictions to ``params['output_file']``.

    Bug fix: ``const`` (a weight *constraint*) and ``reg`` (a weight
    *regularizer*) were passed to the swapped keyword arguments
    (``embeddings_regularizer=const, embeddings_constraint=reg``); they
    are now passed to the matching parameters.
    """
    datafolder = params['d']
    training_passes = params['t']
    eval_passes = params['e']
    predict_passes = params['p']
    batch_size = params['bs']
    drop = params['drop']
    dim = params['ed']
    # Named constraint / regularizer options selectable from params.
    constr_dict = {
        'maxnorm': MaxNorm(1, axis=1),
        'unitnorm': UnitNorm(axis=1),
        'nonneg': NonNeg()
    }
    reg_dict = {'l1': l1(0.01), 'l2': l2(0.01), 'l1_l2': l1_l2(0.01, 0.01)}
    train_file = datafolder + "train.txt"
    valid_file = datafolder + "valid.txt"
    test_file = datafolder + "test.txt"
    false_train_file = datafolder + "false_train.txt"
    E_mapping, R_mapping = mapping(
        [train_file, valid_file, test_file, false_train_file])
    VOC_SIZE = len(list(E_mapping.keys()))
    PRED_SIZE = len(list(R_mapping.keys()))
    true_train = np.squeeze(
        np.asarray(
            list(
                data_iterator(train_file,
                              E_mapping,
                              R_mapping,
                              batch_size=-1,
                              mode=params['training_mode']))))
    # TransE-style models use inverted labels (0 = true triple).
    if params['reverse_labels']:  #TransE
        true_train_labels = np.zeros(len(true_train.T))
    else:
        true_train_labels = np.ones(len(true_train.T))
    # Negative examples either come from file or are generated by corruption.
    if params['false_mode'] == 'fromfile':
        false_train = np.squeeze(
            np.asarray(
                list(
                    data_iterator(false_train_file,
                                  E_mapping,
                                  R_mapping,
                                  batch_size=-1,
                                  mode=params['training_mode']))))
    else:
        s, p, o = true_train
        false_train = np.asarray(
            corrupt_triples(s, p, o, params['check'], params['false_mode']))
    if params['reverse_labels']:
        false_train_labels = np.ones(len(false_train.T))
    else:
        false_train_labels = np.zeros(len(false_train.T))
    const = constr_dict[params['constraint']] if params['constraint'] else None
    reg = reg_dict[params['regularizer']] if params['regularizer'] else None
    # Fixed: regularizer -> embeddings_regularizer, constraint ->
    # embeddings_constraint (previously swapped).
    m = model(VOC_SIZE,
              PRED_SIZE,
              dim,
              embeddings_regularizer=reg,
              embeddings_constraint=const,
              dropout=params['drop'])
    m.compile(loss=params['loss'], optimizer='adagrad', metrics=['mae'])
    for i in range(training_passes):
        # Regenerate fresh corrupted negatives every pass.
        if params['false_mode'] != 'fromfile':
            s, p, o = true_train
            false_train = np.asarray(
                corrupt_triples(s, p, o, params['check'],
                                params['false_mode']))
        tmpX = np.concatenate([false_train.T, true_train.T], axis=0)
        tmpY = np.concatenate([false_train_labels.T, true_train_labels.T],
                              axis=0)
        # Label smoothing toward 0.5 by params['ls'].
        tmpY = tmpY * (1 - params['ls']) + params['ls'] / 2
        m.fit(tmpX, tmpY, epochs=1, shuffle=True, batch_size=batch_size)
        try:
            if (i % eval_passes == 0
                    and i != 0) or (i == training_passes - 1
                                    and eval_passes > 0):
                # Filtered evaluation excludes known-true training triples.
                if params['filtered']:
                    tmp = true_train.T
                else:
                    tmp = []
                res = evaluate(m, valid_file, E_mapping, R_mapping,
                               params['reverse_labels'], tmp)
                print(res)
        except ZeroDivisionError:
            # eval_passes == 0 disables periodic evaluation.
            pass
    if params['store']:
        store_embedding(m, E_mapping, R_mapping, datafolder)
    if predict_passes > 0:
        print(predict_passes)
        test = np.squeeze(
            np.asarray(
                list(
                    data_iterator(test_file,
                                  E_mapping,
                                  R_mapping,
                                  batch_size=-1,
                                  mode=params['training_mode'])))).T
        pred = m.predict(test)
        pred = [p[0] for p in pred]
        mapping_e = reverse_dict(E_mapping)
        mapping_r = reverse_dict(R_mapping)
        # One tab-separated line per triple: s, r, o, score.
        with open(params['output_file'], 'w') as f:
            for t, p in zip(test, pred):
                s, r, o = t
                s, r, o = mapping_e[s], mapping_r[r], mapping_e[o]
                string = '\t'.join(map(str, [s, r, o, p])) + '\n'
                f.write(string)
output_dim=embedding_size, weights=[embedding_matrix], input_length=max_doc_length, trainable=True)(myInput) else: x = Embedding(input_dim=vocab_size, output_dim=embedding_size, input_length=max_doc_length)(myInput) print(x.shape) if TIMEDISTRIBUTED: lstm_out = LSTM(num_cells, dropout=dropout, recurrent_dropout=r_dropout, return_sequences=True, kernel_constraint=NonNeg())(x) predictions = TimeDistributed( Dense(1, activation='sigmoid', kernel_constraint=NonNeg()))(lstm_out) else: lstm_out = Bidirectional( LSTM(num_cells, dropout=dropout, recurrent_dropout=r_dropout))(x) predictions = Dense(2, activation='softmax')(lstm_out) model = Model(inputs=myInput, outputs=predictions) # try-except to switch between gpu and cpu version when not working in google colab #try: # parallel_model = multi_gpu_model(model, gpus=2) # parallel_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) # print("fitting model..") # parallel_model.fit({'input': seq}, y_train_tiled, epochs=num_epochs, verbose=2, batch_size=num_batch, validation_split=.20)
0.1 / nb_samples))(users) item_bias = Embedding(n_items, 1, embeddings_regularizer=regularizers.l2(0.1 / nb_samples), name='acquix')(items) user_embed = Embedding(n_users, n_dim, embeddings_regularizer=regularizers.l2( 0.1 / nb_samples))(users) # user_embed = concatenate([ones, user_embed]) # print(user_embed) item_embed = Embedding(n_items, n_dim, embeddings_constraint=NonNeg(), embeddings_regularizer=regularizers.l2( 0.1 / nb_samples))(items) # item_embed = concatenate([item_embed, ones]) # product = multiply([user_embed, item_embed]) # pairwise = Flatten()(AveragePooling1D(n_dim, data_format='channels_first')(product)) # sys.exit(0) # features = concatenate([user_embed, item_embed, product]) # hidden = product # hidden = Dense(2 * n_dim, activation='relu')(features) # logit = Dense(1, use_bias=False)(hidden) # logit = # logit = Flatten()(add([item_bias, pairwise])) # logit = dot([user_embed, item_embed], axes=-1)
def model(l1=0.0, l2=0.0, hard_constraint=False, code_network=False,
          sparse=True, distil_temp=1.0, restriction='weight', two_outs=False):
    """Build the two-branch (manifest + code features) Keras classifier.

    Parameters
    ----------
    l1, l2 : float
        Regularization strengths. A regularizer is created only when
        ``restriction`` selects that kind AND at least one of l1/l2 is > 0.
    hard_constraint : bool
        If True, clamp Dense kernels non-negative via ``NonNeg``.
    code_network : bool
        If True, add a separate 2-layer tower over the code input and
        concatenate it into the representation before the output layer.
    sparse : bool
        Build sparse ``Input`` tensors.
    distil_temp : float
        Distillation temperature; any value != 1.0 (or ``two_outs``)
        switches to a 2-logit softmax head sliced back to one column.
    restriction : str
        Where regularization applies: 'weights', 'activations' or 'presum'.
        NOTE(review): the default 'weight' matches none of these branches,
        so by default no regularizer is used even when l1/l2 > 0 — confirm
        whether the default was meant to be 'weights'.
    two_outs : bool
        Force the 2-class softmax head even at temperature 1.0.

    Returns
    -------
    Model
        Keras model mapping [manifest_input, code_input] -> prediction.
    """
    # Fixed input widths: sums of the individual feature-group sizes.
    manifest_inputs = 185729 + 72 + 6379 + 3812 + 4513 + 33222
    code_inputs = 310488 + 315 + 733 + 70

    def W_regularizer():
        # BUGFIX(precedence): the original condition was
        #   restriction == 'weights' and l1 > 0.0 or l2 > 0.0
        # which Python parses as (... and l1 > 0.0) or (l2 > 0.0), applying
        # weight regularization whenever l2 > 0 regardless of `restriction`.
        if restriction == 'weights' and (l1 > 0.0 or l2 > 0.0):
            return NonNegWeightRegularizer(l1=l1, l2=l2)
        return None

    def A_regularizer():
        # Same precedence fix as W_regularizer; `elif` makes the two
        # mutually exclusive kinds read as one chain.
        if restriction == 'activations' and (l1 > 0.0 or l2 > 0.0):
            return NonNegActivityRegularizer(l1=l1, l2=l2)
        elif restriction == 'presum' and (l1 > 0.0 or l2 > 0.0):
            return NonNegActivityRegularizer2(l1=l1, l2=l2)
        return None

    W_constraint = NonNeg() if hard_constraint else None

    def init():
        # A non-negative initializer was experimented with behind a
        # hard-coded `do_nonneg = False` flag; the dead branch is removed
        # and this always returns glorot_normal (same behavior as before).
        return glorot_normal

    # NOTE(review): these placeholders are created but never used below;
    # kept because tf.sparse_placeholder registers nodes in the TF1 graph —
    # confirm whether anything downstream fetches them before deleting.
    manifest_tensor = tf.sparse_placeholder(tf.float32)
    code_tensor = tf.sparse_placeholder(tf.float32)

    manifest_input = Input(shape=(manifest_inputs,), sparse=sparse)
    code_input = Input(shape=(code_inputs,), sparse=sparse)
    all_inputs = merge([manifest_input, code_input], mode='concat', concat_axis=-1)

    hidden = Dense(200, init=init(), activation='relu',
                   W_constraint=W_constraint, W_regularizer=W_regularizer(),
                   activity_regularizer=A_regularizer())(all_inputs)
    hidden = Dropout(0.5)(hidden)
    last = Dense(200, init=init(), activation='relu',
                 W_constraint=W_constraint, W_regularizer=W_regularizer(),
                 activity_regularizer=A_regularizer())(hidden)

    if code_network:
        # Extra code-only tower merged with the shared representation.
        code = Dense(200, activation='relu')(code_input)
        code = Dense(200, activation='relu')(code)
        last = merge([code, last], mode='concat')

    last = Dropout(0.5)(last)

    if distil_temp != 1.0 or two_outs:
        # Distillation head: 2 logits, temperature scaling, softmax.
        last = Dense(2, init=init(), W_constraint=W_constraint,
                     W_regularizer=W_regularizer(),
                     activity_regularizer=A_regularizer())(last)
        last = DistilTempLayer(distil_temp)(last)
        predictions = Activation('softmax')(last)
        # Slice off the second class to not have to rewrite my data code
        predictions = Lambda(lambda x: x[:, 0:1], name='prediction')(predictions)
    else:
        predictions = Dense(1, W_constraint=W_constraint,
                            W_regularizer=W_regularizer(),
                            activity_regularizer=A_regularizer())(last)

    m = Model(input=[manifest_input, code_input], output=predictions)
    return m
y_train_tiled = np.tile(train_lab, (num_time_steps,1)).T y_train_tiled = y_train_tiled.reshape(len(train_lab), num_time_steps , 1) #print("y_train_shape:",y_train_tiled.shape) return y_train_tiled y_train_tiled = tile_reshape(train_lab, num_time_steps) y_test_tiled = tile_reshape(dev_lab, num_time_steps) print("Parameters:: num_cells: "+str(num_cells)+" num_samples: "+str(num_samples)+" embedding_size: "+str(embedding_size)+" epochs: "+str(num_epochs)+" batch_size: "+str(num_batch)) #print(y_train_tiled) myInput = Input(shape=(max_doc_length,), name='input') print(myInput.shape) x = Embedding(input_dim=vocab_size, output_dim=embedding_size, input_length=max_doc_length)(myInput) print(x.shape) lstm_out = LSTM(num_cells, dropout=0.4, recurrent_dropout=0.4, return_sequences=True, kernel_constraint=NonNeg())(x) print(lstm_out.shape) #out = TimeDistributed(Dense(2, activation='softmax'))(lstm_out) predictions = TimeDistributed(Dense(1, activation='sigmoid', kernel_constraint=NonNeg()))(lstm_out) #kernel_constraint=NonNeg() print("predictions_shape:",predictions.shape) model = Model(inputs=myInput, outputs=predictions) # try-except to switch between gpu and cpu version try: parallel_model = multi_gpu_model(model, gpus=4) parallel_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) #model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) print("fitting model..") parallel_model.fit({'input': seq}, y_train_tiled, epochs=num_epochs, verbose=2, batch_size=num_batch, validation_split=.20) #parallel_model.fit(seq, y_train_tiled, epochs=num_epochs, verbose=2, steps_per_epoch=(np.int(np.floor(num_samples/num_batch))), validation_split=.20) # or try this, removing the curly brackets.
# Word-level sequence classifier: pretrained embeddings -> LSTM ->
# per-timestep sigmoid.  NonNeg kernel constraints keep the LSTM/Dense
# weights non-negative — presumably for interpretability; confirm intent.
myInput = Input(shape=(max_doc_length, ), name='input')
print(myInput.shape)
# use pretrained embeddings (embedding_matrix), fine-tuned during training:
x = Embedding(input_dim=vocab_size, output_dim=embedding_size, weights=[embedding_matrix], input_length=max_doc_length, trainable=True)(myInput)
#x = Embedding(input_dim=vocab_size, output_dim=embedding_size, input_length=max_doc_length)(myInput)
print(x.shape)
#lstm_out = CuDNNLSTM(num_cells, return_sequences=True, kernel_constraint=NonNeg())(x)
# NOTE(review): dropout/recurrent_dropout of 0.8 is unusually aggressive —
# confirm this was deliberate and not a leftover experiment.
lstm_out = LSTM(num_cells, dropout=0.8, recurrent_dropout=0.8, return_sequences=True, kernel_constraint=NonNeg())(x)
print(lstm_out.shape)
#out = TimeDistributed(Dense(2, activation='softmax'))(lstm_out)
# One sigmoid unit per timestep -> predictions of shape (batch, max_doc_length, 1).
predictions = TimeDistributed(
    Dense(1, activation='sigmoid', kernel_constraint=NonNeg()))(lstm_out)
#kernel_constraint=NonNeg()
print("predictions_shape:", predictions.shape)
model = Model(inputs=myInput, outputs=predictions)
# try-except to switch between gpu and cpu version
#try:
#    parallel_model = multi_gpu_model(model, gpus=2)
#    parallel_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
#    print("fitting model..")
#    parallel_model.fit({'input': seq}, y_train_tiled, epochs=num_epochs, verbose=2, batch_size=num_batch, validation_split=.20)
#################################################################
# Motion-processing front end: direction/speed tuning applied per-timestep
# to optic-flow inputs, combined into excitatory and suppressive MT
# convolution stages with sign-constrained kernels.

# BUGFIX(resource): load the tuning parameters inside a context manager so
# the file handle is closed deterministically (the original passed a bare
# open() into pickle.load).  You can create this file by running
# create_params.py.
# NOTE(security): pickle.load executes arbitrary code — only load a
# trusted file.
with open('params_recog_sp64_654.pkl', 'rb') as _params_file:
    params = pickle.load(_params_file)

# Network inputs; shapes are (time, height, width[, channels]).
flow_dir_tens = Input(shape=(n_timesteps, w_frames, w_frames))
# when you want to feed this tensor using numpy arrays, stack speed and
# contrast like this: speeds_conts = np.stack([speeds, contrasts], axis=-1)
flow_speed_cont_tens = Input(shape=(n_timesteps, w_frames, w_frames, 2))
speed_tent_tens = Input(shape=(n_timesteps, w_frames, w_frames, n_tent))

dir_ = TimeDistributed(DirectionTuning(n_mt, params))(flow_dir_tens)
# BUGFIX: get_shape is a method — the original printed the bound-method
# object (`str(dir_.get_shape)`) instead of the tensor shape.
print('dir_.shape: ' + str(dir_.get_shape()))
speed_gauss = TimeDistributed(SpeedTuning(n_mt, params, unit_conv_sp))(flow_speed_cont_tens)
print('speed_gauss_.shape: ' + str(speed_gauss.get_shape()))

# combine input: learned non-negative mixtures of the tent-basis speed input.
speed_N = TimeDistributed(SmartInput(n_mt, regularizer=None, constraint=NonNeg()), name='SmartInput1')(speed_tent_tens)
speed_NS = TimeDistributed(SmartInput(n_mt, regularizer=None, constraint=NonNeg()), name='SmartInput2')(speed_tent_tens)
speed_dir_P = Multiply()([dir_, speed_gauss])
speed_dir_N = Multiply()([dir_, speed_N])

####################################################################
# MT stage: batch-norm, then 15x15 convolutions whose kernels are
# sign-constrained — non-negative for the excitatory path ('exc'),
# non-positive for the two suppressive paths ('sup', 'nsup').
n_mt2 = n_mt
w_mt = 15
p_mt = 6
mt_1p = TimeDistributed(SmartConv2D(n_mt, (w_mt, w_mt), activation=None, use_bias=False, padding="SAME", kernel_constraint=NonNeg()), name='exc')(TimeDistributed(BatchNormalization())(speed_dir_P))
mt_1N = TimeDistributed(SmartConv2D(n_mt, (w_mt, w_mt), activation=None, use_bias=False, padding="SAME", kernel_constraint=NonPos()), name='sup')(TimeDistributed(BatchNormalization())(speed_dir_N))
mt_1NS = TimeDistributed(SmartConv2D(n_mt, (w_mt, w_mt), activation=None, use_bias=False, padding="SAME", kernel_constraint=NonPos()), name='nsup')(TimeDistributed(BatchNormalization())(speed_NS))