def build_forward(self, _input):
    output = _input  # [batch_size, num_steps, rnn_units]
    self.feature_dim = int(output.get_shape()[2])  # rnn_units
    output = tf.reshape(output, [-1, self.feature_dim])  # [batch_size * num_steps, rnn_units]
    final_activation = 'sigmoid' if self.out_dim == 1 else 'softmax'
    if self.net_type == 'simple':
        net_config = [] if self.net_config is None else self.net_config
        with tf.variable_scope('wider_actor'):
            for layer in net_config:
                units, activation = layer.get('units'), layer.get('activation', 'relu')
                output = BasicModel.fc_layer(output, units, use_bias=True)
                output = BasicModel.activation(output, activation)
            logits = BasicModel.fc_layer(output, self.out_dim, use_bias=True)  # [batch_size * num_steps, out_dim]
            probs = BasicModel.activation(logits, final_activation)  # [batch_size * num_steps, out_dim]
            probs_dim = self.out_dim
            if self.out_dim == 1:
                probs = tf.concat([1 - probs, probs], axis=1)
                probs_dim = 2
            self.q_values = tf.reshape(
                BasicModel.fc_layer(output, probs_dim, use_bias=True),
                [-1, self.num_steps, probs_dim])  # [batch_size, num_steps, out_dim]
            self.decision = tf.multinomial(tf.log(probs), 1)  # [batch_size * num_steps, 1]
            self.decision = tf.reshape(self.decision, [-1, self.num_steps])  # [batch_size, num_steps]
            self.probs = tf.reshape(probs, [-1, self.num_steps, probs_dim])  # [batch_size, num_steps, out_dim]
            self.values = tf.reduce_sum(tf.multiply(self.q_values, self.probs), axis=-1)  # [batch_size, num_steps]
            self.selected_prob = tf.reduce_sum(tf.one_hot(self.decision, probs_dim) * self.probs, axis=-1)
            self.selected_q = tf.reduce_sum(tf.one_hot(self.decision, probs_dim) * self.q_values, axis=-1)
    else:
        raise ValueError('Do not support %s' % self.net_type)

def build_forward(self, encoder_output, encoder_state, is_training, decision_trajectory):
    self._define_input()
    self.decision, self.probs, self.selected_prob, self.q_values, self.selected_q, self.values = [], [], [], [], [], []
    batch_size = array_ops.shape(encoder_output)[0]
    if self.attention_config is None:
        cell = self.build_decoder_cell(encoder_state)
        cell_state = encoder_state
        cell_input = tf.zeros(shape=[batch_size], dtype=tf.int32)
        with tf.variable_scope('deeper_actor'):
            for _i in range(self.decision_num):
                cell_input_embed = embedding(cell_input, 1 if _i == 0 else self.out_dims[_i - 1],
                                             self.embedding_dim, name='deeper_actor_embedding_%d' % _i)
                with tf.variable_scope('rnn', reuse=(_i > 0)):
                    cell_output, cell_state = cell(cell_input_embed, cell_state)
                with tf.variable_scope('classifier_%d' % _i):
                    logits_i = BasicModel.fc_layer(cell_output, self.out_dims[_i], use_bias=True)  # [batch_size, out_dim_i]
                with tf.variable_scope('q_value_%d' % _i):
                    qv = BasicModel.fc_layer(cell_output, self.out_dims[_i], use_bias=True)  # [batch_size, out_dim_i]
                act_i = 'softmax'
                probs_i = BasicModel.activation(logits_i, activation=act_i)  # [batch_size, out_dim_i]
                if _i == 1:
                    # determine the layer index for deeper actor
                    # require mask
                    one_hot_block_decision = tf.one_hot(cell_input, depth=self.out_dims[0], dtype=tf.int32)
                    max_layer_num = tf.multiply(self.block_layer_num, one_hot_block_decision)
                    max_layer_num = tf.reduce_max(max_layer_num, axis=1)  # [batch_size]
                    layer_mask = tf.sequence_mask(max_layer_num, self.out_dims[1], dtype=tf.float32)
                    probs_i = tf.multiply(probs_i, layer_mask)
                    # rescale the sum to 1
                    probs_i = tf.divide(probs_i, tf.reduce_sum(probs_i, axis=1, keep_dims=True))
                decision_i = tf.multinomial(tf.log(probs_i), 1)  # [batch_size, 1]
                decision_i = tf.cast(decision_i, tf.int32)
                decision_i = tf.reshape(decision_i, shape=[-1])  # [batch_size]
                cell_input = tf.cond(
                    is_training,
                    lambda: decision_trajectory[:, _i],
                    lambda: decision_i,
                )
                self.q_values.append(qv)
                self.decision.append(decision_i)
                self.probs.append(probs_i)
                self.values.append(tf.reduce_sum(tf.multiply(qv, probs_i), axis=-1))
                sq = tf.reduce_sum(tf.one_hot(decision_i, self.out_dims[_i]) * qv, axis=-1)
                self.selected_q.append(sq)
                sp = tf.reduce_sum(tf.one_hot(decision_i, self.out_dims[_i]) * probs_i, axis=-1)
                self.selected_prob.append(sp)
        self.decision = tf.stack(self.decision, axis=1)  # [batch_size, decision_num]
        self.values = tf.stack(self.values, axis=1)  # [batch_size, decision_num]
        self.selected_q = tf.stack(self.selected_q, axis=1)
        self.selected_prob = tf.stack(self.selected_prob, axis=1)
    else:
        raise NotImplementedError

def test_train_with_stages(self):
    model = BasicModel({
        "stage": "second"
    })
    model._first_stage = MagicMock()
    model._second_stage = MagicMock()
    model._third_stage = MagicMock()
    model._save = MagicMock()
    model.stages = [
        ("first", model._first_stage),
        ("second", model._second_stage),
        ("third", model._third_stage)
    ]
    process = Mock(
        first=model._first_stage,
        second=model._second_stage,
        third=model._third_stage,
        save=model._save
    )
    model.train()
    process.assert_has_calls([
        call.second(),
        call.save(),
        call.third(),
        call.save()
    ])
    model._first_stage.assert_not_called()

def build(self, _input, net, store_output_op=False):
    assert (isinstance(net, BasicModel))
    output = _input
    if not self.ready:
        return output
    with tf.variable_scope(self._id):
        self._scope = tf.get_variable_scope().name
        param_initializer = self.param_initializer
        if self.pre_activation:
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
            # pooling
            if self._type == 'avg':
                output = BasicModel.avg_pool(output, k=self.kernel_size, s=self.strides)
            elif self._type == 'max':
                output = BasicModel.max_pool(output, k=self.kernel_size, s=self.strides)
            else:
                raise ValueError('Do not support the pooling type: %s' % self._type)
        else:
            # pooling
            if self._type == 'avg':
                output = BasicModel.avg_pool(output, k=self.kernel_size, s=self.strides)
            elif self._type == 'max':
                output = BasicModel.max_pool(output, k=self.kernel_size, s=self.strides)
            else:
                raise ValueError('Do not support the pooling type: %s' % self._type)
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
        # dropout
        output = BasicModel.dropout(output, self.keep_prob, net.is_training)
    if store_output_op:
        self.output_op = output
    return output

def test_submit(self):
    model = BasicModel({
        "results_folder": "./"
    })
    test_X = pd.DataFrame([[0, 1], [1, 0]], columns=["y", "n"])
    test_y = pd.DataFrame([[0, 1], [1, 1]], columns=["id", "prediction"])
    model._predict = MagicMock(return_value=[1, 1])
    model.submit(test_X)
    results = pd.read_csv("./submission.csv")
    self.assertSequenceEqual(results.to_dict(), test_y.to_dict())

def get_user_list(cls, group_id=None):
    if group_id is None:
        sql = f'select * from {cls.prefix}user_info_tbl where stat<3 order by id'
    else:
        sql = f'select * from {cls.prefix}user_info_tbl where stat<3 and group_id={group_id} order by id'
    return BasicModel.query_all(sql)

def build(self, _input, net, store_output_op=False):
    assert (isinstance(net, BasicModel))
    output = _input
    if not self.ready:
        return output
    with tf.variable_scope(self._id):
        self._scope = tf.get_variable_scope().name
        param_initializer = self.param_initializer
        # flatten the input if it is not already flat
        output = BasicModel.flatten(output)
        if self.pre_activation:
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
            # FC
            output = BasicModel.fc_layer(output, self.units, self.use_bias, param_initializer=param_initializer)
        else:
            # FC
            output = BasicModel.fc_layer(output, self.units, self.use_bias, param_initializer=param_initializer)
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
        # dropout
        output = BasicModel.dropout(output, self.keep_prob, net.is_training)
    if store_output_op:
        self.output_op = output
    return output

def test_preprocess_train_data(self):
    train_df = pd.DataFrame(
        [["text", "additional", 1]],
        columns=["text", "additional", "label"]
    )
    train_df.to_csv("train.csv")
    config = {
        "data_folder": "./",
        "train_data_path": "train.csv"
    }
    model = BasicModel(config)

    def remove_additional(df):
        return df.drop(columns=["additional"])

    model._preprocess_data = MagicMock(side_effect=remove_additional)
    model._preprocess_and_save_data(config["train_data_path"])
    preprocessed_train_df = pd.read_csv("./preprocessed_train.csv")
    assert "text" in preprocessed_train_df.columns
    assert "label" in preprocessed_train_df.columns
    assert "additional" not in preprocessed_train_df.columns

def make_model(model_name):
    if model_name == available_models[0]:
        return AvgSeqDenseModel()
    elif model_name == available_models[1]:
        return StackedBiLSTMDenseModel()
    elif model_name == available_models[2]:
        return TransformerEncoderBiLSTMDenseModel()
    elif model_name == available_models[3]:
        return RNMTPlusEncoderBiLSTMDenseModel()
    elif model_name == available_models[4]:
        return MultiHeadAttnAvgDenseModel()
    else:
        return BasicModel()

def test_load_parameters(self):
    config = {
        "data_folder": "folder",
        "train_data_path": "train",
        "test_data_path": "test",
        "unlabeled_data_path": "unlabeled",
        "results_folder": "results",
        "stage": "stage"
    }
    model = BasicModel(config)
    assert model.train_data_path == "train"
    assert model.test_data_path == "test"
    assert model.unlabeled_data_path == "unlabeled"
    assert model.data_folder == "folder"
    assert model.results_folder == "results"
    assert model.stage == "stage"

def build(self, _input, net, store_output_op=False):
    assert (isinstance(net, BasicModel))
    output = _input
    if not self.ready:
        return output
    with tf.variable_scope(self._id):
        self._scope = tf.get_variable_scope().name
        param_initializer = self.param_initializer
        if self.pre_activation:
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
            # convolutional
            output = BasicModel.conv2d(output, self.filter_num, self.kernel_size, self.strides,
                                       param_initializer=param_initializer)
        else:
            # convolutional
            output = BasicModel.conv2d(output, self.filter_num, self.kernel_size, self.strides,
                                       param_initializer=param_initializer)
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
        # dropout
        output = BasicModel.dropout(output, self.keep_prob, net.is_training)
    if store_output_op:
        self.output_op = output
    return output

def test_init_model(self):
    model = BasicModel({
        "train_data_path": "train",
        "unlabeled_data_path": "unlabeled",
        "test_data_path": "test"
    })
    model._preprocess_and_save_data = MagicMock()
    model._load = MagicMock()
    model.init()
    model._preprocess_and_save_data.assert_any_call("train")
    model._preprocess_and_save_data.assert_any_call("unlabeled")
    model._preprocess_and_save_data.assert_any_call("test")
    model._load.assert_called_with()

def build_training_process(self):
    c = tf.constant(0.8, dtype=tf.float32)
    # if self.wider_seg_deeper > 0, then get wider_side_obj, else wider_entropy = 0
    wider_side_obj, wider_entropy = tf.cond(
        tf.greater(self.wider_seg_deeper, 0),
        lambda: self.get_wider_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)))
    wider_side_obj = tf.reduce_sum(wider_side_obj * tf.minimum(c, self.wider_rho))
    batch_size = array_ops.shape(self.reward)[0]
    deeper_side_obj, deeper_entropy = tf.cond(
        self.has_deeper,
        lambda: self.get_deeper_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)))
    deeper_side_obj = tf.reduce_sum(deeper_side_obj * tf.minimum(c, self.deeper_rho))
    self.obj = wider_side_obj + deeper_side_obj
    entropy_term = wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) + \
        deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32)
    entropy_term /= tf.cast(batch_size, tf.float32)
    g = -self.obj - self.entropy_penalty * entropy_term
    optimizer = BasicModel.build_optimizer(self.learning_rate, self.opt_config[0], self.opt_config[1])
    self.train_step = [optimizer.minimize(g)]

    wq = tf.reshape(self.wider_actor.selected_q, [-1])
    w_loss = tf.losses.mean_squared_error(self.wider_qrets, wq)
    self.update_wider_q = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(loss=w_loss)
    dq = tf.reshape(self.deeper_actor.selected_q, [-1])
    d_loss = tf.losses.mean_squared_error(self.deeper_qrets, dq)
    self.update_deeper_q = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(loss=d_loss)

    # add baseline to training step
    if self.baseline_actor is not None:
        self.build_baseline_network()

def build_training_process(self):
    wider_side_obj, wider_entropy = tf.cond(
        tf.greater(self.wider_seg_deeper, 0),
        lambda: self.get_wider_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)))
    batch_size = array_ops.shape(self.reward)[0]
    deeper_side_obj, deeper_entropy = tf.cond(
        self.has_deeper,
        lambda: self.get_deeper_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)))
    self.obj = wider_side_obj + deeper_side_obj
    entropy_term = wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) + \
        deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32)
    entropy_term /= tf.cast(batch_size, tf.float32)
    optimizer = BasicModel.build_optimizer(self.learning_rate, self.opt_config[0], self.opt_config[1])
    # print "in build, reward = {}".format(self.reward)
    self.train_step = optimizer.minimize(-self.obj - self.entropy_penalty * entropy_term)

def build_training_process(self):
    # if self.wider_seg_deeper > 0, then get wider_side_obj, else wider_entropy = 0
    wider_side_obj, wider_entropy = tf.cond(
        tf.greater(self.wider_seg_deeper, 0),
        lambda: self.get_wider_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32))
    )
    batch_size = array_ops.shape(self.reward)[0]
    deeper_side_obj, deeper_entropy = tf.cond(
        self.has_deeper,
        lambda: self.get_deeper_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32))
    )
    self.obj = wider_side_obj + deeper_side_obj
    entropy_term = wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) + \
        deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32)
    entropy_term /= tf.cast(batch_size, tf.float32)
    optimizer = BasicModel.build_optimizer(self.learning_rate, self.opt_config[0], self.opt_config[1])
    self.train_step = [optimizer.minimize(-self.obj - self.entropy_penalty * entropy_term)]
    # add baseline to training step
    if self.baseline_actor is not None:
        self.build_baseline_network()

    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # get arguments
    args = parse()

    # set seeds
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.seed)

    # model = VGGModel(vgg_name='VGG13')
    # model = resnet(model_name='resnet18', pretrained=False, num_classes=31)
    model_empty_vs_all = BasicModel(nlabels=2)
    if use_cuda:
        model_empty_vs_all.cuda()
    model_parameters = filter(lambda p: p.requires_grad, model_empty_vs_all.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Total number of parameters: {}\n'.format(params))

    optimizer = optim.Adam(params=model_empty_vs_all.parameters(), lr=args.learning_rate,
                           weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=20, gamma=0.8)

    if args.load_model is None:
        epoch_start = 0
        best_valid_acc = 0

        pred[np.intersect1d(zero_idx, zero_idx_p)] = 21
        pred[pred >= 21] += 1
        pdb.set_trace()
        correct += float(pred.eq(target.data).sum())
        del output, data, target
    state['{}_acc'.format(mode)] = correct / len(data_loader.dataset)


if __name__ == '__main__':
    # get arguments
    args = parse()

    model_empty_vs_all = BasicModel(nlabels=2)
    if use_cuda:
        model_empty_vs_all.cuda()
    model_parameters = filter(lambda p: p.requires_grad, model_empty_vs_all.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Total number of parameters: {}\n'.format(params))

    model = AttentionLocalizationModel(nlabels=30)
    if use_cuda:
        model.cuda()
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Total number of parameters: {}\n'.format(params))

import tensorflow as tf

from models.basic_model import BasicModel
from models.convolutional_model import ConvModel
from dataset import MnistDataset

epochs = 5
batch_size = 4

train_set = MnistDataset('data/train')
n_batches = len(train_set) // batch_size

model = BasicModel(resolution=[28, 28], channels=1)
# model = ConvModel(resolution=[28, 28], channels=1)

# We use this to save the model. Instantiate it after all Variables have been created.
saver = tf.train.Saver()

label_placeholder = tf.placeholder(tf.float32, shape=[batch_size, 10])
loss = tf.losses.softmax_cross_entropy(label_placeholder, model.predictions)
update = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

top_predictions = tf.argmax(model.predictions, axis=1)  # probabilities -> top prediction
top_labels = tf.argmax(label_placeholder, axis=1)  # one_hot -> number
correct = tf.equal(top_predictions, top_labels)  # bool Tensor
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))  # average correct guesses

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

def get_model(self):
    model = gconfig.get_config_param("model", "str")
    if model == "basic_conv_graph":
        model = BasicModel()
    elif model == "conv_graph_dgcnn_fast_conv":
        model = BasicModel()
        model.set_conv_segment(FastConvSegment())
        model.set_graph_segment(DgcnnSegment())
    elif model == "conv_graph_garnet_fast_conv":
        model = BasicModel()
        model.set_conv_segment(FastConvSegment())
        model.set_graph_segment(GarNetSegment())
    elif model == "conv_fcnn_fast_conv":
        model = BasicModel()
        model.set_conv_segment(FastConvSegment())
        model.set_graph_segment(FcnnSegment())
    elif model == "conv_grav_net_fast_conv":
        model = BasicModel()
        model.set_conv_segment(FastConvSegment())
        model.set_graph_segment(GravnetSegment())
    else:
        return ModelInterface()  # TODO: Fix this
    return model

graph_encoder = GraphEncoder()
with open('basic_model_graph_encoder.pickle', 'wb') as out:
    pickle.dump(graph_encoder, out)
np.savez_compressed(data_file, data=data, no_labels=no_labels, all_labels=all_labels)

model_attributes = {
    'afm': 30,
    'bfm': 8,
    'mfm': 30,
    'adj': 1,
    'out': 120,
    'classification_output': 2
}
model = nn.Sequential(
    GraphWrapper(BasicModel(model_attributes['afm'], model_attributes['bfm'], model_attributes['mfm'],
                            model_attributes['adj'], model_attributes['out'])),
    nn.BatchNorm1d(model_attributes['out']),
    nn.Linear(model_attributes['out'], 60),
    nn.ReLU(),
    nn.Linear(60, 30),
    nn.ReLU(),
    nn.Linear(30, 15),
    nn.ReLU(),
    nn.Linear(15, model_attributes['classification_output'])
)

# selected_label = np.random.choice(np.arange(no_labels))
selected_label = 243
print("Target selected: {}".format(selected_label))

for graph in data:

def update_user_stat_by_id(cls, id, stat):
    sql = f'update {cls.prefix}user_info_tbl set stat={stat} where id={id}'
    return BasicModel.update(sql)

def get_user_info_by_id(cls, id):
    sql = f'select * from {cls.prefix}user_info_tbl where id={id}'
    return BasicModel.query_all(sql)[0]
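
# --- Hypothetical sketch, not part of BasicModel's API ---------------------
# The f-string queries above interpolate stat, id, and group_id directly into
# the SQL text, which is unsafe if those values ever come from user input.
# A parameterized query keeps the same behavior while letting the database
# driver bind the values. This minimal sketch uses the standard-library
# sqlite3 module purely to illustrate the placeholder style; the table prefix
# is still formatted in because identifiers cannot be parameterized and it is
# assumed here to be a trusted, application-controlled string.
import sqlite3


def update_user_stat_by_id_safe(conn: sqlite3.Connection, prefix: str, user_id: int, stat: int) -> int:
    sql = f'update {prefix}user_info_tbl set stat = ? where id = ?'
    cur = conn.execute(sql, (stat, user_id))  # values bound by the driver, not the f-string
    conn.commit()
    return cur.rowcount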