def __init__(self, pc, vocab, layers, state_dim, final_hidden_dim, tied,
             residual):
    """Allocate all model parameters in a fresh sub-collection of ``pc``.

    Args:
        pc: Parent DyNet parameter collection. Everything created here is
            registered in ``pc.add_subcollection()``.
        vocab: Vocabulary object; must support ``convert(token)`` and
            ``len()``.
        layers: Number of layers of both LSTM builders.
        state_dim: Size used for LSTM inputs and states, for the gate MLP
            and (when not tied) for the word embeddings.
        final_hidden_dim: Hidden width of the output MLP.
        tied: When true, no separate word-embedding table is allocated.
        residual: Stored on the instance; not otherwise used here.
    """
    # Plain configuration.
    self.vocab = vocab
    self.layers = layers
    self.state_dim = state_dim
    self.tied = tied
    self.residual = residual

    # Ids of the two structural marker tokens.
    self.done_with_left = vocab.convert('</LEFT>')
    self.done_with_right = vocab.convert('</RIGHT>')
    n_words = len(self.vocab)

    self.pc = pc.add_subcollection()

    # Parameters are created in a fixed order; keep it stable.
    if not self.tied:
        embedding_shape = (n_words, state_dim)
        self.word_embs = self.pc.add_lookup_parameters(embedding_shape)

    self.top_lstm = dy.LSTMBuilder(layers, state_dim, state_dim, self.pc)
    self.vertical_lstm = dy.LSTMBuilder(layers, state_dim, state_dim,
                                        self.pc)
    self.gate_mlp = MLP(self.pc, [2 * state_dim, state_dim, state_dim])

    # Per-sentence bookkeeping, all empty at construction time.
    self.open_constit_lstms = []
    self.debug_stack = []
    self.spine = []

    self.final_mlp = MLP(self.pc, [state_dim, final_hidden_dim, n_words])

    # Two learned start vectors per LSTM layer for each of the two LSTMs
    # (presumably hidden and cell state -- confirm against ``set_s`` usage).
    n_start_vectors = 2 * layers
    vector_shape = (state_dim, )

    self.top_initial_state = []
    for _ in range(n_start_vectors):
        self.top_initial_state.append(self.pc.add_parameters(vector_shape))

    self.open_initial_state = []
    for _ in range(n_start_vectors):
        self.open_initial_state.append(self.pc.add_parameters(vector_shape))
def computeLoss2(self, task_id, internal_id):
    """Build the masked regression loss of one task and record it.

    Creates the gate and transform MLPs of the task, stores them in
    ``self.weights``, stores the masked mean absolute error in
    ``self.ops['accuracy_task<id>']`` and appends the task loss to
    ``self.ops['losses']``.

    Args:
        task_id: Task identifier, used in weight/op names and to look up the
            task's sample ratio.
        internal_id: Row of the ``target_values`` / ``target_mask``
            placeholders that belongs to this task.
    """
    gate_key = 'regression_gate_task%i' % task_id
    transform_key = 'regression_transform_task%i' % task_id
    hidden_size = self.params['hidden_size']
    keep_prob = self.placeholders['out_layer_dropout_keep_prob']

    with tf.variable_scope("regression_gate"):
        self.weights[gate_key] = MLP(2 * hidden_size, 1, [], keep_prob)
    with tf.variable_scope("regression"):
        self.weights[transform_key] = MLP(hidden_size, 1, [], keep_prob)

    computed_values = self.gated_regression(
        self.ops['final_node_representations'],
        self.weights[gate_key],
        self.weights[transform_key])

    targets = self.placeholders['target_values'][internal_id, :]
    mask = self.placeholders['target_mask'][internal_id, :]
    # SMALL_NUMBER keeps the divisions below finite when the mask is empty.
    num_targets = tf.reduce_sum(mask) + SMALL_NUMBER

    # Entries whose mask is 0 contribute no error.
    masked_diff = (computed_values - targets) * mask

    self.ops['accuracy_task%i' % task_id] = (
        tf.reduce_sum(tf.abs(masked_diff)) / num_targets)
    task_loss = tf.reduce_sum(0.5 * tf.square(masked_diff)) / num_targets

    # Re-weight by the inverse sample ratio of the task; a missing or falsy
    # ratio is treated as 1.0.
    sample_ratio = self.params['task_sample_ratios'].get(task_id) or 1.0
    task_loss = task_loss * (1.0 / sample_ratio)
    self.ops['losses'].append(task_loss)
def __init__(self, config: Config, num_spherical=7, num_radial=6,
             envelope_exponent=5):
    """Create the basis layers, embedding table and message-passing stacks.

    Args:
        config: Provides ``dim``, ``n_layer`` and ``cutoff``; it is also
            handed unchanged to every ``Global_MP`` / ``Local_MP`` layer.
        num_spherical: First argument of the spherical basis layer.
        num_radial: Second argument of the spherical basis layer.
        envelope_exponent: Passed to all three basis layers.
    """
    super(MXMNet, self).__init__()

    # Values in the trailing comments are the original author's notes.
    self.dim = config.dim  # 128
    self.n_layer = config.n_layer  # 6
    self.cutoff = config.cutoff  # 5

    # Learnable (5, dim) table initialised to ones.
    # Author's note (translated): (5, 128) = (atom index, atom features).
    self.embeddings = nn.Parameter(torch.ones((5, self.dim)))

    # Basis expansions. Only ``rbf_g`` uses the configured cutoff; the other
    # two use the literal 5.
    self.rbf_l = BesselBasisLayer(16, 5, envelope_exponent)
    self.rbf_g = BesselBasisLayer(16, self.cutoff, envelope_exponent)
    self.sbf = SphericalBasisLayer(num_spherical, num_radial, 5,
                                   envelope_exponent)

    # MLPs mapping the basis features to ``dim``.
    sbf_width = num_spherical * num_radial
    self.rbf_g_mlp = MLP([16, self.dim])
    self.rbf_l_mlp = MLP([16, self.dim])
    self.sbf_1_mlp = MLP([sbf_width, self.dim])
    self.sbf_2_mlp = MLP([sbf_width, self.dim])

    # ``n_layer`` global layers are built first, then ``n_layer`` local ones.
    n_blocks = config.n_layer
    self.global_layers = torch.nn.ModuleList(
        [Global_MP(config) for _ in range(n_blocks)])
    self.local_layers = torch.nn.ModuleList(
        [Local_MP(config) for _ in range(n_blocks)])

    self.init()
def __init__(self, word_embedding_dim, hidden_dim, vocab_size,
             tag_vocab_size, tag_embedding_dim, label_amount, dropout,
             char_emb_dim, char_vocab_size):
    """Create the embeddings, LSTMs, MLPs and bi-affine weights.

    Args:
        word_embedding_dim: Width of the word embedding table.
        hidden_dim: Hidden size of the sentence LSTM (per direction) and of
            the projection / MLP layers.
        vocab_size: Number of rows of the word embedding table.
        tag_vocab_size: Number of rows of the tag embedding table.
        tag_embedding_dim: Width of the tag embedding table.
        label_amount: Leading dimension of the label bi-affine tensor.
        dropout: Rate given to ``nn.Dropout``, to both LSTMs and to the
            four MLPs.
        char_emb_dim: Width of the character embedding table.
        char_vocab_size: Number of rows of the character embedding table.
    """
    super(DependencyParser, self).__init__()

    self.dropout = nn.Dropout(dropout)
    self.hidden_dim = hidden_dim

    # Embedding tables.
    self.word_embeddings = nn.Embedding(vocab_size, word_embedding_dim)
    self.tag_embeddings = nn.Embedding(tag_vocab_size, tag_embedding_dim)
    self.char_embeddings = nn.Embedding(char_vocab_size, char_emb_dim)
    self.char_attention = nn.Linear(char_emb_dim, 1)

    # Bidirectional LSTM over concatenated word + tag embeddings.
    sentence_input_dim = word_embedding_dim + tag_embedding_dim
    self.lstm = nn.LSTM(sentence_input_dim,
                        hidden_dim,
                        num_layers=1,
                        bidirectional=True,
                        dropout=dropout)
    # Character LSTM with a fixed hidden size of 20.
    self.char_lstm = nn.LSTM(char_emb_dim,
                             20,
                             num_layers=1,
                             dropout=dropout)

    self.hidden_to_relu_dep = nn.Linear(hidden_dim, hidden_dim)
    self.hidden_to_relu_head = nn.Linear(hidden_dim, hidden_dim)

    # Four MLPs with identical constructor arguments.
    doubled_hidden = hidden_dim * 2
    self.arc_dep = MLP(doubled_hidden, hidden_dim, 1, dropout)
    self.arc_head = MLP(doubled_hidden, hidden_dim, 1, dropout)
    self.label_dep = MLP(doubled_hidden, hidden_dim, 1, dropout)
    self.label_head = MLP(doubled_hidden, hidden_dim, 1, dropout)

    # The extra "+ 1" input column is for a bias term.
    biaffine_dim = hidden_dim + 1
    self.bi_affine_arcs = nn.Linear(biaffine_dim, hidden_dim, bias=False)

    # Label tensor: allocated uninitialised, then filled from N(0, 1).
    label_weights = torch.Tensor(label_amount, biaffine_dim, biaffine_dim)
    self.bi_affine_labels_weights = nn.Parameter(label_weights)
    self.bi_affine_labels_weights.data.normal_(0, 1)
def make_task_output_model(
        self,
        placeholders: Dict[str, tf.Tensor],
        model_ops: Dict[str, tf.Tensor],
) -> None:
    """Add the per-task gated regression heads and their metrics.

    New placeholders (``graph_nodes_list``, ``target_values``,
    ``out_layer_dropout_keep_prob``) are written into ``placeholders``; the
    resulting metrics dictionary is stored in ``model_ops['task_metrics']``.

    Args:
        placeholders: Placeholder dictionary, extended in place. Must
            already contain ``num_graphs``.
        model_ops: Must contain ``final_node_representations`` and
            ``initial_node_features``; receives ``task_metrics``.
    """
    task_ids = self.params['task_ids']

    placeholders['graph_nodes_list'] = tf.placeholder(
        dtype=tf.int32, shape=[None], name='graph_nodes_list')
    placeholders['target_values'] = tf.placeholder(
        dtype=tf.float32, shape=[len(task_ids), None], name='target_values')
    placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
        dtype=tf.float32, shape=[], name='out_layer_dropout_keep_prob')

    task_metrics = {}
    losses = []
    node_reprs = model_ops['final_node_representations']
    final_node_feature_size = node_reprs.shape.as_list()[-1]

    for (internal_id, task_id) in enumerate(task_ids):
        abs_err_key = 'abs_err_task%i' % task_id
        with tf.variable_scope("out_layer_task%i" % task_id):
            keep_prob = placeholders['out_layer_dropout_keep_prob']
            with tf.variable_scope("regression_gate"):
                gate_in_size = (self.initial_node_feature_size +
                                final_node_feature_size)
                regression_gate = MLP(gate_in_size, 1, [], keep_prob)
            with tf.variable_scope("regression"):
                regression_transform = MLP(final_node_feature_size, 1, [],
                                           keep_prob)

            per_node_outputs = regression_transform(
                model_ops['final_node_representations'])
            # The gate sees final representations and initial features.
            gate_input = tf.concat(
                [model_ops['final_node_representations'],
                 model_ops['initial_node_features']],
                axis=-1)
            gate = tf.nn.sigmoid(regression_gate(gate_input))
            per_node_gated_outputs = gate * per_node_outputs

            # Sum the gated node outputs of every graph.
            per_graph_outputs = tf.unsorted_segment_sum(
                data=per_node_gated_outputs,
                segment_ids=placeholders['graph_nodes_list'],
                num_segments=placeholders['num_graphs'])
            per_graph_outputs = tf.squeeze(per_graph_outputs)  # [g]

            task_targets = placeholders['target_values'][internal_id, :]
            per_graph_errors = per_graph_outputs - task_targets

            # Summed absolute error; the summary divides by the graph count.
            task_metrics[abs_err_key] = tf.reduce_sum(
                tf.abs(per_graph_errors))
            tf.summary.scalar(
                'mae_task%i' % task_id,
                task_metrics[abs_err_key] /
                tf.cast(placeholders['num_graphs'], tf.float32))

            losses.append(tf.reduce_mean(0.5 * tf.square(per_graph_errors)))

    # 'loss' sums the per-task mean losses; 'total_loss' scales it by the
    # number of graphs in the batch.
    task_metrics['loss'] = tf.reduce_sum(losses)
    task_metrics['total_loss'] = task_metrics['loss'] * tf.cast(
        placeholders['num_graphs'], tf.float32)
    model_ops['task_metrics'] = task_metrics
def make_model(self):
    """Build placeholders, the graph model and one regression head per task.

    Fills ``self.placeholders``, ``self.weights`` and ``self.ops``; the
    summed task loss ends up in ``self.ops['loss']``.
    """
    task_ids = self.params['task_ids']

    self.placeholders['target_values'] = tf.placeholder(
        tf.float32, [len(task_ids), None], name='target_values')
    self.placeholders['target_mask'] = tf.placeholder(
        tf.float32, [len(task_ids), None], name='target_mask')
    self.placeholders['num_graphs'] = tf.placeholder(
        tf.int64, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
        tf.float32, [], name='out_layer_dropout_keep_prob')

    with tf.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        # With 'use_graph' off, the node representations are all zeros.
        if self.params['use_graph']:
            node_reprs = self.compute_final_node_representations()
        else:
            node_reprs = tf.zeros_like(
                self.placeholders['initial_node_representation'])
        self.ops['final_node_representations'] = node_reprs

    self.ops['losses'] = []
    for (internal_id, task_id) in enumerate(task_ids):
        gate_key = 'regression_gate_task%i' % task_id
        transform_key = 'regression_transform_task%i' % task_id
        with tf.variable_scope("out_layer_task%i" % task_id):
            with tf.variable_scope("regression_gate"):
                self.weights[gate_key] = MLP(
                    2 * self.params['hidden_size'], 1, [],
                    self.placeholders['out_layer_dropout_keep_prob'])
            with tf.variable_scope("regression"):
                self.weights[transform_key] = MLP(
                    self.params['hidden_size'], 1, [],
                    self.placeholders['out_layer_dropout_keep_prob'])

            computed_values = self.gated_regression(
                self.ops['final_node_representations'],
                self.weights[gate_key],
                self.weights[transform_key])

            targets = self.placeholders['target_values'][internal_id, :]
            mask = self.placeholders['target_mask'][internal_id, :]
            num_targets = tf.reduce_sum(mask) + SMALL_NUMBER
            # Entries whose mask is 0 get a zero error.
            masked_diff = (computed_values - targets) * mask

            # Counts entries whose absolute error rounds to 0 (for errors
            # below 1.5); this is a sum, not a ratio.
            # NOTE(review): masked-out entries have masked_diff == 0 and so
            # also add 1 to this count -- confirm that is intended.
            self.ops['accuracy_task%i' % task_id] = tf.reduce_sum(
                tf.abs(tf.round(tf.abs(masked_diff)) - 1))

            task_loss = (tf.reduce_sum(0.5 * tf.square(masked_diff)) /
                         num_targets)
            # Re-weight by the inverse sample ratio of the task; a missing
            # or falsy ratio is treated as 1.0.
            sample_ratio = (
                self.params['task_sample_ratios'].get(task_id) or 1.0)
            task_loss = task_loss * (1.0 / sample_ratio)
            self.ops['losses'].append(task_loss)

    self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
def __init__(self, config):
    """Create the sub-modules of one local message-passing layer.

    Args:
        config: Configuration object. Only ``config.dim`` is read; it is
            used as the width of every layer created here.
    """
    super(Local_MP, self).__init__()
    self.dim = config.dim

    # ``h_mlp`` is created exactly once. It stays the first registered
    # sub-module so its position in the module/parameter ordering (and in
    # ``state_dict``) is unchanged.
    self.h_mlp = MLP([self.dim, self.dim])

    self.mlp_kj = MLP([3 * self.dim, self.dim])
    self.mlp_ji_1 = MLP([3 * self.dim, self.dim])
    self.mlp_ji_2 = MLP([self.dim, self.dim])
    self.mlp_jj = MLP([self.dim, self.dim])

    self.mlp_sbf1 = MLP([self.dim, self.dim, self.dim])
    self.mlp_sbf2 = MLP([self.dim, self.dim, self.dim])
    self.lin_rbf1 = nn.Linear(self.dim, self.dim, bias=False)
    self.lin_rbf2 = nn.Linear(self.dim, self.dim, bias=False)

    self.res1 = Res(self.dim)
    self.res2 = Res(self.dim)
    self.res3 = Res(self.dim)

    self.lin_rbf_out = nn.Linear(self.dim, self.dim, bias=False)

    # Output head: a three-layer MLP followed by a linear map to one value.
    self.y_mlp = MLP([self.dim, self.dim, self.dim, self.dim])
    self.y_W = nn.Linear(self.dim, 1)
def make_model(self):
    """Build placeholders, the graph model and one classification head per task.

    All tasks share a single gate MLP (``regression_gate_task``); each task
    has its own transform MLP and its own "context embedding" MLP applied to
    the ``pre_id_vector`` placeholder. Per-task sigmoid predictions are
    collected in ``self.ops['predicted_values']`` and the summed task losses
    are stored in ``self.ops['loss']``.
    """
    self.placeholders['target_values'] = tf.placeholder(tf.float32, [len(self.params['task_ids']), None],
                                                        name='target_values')
    self.placeholders['target_mask'] = tf.placeholder(tf.float32, [len(self.params['task_ids']), None],
                                                      name='target_mask')
    self.placeholders['num_graphs'] = tf.placeholder(tf.int32, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(tf.float32, [], name='out_layer_dropout_keep_prob')
    # One column per task; fed to each task's context-embedding MLP below.
    self.placeholders['pre_id_vector'] = tf.placeholder(tf.float32, [None, len(self.params['task_ids'])],
                                                        name='pre_id_vector')

    with tf.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        # This does the actual graph work:
        if self.params['use_graph']:
            self.ops['final_node_representations'] = self.compute_final_node_representations()
        else:
            # Without the graph, node representations are all zeros.
            self.ops['final_node_representations'] = tf.zeros_like(self.placeholders['initial_node_representation'])

    self.ops['losses'] = []
    self.ops['predicted_values'] = []
    # A single gate MLP shared by every task (created once, outside the loop).
    with tf.variable_scope("regression_gate"):
        self.weights['regression_gate_task'] = MLP(2 * self.params['hidden_size'], 1, [],
                                                   self.placeholders['out_layer_dropout_keep_prob'])
    for (internal_id, task_id) in enumerate(self.params['task_ids']):
        with tf.variable_scope("out_layer_task%i" % task_id):
            # Former per-task gate, kept disabled by the original author:
            # with tf.variable_scope("regression_gate"):
            #     self.weights['regression_gate_task%i' % task_id] = MLP(2 * self.params['hidden_size'], 1, [],
            #                                                            self.placeholders['out_layer_dropout_keep_prob'])
            with tf.variable_scope("regression"):
                self.weights['regression_transform_task%i' % task_id] = MLP(self.params['hidden_size'], 1, [],
                                                                            self.placeholders['out_layer_dropout_keep_prob'])
                # NOTE(review): the whitespace-collapsed source does not show
                # whether this MLP is created inside the "regression" scope or
                # one level up; the choice affects variable names -- confirm.
                self.weights['context_embedding_task%i' % task_id] = MLP(len(self.params['task_ids']), 1, [],
                                                                         self.placeholders['out_layer_dropout_keep_prob'])
            computed_values = self.gated_regression(self.ops['final_node_representations'],
                                                    self.weights['regression_gate_task'],
                                                    self.weights['regression_transform_task%i' % task_id])
            # Add the task's context term to the logits before the sigmoid.
            context_values = tf.squeeze(self.weights['context_embedding_task%i' % task_id](self.placeholders['pre_id_vector']))
            computed_values = computed_values + context_values
            predictions = tf.nn.sigmoid(computed_values)
            self.ops['predicted_values'].append(predictions)
            # ``diff`` holds the element-wise sigmoid cross-entropy, not a
            # difference.
            diff = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.placeholders['target_values'][internal_id,:],logits=computed_values)
            task_target_mask = self.placeholders['target_mask'][internal_id,:]
            # SMALL_NUMBER keeps the division finite when the mask is empty.
            task_target_num = tf.reduce_sum(task_target_mask) + SMALL_NUMBER
            diff = diff * task_target_mask  # Mask out unused values
            self.ops['accuracy_task%i' % task_id] = self.masked_accuracy(predictions, self.placeholders['target_values'][internal_id,:], task_target_mask)
            task_loss = tf.reduce_sum(diff) / task_target_num
            # Normalise loss to account for fewer task-specific examples in batch:
            # (a missing or falsy ratio is treated as 1.0)
            task_loss = task_loss * (1.0 / (self.params['task_sample_ratios'].get(task_id) or 1.0))
            self.ops['losses'].append(task_loss)
    self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
def __init__(self, input_dims, hid_dim=32, kernel_size=(3, 3),
             bn_kwargs=None):
    """Create the value, reward, gamma, lambda and transition networks.

    Args:
        input_dims: ``(C, H, W)`` triple; ``H`` must equal ``W``.
        hid_dim: Hidden width of the MLPs and channel count of the
            convolutional stacks.
        kernel_size: Kernel size of every convolution.
        bn_kwargs: Optional dict of batch-norm keyword arguments. It is
            forwarded to the MLPs (as ``bn_kwargs``) and to the
            ``BatchNorm2d`` layers of ``state_net``. ``None`` (default)
            means no extra arguments.
    """
    super(Core, self).__init__()

    # A fresh dict per call avoids sharing one mutable default object
    # between all instances.
    if bn_kwargs is None:
        bn_kwargs = {}

    # preparation
    C, H, W = input_dims
    assert H == W
    fc_dim = C * H * W  # flattened input size
    # (ks - 1) / 2 padding keeps the spatial size for odd kernel sizes
    padding = [int((ks - 1) / 2) for ks in kernel_size]
    self.flatten = nn.Flatten()

    # value network
    self.value_net = MLP([fc_dim, hid_dim, H],
                         batch_norm=True,
                         bn_kwargs=bn_kwargs)

    # internal abstraction
    # NOTE(review): unlike ``state_net`` below, this BatchNorm2d does not
    # receive ``bn_kwargs`` -- confirm that is intentional.
    self.conv_net = nn.Sequential(
        nn.Conv2d(hid_dim, hid_dim, kernel_size=kernel_size,
                  padding=padding),
        nn.BatchNorm2d(hid_dim),
        nn.ReLU())

    # MRP model
    self.reward_net = MLP([fc_dim, hid_dim, H],
                          batch_norm=True,
                          bn_kwargs=bn_kwargs)
    # sigmoid output keeps the gammas and lambdas in (0, 1)
    self.gamma_net = MLP([fc_dim, hid_dim, H],
                         batch_norm=True,
                         activ_out=nn.Sigmoid,
                         bn_kwargs=bn_kwargs)
    self.lambda_net = MLP([fc_dim, hid_dim, H],
                          batch_norm=True,
                          activ_out=nn.Sigmoid,
                          bn_kwargs=bn_kwargs)

    # internal transition network: two conv / batch-norm / ReLU stages
    self.state_net = nn.Sequential(
        nn.Conv2d(hid_dim, hid_dim, kernel_size=kernel_size,
                  padding=padding),
        nn.BatchNorm2d(hid_dim, **bn_kwargs),
        nn.ReLU(),
        nn.Conv2d(hid_dim, hid_dim, kernel_size=kernel_size,
                  padding=padding),
        nn.BatchNorm2d(hid_dim, **bn_kwargs),
        nn.ReLU())
def make_model(self):
    """Build placeholders, the graph model and a 2-output head per task.

    For every task the accuracy, precision, recall and F1 ops are stored in
    ``self.ops`` under ``'<metric>_task<id>'``; the summed cross-entropy
    losses are stored in ``self.ops['loss']``. The ``target_mask``
    placeholder is created but not used in this method.
    """
    num_task_id = len(self.params['task_ids'])

    self.placeholders['target_values'] = tf.placeholder(
        tf.float32, [num_task_id, None, 2 * num_task_id],
        name='target_values')
    self.placeholders['target_mask'] = tf.placeholder(
        tf.float32, [num_task_id, None, 2 * num_task_id],
        name='target_mask')
    self.placeholders['num_graphs'] = tf.placeholder(
        tf.int32, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
        tf.float32, [], name='out_layer_dropout_keep_prob')

    with tf.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        # With 'use_graph' off, the node representations are all zeros.
        if self.params['use_graph']:
            node_reprs = self.compute_final_node_representations()
        else:
            node_reprs = tf.zeros_like(
                self.placeholders['initial_node_representation'])
        self.ops['final_node_representations'] = node_reprs

    self.ops['losses'] = []
    for (internal_id, task_id) in enumerate(self.params['task_ids']):
        gate_key = 'regression_gate_task%i' % task_id
        transform_key = 'regression_transform_task%i' % task_id
        with tf.variable_scope("out_layer_task%i" % task_id):
            with tf.variable_scope("regression_gate"):
                self.weights[gate_key] = MLP(
                    2 * self.params['hidden_size'], 2, [],
                    self.placeholders['out_layer_dropout_keep_prob'])
            with tf.variable_scope("regression"):
                self.weights[transform_key] = MLP(
                    self.params['hidden_size'], 2, [],
                    self.placeholders['out_layer_dropout_keep_prob'])

            logits = self.gated_regression(
                self.ops['final_node_representations'],
                self.weights[gate_key],
                self.weights[transform_key])

            target_dist = self.placeholders['target_values'][internal_id, :]
            true_class = tf.argmax(target_dist, 1)
            predicted_class = tf.argmax(logits, 1)

            matches = tf.equal(predicted_class, true_class)
            accuracy = tf.reduce_mean(tf.cast(matches, tf.float32))
            task_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=logits, labels=target_dist))

            # Confusion counts from the argmax indices.
            # NOTE(review): this arithmetic presumes class indices in
            # {0, 1} with 1 as the positive class -- confirm.
            true_pos = tf.reduce_sum(predicted_class * true_class)
            true_neg = tf.reduce_sum(
                (1 - predicted_class) * (1 - true_class))
            false_pos = tf.reduce_sum(predicted_class * (1 - true_class))
            false_neg = tf.reduce_sum((1 - predicted_class) * true_class)

            # The denominators are not guarded against being zero.
            precision = true_pos / (true_pos + false_pos)
            recall = true_pos / (true_pos + false_neg)
            f1 = 2 * precision * recall / (precision + recall)

            self.ops['accuracy_task%i' % task_id] = accuracy
            self.ops['losses'].append(task_loss)
            self.ops['precision_task%i' % task_id] = precision
            self.ops['recall_task%i' % task_id] = recall
            self.ops['f1_task%i' % task_id] = f1

    self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
def __init__(self, config):
    """Create the sub-modules of one global message-passing layer.

    Args:
        config: Configuration object. Only ``config.dim`` is read; it is
            used as the width of every layer created here.
    """
    super(Global_MP, self).__init__()
    width = config.dim
    self.dim = width

    # Sub-modules are created in a fixed order; keep it stable.
    self.h_mlp = MLP([width, width])

    self.res1 = Res(width)
    self.res2 = Res(width)
    self.res3 = Res(width)

    self.mlp = MLP([width, width])
    # Takes an input three times as wide as ``dim``.
    self.x_edge_mlp = MLP([width * 3, width])
    self.linear = nn.Linear(width, width, bias=False)
def __init__(self, pc, layers, emb_dim, hidden_dim, vocab_size, tied):
    """Create the LSTM, output MLP and (optionally) word embeddings.

    Args:
        pc: Parent DyNet parameter collection; parameters are registered in
            ``pc.add_subcollection()``.
        layers: Number of LSTM layers.
        emb_dim: LSTM input size and width of the embedding table.
        hidden_dim: LSTM state size and hidden width of the output MLP.
        vocab_size: Output size of the MLP / rows of the embedding table.
        tied: When true, no separate embedding table is allocated.
    """
    # NOTE(review): ``tied`` is not part of ``spec``. If instances are
    # rebuilt from it by unpacking into this constructor, that call would
    # be missing ``tied`` -- confirm against the enclosing class.
    self.spec = (layers, emb_dim, hidden_dim, vocab_size)
    self.pc = pc.add_subcollection()

    self.rnn = dy.LSTMBuilder(layers, emb_dim, hidden_dim, self.pc)

    # Two learned start vectors per LSTM layer.
    start_vectors = []
    for _ in range(2 * layers):
        start_vectors.append(self.pc.add_parameters((hidden_dim, )))
    self.initial_state_params = start_vectors

    mlp_dims = [hidden_dim, hidden_dim, vocab_size]
    self.output_mlp = MLP(self.pc, mlp_dims)

    self.tied = tied
    if not self.tied:
        table_shape = (vocab_size, emb_dim)
        self.word_embs = self.pc.add_lookup_parameters(table_shape)

    # Dropout is off at construction time.
    self.dropout_rate = 0.0
def make_model(self):
    """Build the few-shot model: shared feature extractor, node loss and matching loss.

    Support and target inputs go through the same ``get_feature`` call inside
    a reusing variable scope. A per-node classification loss (scaled by
    ``params['node_lambda']``) is added to a squared-error loss on
    similarity-weighted support labels. Results are stored in
    ``self.ops['accuracy']`` and ``self.ops['loss']``.

    The literals 5 (support examples / classes) and 50 (target examples) are
    hard-coded throughout.

    NOTE(review): the scope nesting was reconstructed from a
    whitespace-collapsed source; everything after the MLP creation is placed
    at function level, which only affects op names -- confirm.
    """
    self.placeholders['num_graphs'] = tf.placeholder(tf.int64, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(tf.float32, [], name='out_layer_dropout_keep_prob')

    with tf.variable_scope("graph_model", reuse = tf.AUTO_REUSE):
        self.prepare_specific_graph_model()
        # This does the actual graph work:
        # Trailing shapes are the original author's notes. NOTE(review): the
        # target note says 75, but the reshape further down uses 50.
        support_final_node_representations = self.get_feature(self.placeholders['support_x'],self.placeholders['support_roi'],self.placeholders['support_adj'],self.placeholders['is_training']) #[5,v,4096]
        target_final_node_representations = self.get_feature(self.placeholders['target_x'],self.placeholders['target_roi'],self.placeholders['target_adj'],self.placeholders['is_training']) #[75,v,4096]

    with tf.variable_scope("out_layer", reuse = tf.AUTO_REUSE):
        with tf.variable_scope("regression_gate"):
            # Maps a node representation of size ``out_size`` to 5 logits.
            self.weights['regression_node'] = MLP(self.params['out_size'], 5, [], self.placeholders['out_layer_dropout_keep_prob'])

    # NOTE(review): this constant is never used; ``node_loss`` is rebound to
    # a list before its first read.
    node_loss = tf.constant(0.0)
    print('Node supervision')
    # Number of nodes per example once index 0 is dropped.
    tv = self.placeholders['num_vertices']-1
    #sv = self.placeholders['support_v']
    # NOTE(review): ``v`` is not read anywhere below.
    v = self.placeholders['num_vertices']
    #qv = self.placeholders['target_v']

    # Node supervision on the support set: every node except index 0 is
    # classified with the label of its example.
    support_node = support_final_node_representations[:, 1:, :]
    node_loss = []
    for i in range(5):
        last_h1 = tf.reshape(support_node[i], [-1, self.params['out_size']])
        node_out = self.weights['regression_node'](last_h1)
        node_out = tf.reshape(node_out, [-1, 5])
        # Repeat the example's label once per node.
        node_labels = tf.tile(tf.reshape(self.placeholders['support_label'][i],(1,1)),[1, tv])
        print(node_labels.get_shape().as_list())
        node_labels = tf.reshape(node_labels, [-1])
        node_loss.append(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=node_labels,logits=node_out))

    # Same node supervision on the target set.
    target_node = target_final_node_representations[:, 1:, :]
    for i in range(50):
        last_h1 = tf.reshape(target_node[i], [-1, self.params['out_size']])
        node_out = self.weights['regression_node'](last_h1)
        node_out = tf.reshape(node_out, [-1, 5])
        node_labels = tf.tile(tf.reshape(self.placeholders['target_label'][i],(1,1)),[1, tv])
        node_labels = tf.reshape(node_labels, [-1])
        node_loss.append(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=node_labels,logits=node_out))
    # Mean over all per-node losses, scaled by the configured weight.
    node_loss = tf.reduce_mean(tf.concat(node_loss,0)) * self.params['node_lambda']

    # Index 0 along the second axis is used as the per-example representation.
    support_final_node_representations = tf.reshape(support_final_node_representations[:,0,:],[5,self.params['out_size']])
    target_final_node_representations = tf.reshape(target_final_node_representations[:,0,:],[50,self.params['out_size']])

    similarities = cosine_d(target_final_node_representations,support_final_node_representations)
    similarities = tf.reshape(similarities, [50, 5])

    # Predictions are the softmax-weighted mix of one-hot support labels.
    support_set_labels = tf.one_hot(self.placeholders['support_label'], 5)
    preds = tf.squeeze(tf.matmul(tf.nn.softmax(similarities), support_set_labels))
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.cast(self.placeholders['target_label'], tf.int64))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    targets = tf.one_hot(self.placeholders['target_label'], 5)
    print("preds ",preds.get_shape().as_list())
    print("targets ",targets.get_shape().as_list())
    # Element-wise this is (preds - 1)^2 where targets == 1 and preds^2
    # where targets == 0, i.e. the squared error against the one-hot targets.
    mean_square_error_loss = tf.reduce_mean(tf.square((preds-1)*targets + preds*(1-targets)))

    self.ops['accuracy'] = accuracy
    self.ops['loss'] = mean_square_error_loss + node_loss
def make_model(self):
    """Build placeholders, the graph model and a single regression head.

    The accuracy and loss ops returned by ``self.regression`` are stored in
    ``self.ops['accuracy_task']`` and, after re-weighting, in
    ``self.ops['loss']``.
    """
    self.placeholders['target_values'] = tf.placeholder(
        tf.float32, [None], name='target_values')
    self.placeholders['num_graphs'] = tf.placeholder(
        tf.int32, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
        tf.float32, [], name='out_layer_dropout_keep_prob')

    with tf.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        node_reprs = self.compute_final_node_representations()
        self.ops['final_node_representations'] = node_reprs

    with tf.variable_scope("out_layer_task"):
        with tf.variable_scope("regression"):
            # The head's input is twice the hidden size.
            head_in_size = 2 * self.params['hidden_size']
            self.weights['regression_transform_task'] = MLP(
                head_in_size, 1, [],
                self.placeholders['out_layer_dropout_keep_prob'])

        accuracy, task_loss = self.regression(
            self.ops['final_node_representations'],
            self.weights['regression_transform_task'])
        self.ops['accuracy_task'] = accuracy

        # Here 'task_sample_ratios' is used directly as a number.
        inverse_ratio = 1.0 / self.params['task_sample_ratios']
        self.ops['loss'] = task_loss * inverse_ratio
def __init__(self, action_set, reward_function, feature_extractor,
             hidden_dims=None, learning_rate=5e-4, buffer_size=50000,
             batch_size=64, num_batches=100, starts_learning=5000,
             final_epsilon=0.02, discount=0.99, target_freq=10,
             verbose=False, print_every=1, test_model_path=None):
    """Create a Q-learning agent backed by a multilayer perceptron.

    Args:
        action_set: Forwarded to ``Agent.__init__``; its length is the
            network's output size.
        reward_function: Forwarded to ``Agent.__init__``.
        feature_extractor: Object with a ``dimension`` attribute, used as
            the network's input size.
        hidden_dims: Sizes of the hidden layers; ``None`` (default) means
            ``[50, 50]``. Any sequence of ints is accepted.
        learning_rate: Learning rate of the Adam optimizer.
        buffer_size: Capacity passed to ``Buffer``.
        batch_size: Stored for training.
        num_batches: Stored for training.
        starts_learning: Stored for training.
        final_epsilon: Stored as ``self.final_epsilon``.
        discount: Stored as ``self.discount``.
        target_freq: The target network is updated every ``target_freq``
            episodes.
        verbose: Stored for debugging output.
        print_every: Stored for debugging output.
        test_model_path: If given, the agent is put in test mode: weights
            are loaded from this path and no training state is created.
    """
    Agent.__init__(self, action_set, reward_function)

    # Avoid a shared mutable default for the hidden layer sizes.
    if hidden_dims is None:
        hidden_dims = [50, 50]

    self.feature_extractor = feature_extractor
    self.feature_dim = self.feature_extractor.dimension

    # build Q network
    # we use a multilayer perceptron
    dims = [self.feature_dim] + list(hidden_dims) + [len(self.action_set)]
    self.model = MLP(dims)

    if test_model_path is None:
        self.test_mode = False
        self.learning_rate = learning_rate
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.num_batches = num_batches
        self.starts_learning = starts_learning
        self.epsilon = 1.0  # anneals starts_learning/(starts_learning + t)
        # Honour the caller's value instead of a hard-coded 0.02.
        self.final_epsilon = final_epsilon
        self.timestep = 0
        self.discount = discount

        self.buffer = Buffer(self.buffer_size)

        # The target network starts as a frozen copy of the Q network.
        self.target_net = MLP(dims)
        self.target_net.load_state_dict(self.model.state_dict())
        self.target_net.eval()

        # target nn updated every target_freq episodes
        self.target_freq = target_freq
        self.num_episodes = 0

        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.learning_rate)

        # for debugging purposes
        self.verbose = verbose
        self.running_loss = 1.
        self.print_every = print_every
    else:
        self.test_mode = True
        # NOTE: torch.load unpickles the file; only load checkpoints from
        # trusted sources.
        self.model.load_state_dict(torch.load(test_model_path))
        self.model.eval()
def make_model(self):
    """Build placeholders, the graph model and one regression head per task.

    For each task the mean absolute error is stored in
    ``self.ops['accuracy_task<id>']``; the sum of the per-task mean squared
    losses is stored in ``self.ops['loss']``. No target mask is used.
    """
    task_ids = self.params['task_ids']

    self.placeholders['target_values'] = tf.placeholder(
        tf.float32, [len(task_ids), None], name='target_values')
    self.placeholders['num_graphs'] = tf.placeholder(
        tf.int64, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
        tf.float32, [], name='out_layer_dropout_keep_prob')

    with tf.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        # With 'use_graph' off, the node representations are all zeros.
        if self.params['use_graph']:
            node_reprs = self.compute_final_node_representations()
        else:
            node_reprs = tf.zeros_like(
                self.placeholders['initial_node_representation'])
        self.ops['final_node_representations'] = node_reprs

    self.ops['losses'] = []
    for (internal_id, task_id) in enumerate(task_ids):
        gate_key = 'regression_gate_task%i' % task_id
        transform_key = 'regression_transform_task%i' % task_id
        with tf.variable_scope("out_layer_task%i" % task_id):
            with tf.variable_scope("regression_gate"):
                self.weights[gate_key] = MLP(
                    2 * self.params['hidden_size'], 1, [],
                    self.placeholders['out_layer_dropout_keep_prob'])
            with tf.variable_scope("regression"):
                self.weights[transform_key] = MLP(
                    self.params['hidden_size'], 1, [],
                    self.placeholders['out_layer_dropout_keep_prob'])

            computed_values = self.gated_regression(
                self.ops['final_node_representations'],
                self.weights[gate_key],
                self.weights[transform_key])

            targets = self.placeholders['target_values'][internal_id, :]
            error = computed_values - targets

            # Mean absolute error, stored under the "accuracy" key.
            self.ops['accuracy_task%i' % task_id] = tf.reduce_mean(
                tf.abs(error))
            self.ops['losses'].append(tf.reduce_mean(0.5 * error**2))

    self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
def computeLoc(self, tokenRep, finalRep):
    """Score every unmasked node with two outputs.

    Args:
        tokenRep: Per-node representation; filtered by the ``nodeMask``
            placeholder.
        finalRep: Representation indexed through the ``nodeIndexInGraph``
            placeholder with ``tf.gather_nd`` before being masked.

    Returns:
        The transposed score tensor. Author's shape note: [2, #Nodes].
    """
    hidden = self.params['hidden_size']

    with tf.variable_scope("colsplit"):
        # Three projections built without dropout (keep probability 1.0).
        with tf.variable_scope("W1"):
            token_proj = MLP(hidden, hidden, [], 1.0)
        with tf.variable_scope("W2"):
            final_proj = MLP(hidden, hidden, [], 1.0)
        with tf.variable_scope("W3"):
            score_proj = MLP(hidden, 2, [], 1.0)

        node_mask = self.placeholders['nodeMask']
        # Author's shape note: [#Nodes, 1]
        node_index = self.placeholders['nodeIndexInGraph']

        # Look up the ``finalRep`` entry of each node, then drop masked-out
        # nodes from both inputs.
        gathered_final = tf.gather_nd(finalRep, node_index)
        kept_final = tf.boolean_mask(gathered_final, node_mask)
        kept_token = tf.boolean_mask(tokenRep, node_mask)

        combined = token_proj(kept_token) + final_proj(kept_final)
        scores = score_proj(combined)  # author's note: [#Nodes, 2]
        return tf.transpose(scores)
def __init__(self, T):
    """Create the encoder, update and classifier MLPs.

    Args:
        T: Stored as ``self.T``; not otherwise used in this constructor.
    """
    super().__init__()
    self.T = T

    # Encoders: 256 -> 32 for vertices, 6 -> 16 for edges.
    self.vert_encoder = MLP([256, 128, 32])
    self.edge_encoder = MLP([6, 18, 18, 16])

    # Update networks with fixed layer sizes.
    edge_mlp_in = 96 + 32 + 32
    self.edge_mlp = MLP([edge_mlp_in, 80, 16])
    self.vert_mlp = MLP([64, 32, 32])

    # Two MLPs with identical layer sizes.
    flow_dims = [48, 56, 32]
    self.flow_in_mlp = MLP(list(flow_dims))
    self.flow_out_mlp = MLP(list(flow_dims))

    # One output per edge; ``last_act=None`` is passed to the MLP.
    self.edge_classifier = MLP([16, 16, 1], last_act=None)
def __init__(self, pc, action_vocab, word_vocab_size, rel_vocab_size, layers,
             hidden_dim, labelled=True, tied=False):
    """Create the MLPs, stack LSTM and embedding tables of the model.

    Args:
        pc: Parent DyNet parameter collection; parameters are registered in
            ``pc.add_subcollection()``.
        action_vocab: Action vocabulary; its length is the output size of
            the action MLP.
        word_vocab_size: Output size of the word MLP and, when not tied,
            number of rows of the word embedding table.
        rel_vocab_size: Number of relations (used only when ``labelled``).
        layers: Number of layers of the stack LSTM.
        hidden_dim: Width of the embeddings, LSTM and MLP hidden layers.
        labelled: When true, relation embeddings and a relation MLP are
            created as well.
        tied: When true, no separate word-embedding table is allocated.
    """
    self.labelled = labelled
    self.tied = tied
    self.action_vocab = action_vocab
    self.pc = pc.add_subcollection()
    n_actions = len(action_vocab)

    # Parameters are created in a fixed order; keep it stable.
    if not self.tied:
        word_table_shape = (word_vocab_size, hidden_dim)
        self.word_embs = self.pc.add_lookup_parameters(word_table_shape)

    self.action_mlp = MLP(self.pc, [hidden_dim, hidden_dim, n_actions])
    self.word_mlp = MLP(self.pc, [hidden_dim, hidden_dim, word_vocab_size])
    # Takes an input twice as wide as ``hidden_dim``.
    self.combine_mlp = MLP(self.pc, [2 * hidden_dim, hidden_dim, hidden_dim])

    self.stack_lstm = dy.LSTMBuilder(layers, hidden_dim, hidden_dim,
                                     self.pc)

    # Two learned start vectors per LSTM layer.
    start_vectors = []
    for _ in range(2 * layers):
        start_vectors.append(self.pc.add_parameters((hidden_dim, )))
    self.initial_state_params = start_vectors

    self.stack_embs = []

    if labelled:
        rel_table_shape = (rel_vocab_size, hidden_dim)
        self.rel_embs = self.pc.add_lookup_parameters(rel_table_shape)
        self.rel_mlp = MLP(self.pc, [hidden_dim, hidden_dim, rel_vocab_size])
class RNNLM:
    """LSTM language model built on DyNet.

    Each word is predicted from the LSTM output before that word is fed in.
    ``new_graph`` must be called before ``build_graph`` or ``sample``
    because it creates ``self.initial_state``.

    The class exposes ``spec``, ``param_collection`` and ``from_spec`` so an
    instance can be rebuilt from its spec and a parameter collection.
    """

    def __init__(self, pc, layers, emb_dim, hidden_dim, vocab_size, tied):
        """Create the LSTM, output MLP and (optionally) word embeddings.

        Args:
            pc: Parent DyNet parameter collection; parameters are registered
                in ``pc.add_subcollection()``.
            layers: Number of LSTM layers.
            emb_dim: LSTM input size and width of the embedding table.
            hidden_dim: LSTM state size and hidden width of the output MLP.
            vocab_size: Output size of the MLP / rows of the embedding
                table.
            tied: When true, rows of the output layer's weight matrix are
                used as word embeddings and no separate table is created.
        """
        # ``spec`` holds every constructor argument after ``pc`` so that
        # ``from_spec`` can call the constructor again. ``tied`` is
        # included: without it ``RNNLM(pc, *spec)`` fails with a missing
        # argument.
        self.spec = (layers, emb_dim, hidden_dim, vocab_size, tied)
        self.pc = pc.add_subcollection()
        self.rnn = dy.LSTMBuilder(layers, emb_dim, hidden_dim, self.pc)
        # Two learned start vectors per LSTM layer.
        self.initial_state_params = [
            self.pc.add_parameters((hidden_dim, )) for _ in range(2 * layers)
        ]
        self.output_mlp = MLP(self.pc, [hidden_dim, hidden_dim, vocab_size])
        self.tied = tied
        if not self.tied:
            self.word_embs = self.pc.add_lookup_parameters(
                (vocab_size, emb_dim))
        self.dropout_rate = 0.0

    def new_graph(self):
        """Re-create the per-computation-graph expressions."""
        self.output_mlp.new_graph()
        self.initial_state = [
            dy.parameter(p) for p in self.initial_state_params
        ]

    def set_dropout(self, r):
        """Set the dropout rate ``r`` on the embeddings, MLP and LSTM."""
        self.dropout_rate = r
        self.output_mlp.set_dropout(r)
        self.rnn.set_dropout(r)

    def embed_word(self, word):
        """Return the embedding expression of word id ``word``."""
        if self.tied:
            # Reuse one row of the last output layer's weight matrix.
            word_embs = self.output_mlp.layers[-1].w
            word_emb = dy.select_rows(word_embs, [word])
            word_emb = dy.transpose(word_emb)
        else:
            word_emb = dy.lookup(self.word_embs, word)
        return word_emb

    def build_graph(self, sent):
        """Return the summed negative log-likelihood of ``sent``.

        Args:
            sent: Iterable of word ids.
        """
        state = self.rnn.initial_state()
        state = state.set_s(self.initial_state)

        losses = []
        for word in sent:
            assert state is not None
            so = state.output()
            assert so is not None

            # Score the word before it is fed to the LSTM.
            output_dist = self.output_mlp(so)
            loss = dy.pickneglogsoftmax(output_dist, word)
            losses.append(loss)

            word_emb = self.embed_word(word)
            if self.dropout_rate > 0.0:
                word_emb = dy.dropout(word_emb, self.dropout_rate)
            state = state.add_input(word_emb)

        return dy.esum(losses)

    def sample(self, eos, max_len):
        """Sample a sequence of word ids from the model.

        Sampling stops after ``eos`` has been drawn (it is included in the
        result) or once ``max_len`` words have been produced. The caller is
        expected to have set up the computation graph (``new_graph``).

        Args:
            eos: Word id that ends the sequence.
            max_len: Maximum number of words to draw.

        Returns:
            List of sampled word ids.
        """
        state = self.rnn.initial_state()
        state = state.set_s(self.initial_state)

        sent = []
        while len(sent) < max_len:
            assert state is not None
            so = state.output()
            assert so is not None

            output_dist = dy.softmax(self.output_mlp(so))
            output_dist = output_dist.vec_value()
            # ``sample`` here is the module-level helper, not this method.
            word = sample(output_dist)
            sent.append(word)
            if word == eos:
                break

            word_emb = self.embed_word(word)
            state = state.add_input(word_emb)

        return sent

    def param_collection(self):
        """Return the parameter sub-collection owned by this model."""
        return self.pc

    @staticmethod
    def from_spec(spec, pc):
        """Rebuild a model from ``spec`` inside parameter collection ``pc``.

        ``spec`` must be the tuple stored in ``self.spec``, i.e. all
        constructor arguments after ``pc``.
        """
        rnnlm = RNNLM(pc, *spec)
        return rnnlm
class TopDownDepLM:
    """Top-down dependency language model built with DyNet.

    The model keeps a stack of "open constituents", each summarised by a
    state of ``vertical_lstm``. To predict the next token the outputs of all
    open constituents are fed, bottom to top, through ``top_lstm`` and the
    final output is scored by ``final_mlp``.

    ``debug_stack`` mirrors the stack with plain words and is only used for
    debugging.
    """

    # Class-level flag; not read by any method of this class.
    warned = False

    def __init__(self, pc, vocab, layers, state_dim, final_hidden_dim, tied,
                 residual):
        """Allocate all parameters in a fresh sub-collection of ``pc``.

        Args:
            pc: parent parameter collection.
            vocab: vocabulary offering ``convert``, ``to_word`` and ``len``;
                must contain ``'</LEFT>'`` and ``'</RIGHT>'``.
            layers: number of layers of both LSTMs.
            state_dim: size of embeddings and LSTM states.
            final_hidden_dim: hidden size of the output MLP.
            tied: if true, word embeddings are rows of the output MLP's
                last layer instead of a separate lookup table.
            residual: if true, ``compute_loss`` mixes each constituent
                output with the embedding of its head word through a gate.
        """
        self.vocab = vocab
        self.layers = layers
        self.state_dim = state_dim
        self.tied = tied
        self.residual = residual
        self.dropout_rate = 0.0

        self.done_with_left = vocab.convert('</LEFT>')
        self.done_with_right = vocab.convert('</RIGHT>')
        vocab_size = len(self.vocab)

        self.pc = pc.add_subcollection()
        if not self.tied:
            self.word_embs = self.pc.add_lookup_parameters(
                (vocab_size, state_dim))
        self.top_lstm = dy.LSTMBuilder(layers, state_dim, state_dim, self.pc)
        self.vertical_lstm = dy.LSTMBuilder(layers, state_dim, state_dim,
                                            self.pc)
        self.gate_mlp = MLP(self.pc, [2 * state_dim, state_dim, state_dim])

        # Per-sentence state, reset by new_sent().
        self.open_constit_lstms = []
        self.debug_stack = []
        self.spine = []

        self.final_mlp = MLP(self.pc,
                             [state_dim, final_hidden_dim, vocab_size])
        # Learned initial states: 2 vectors per LSTM layer.
        self.top_initial_state = [
            self.pc.add_parameters((state_dim, )) for _ in range(2 * layers)
        ]
        self.open_initial_state = [
            self.pc.add_parameters((state_dim, )) for _ in range(2 * layers)
        ]

    def set_dropout(self, r):
        """Set dropout rate ``r`` on both LSTMs and the output MLP."""
        self.dropout_rate = r
        self.top_lstm.set_dropout(r)
        self.vertical_lstm.set_dropout(r)
        self.final_mlp.set_dropout(r)

    def new_graph(self):
        """Prepare both MLPs for a new computation graph."""
        # Do LSTM builders need reset?
        self.final_mlp.new_graph()
        self.gate_mlp.new_graph()

    def embed_word(self, word):
        """Return the embedding expression for word id ``word``."""
        if self.tied:
            # Assumes `w` holds one row per vocabulary word -- TODO confirm
            # against the MLP implementation.
            word_embs = self.final_mlp.layers[-1].w
            word_emb = dy.select_rows(word_embs, [word])
            word_emb = dy.transpose(word_emb)
        else:
            word_emb = dy.lookup(self.word_embs, word)
        return word_emb

    def add_to_last(self, word):
        """Feed ``word`` into the topmost open constituent."""
        assert len(self.open_constit_lstms) > 0
        word_emb = self.embed_word(word)
        self.open_constit_lstms[-1] = \
            self.open_constit_lstms[-1].add_input(word_emb)
        self.debug_stack[-1].append(self.vocab.to_word(word))

    def pop_and_add(self, word):
        """Close the topmost constituent with ``word`` and attach it below.

        The closed constituent's final output becomes an input of the new
        topmost constituent, if one remains.
        """
        assert len(self.open_constit_lstms) >= 1
        word_emb = self.embed_word(word)
        child_state = self.open_constit_lstms[-1].add_input(word_emb)
        child_emb = child_state.output()

        self.open_constit_lstms.pop()
        if len(self.open_constit_lstms) > 0:
            self.open_constit_lstms[-1] = \
                self.open_constit_lstms[-1].add_input(child_emb)
        self.spine.pop()

        self.debug_stack[-1].append(self.vocab.to_word(word))
        debug_child = self.debug_stack.pop()
        if len(self.debug_stack) > 0:
            self.debug_stack[-1].append(debug_child)

    def push(self, word):
        """Open a new constituent headed by ``word``."""
        word_emb = self.embed_word(word)
        new_state = self.vertical_lstm.initial_state()
        new_state = new_state.set_s(self.open_initial_state)
        new_state = new_state.add_input(word_emb)
        self.open_constit_lstms.append(new_state)
        self.spine.append(word)
        self.debug_stack.append([self.vocab.to_word(word)])

    def add_input(self, state, word):
        """Advance the parser by one token and return the new ``ParserState``.

        ``</LEFT>`` extends the topmost constituent, ``</RIGHT>`` closes it,
        and any other word opens a new constituent. ``state`` is accepted for
        interface reasons only; the stack kept on ``self`` is what is updated.
        """
        # The helpers embed the word themselves, so no embedding is built
        # here (an extra one would only add an unused graph node).
        if word == self.done_with_left:
            self.add_to_last(word)
        elif word == self.done_with_right:
            self.pop_and_add(word)
        else:
            self.push(word)
        assert len(self.debug_stack) == len(self.open_constit_lstms)
        return ParserState(self.open_constit_lstms, self.spine)

    def new_sent(self):
        """Reset the stack to a single empty root constituent."""
        new_state = self.vertical_lstm.initial_state()
        new_state = new_state.set_s(self.open_initial_state)
        self.open_constit_lstms = [new_state]
        self.spine = [-1]  # -1 marks "no head word" for the root entry
        self.debug_stack = [[]]
        return ParserState(self.open_constit_lstms, self.spine)

    def debug_embed_vertical(self, vertical):
        """Re-embed one ``debug_stack`` entry from scratch.

        ``vertical`` is a list whose items are either words or nested lists
        (closed child constituents), which are embedded recursively.
        """
        state = self.vertical_lstm.initial_state()
        state = state.set_s(self.open_initial_state)
        for word in vertical:
            if isinstance(word, list):
                emb = self.debug_embed_vertical(word)
            else:
                emb = self.embed_word(self.vocab.convert(word))
            state = state.add_input(emb)
        return state.output()

    def debug_embed(self):
        """Recompute the top-level LSTM state from ``debug_stack`` alone.

        Intended as a cross-check against the incrementally maintained
        ``open_constit_lstms``; the comparison itself is left to the caller.
        """
        top_state = self.top_lstm.initial_state()
        top_state = top_state.set_s(self.top_initial_state)
        assert len(self.open_constit_lstms) == len(self.debug_stack)
        for open_constit in self.debug_stack:
            emb = self.debug_embed_vertical(open_constit)
            top_state = top_state.add_input(emb)
        return top_state

    def compute_loss(self, state, word):
        """Return the negative log-likelihood of ``word`` given ``state``.

        Args:
            state: object with parallel sequences ``open_constits`` (LSTM
                states) and ``spine`` (head word ids, -1 for none).
            word: id of the token to predict.
        """
        top_state = self.top_lstm.initial_state()
        top_state = top_state.set_s(self.top_initial_state)
        assert len(state.open_constits) == len(state.spine)
        for open_constit, spine_word in zip(state.open_constits, state.spine):
            constit_emb = open_constit.output()
            if self.residual and spine_word != -1:
                # Gated mix of the constituent summary and its head word:
                # (1 - g) * constituent + g * head, g = sigmoid(gate_mlp(..)).
                spine_word_emb = self.embed_word(spine_word)
                inp = dy.concatenate([constit_emb, spine_word_emb])
                mask = dy.logistic(self.gate_mlp(inp))
                constit_emb = dy.cmult(1 - mask, constit_emb)
                constit_emb = constit_emb + dy.cmult(mask, spine_word_emb)
            top_state = top_state.add_input(constit_emb)

        logits = self.final_mlp(top_state.output())
        return dy.pickneglogsoftmax(logits, word)

    def build_graph(self, sent):
        """Return the summed negative log-likelihood of the token sequence ``sent``."""
        state = self.new_sent()
        losses = []
        for word in sent:
            # Predict first, then update the stack with the gold token.
            losses.append(self.compute_loss(state, word))
            state = self.add_input(state, word)
        return dy.esum(losses)
def sparse_gnn_edge_mlp_layer(node_embeddings: tf.Tensor,
                              adjacency_lists: List[tf.Tensor],
                              type_to_num_incoming_edges: tf.Tensor,
                              state_dim: Optional[int],
                              num_timesteps: int = 1,
                              activation_function: Optional[str] = "ReLU",
                              message_aggregation_function: str = "sum",
                              normalize_by_num_incoming: bool = False,
                              use_target_state_as_input: bool = True,
                              num_edge_hidden_layers: int = 1) -> tf.Tensor:
    r"""
    Compute new graph states by neural message passing using an edge MLP.
    For this, we assume existing node states h^t_v and a list of per-edge-type
    adjacency matrices A_\ell.

    We compute new states as follows:
        h^{t+1}_v := \sum_\ell \sum_{(u, v) \in A_\ell}
                         \sigma(1/c_{v,\ell} * MLP_\ell(h^t_u || h^t_v))
    where \sum stands for the chosen message aggregation function.
    c_{v,\ell} is the number of incoming edges of type \ell if
    normalize_by_num_incoming is set; otherwise no scaling is applied.
    The learnable parameters of this are the per-edge-type MLPs MLP_\ell.

    We use the following abbreviations in shape descriptions:
    * V: number of nodes
    * D: state dimension
    * L: number of different edge types
    * E: number of edges of a given edge type
    * M: number of messages (sum of all E)

    Arguments:
        node_embeddings: float32 tensor of shape [V, D], the original
            representation of each node in the graph.
        adjacency_lists: List of L adjacency lists, represented as int32
            tensors of shape [E, 2]. Concretely, adjacency_lists[l][k,:] ==
            [v, u] means that the k-th edge of type l connects node v to
            node u (column 0 is the source, column 1 the target).
        type_to_num_incoming_edges: float32 tensor of shape [L, V]
            representing the number of incoming edges of a given type.
            Concretely, type_to_num_incoming_edges[l, v] is the number of
            edges of type l connecting to node v. Only read if
            normalize_by_num_incoming is set.
        state_dim: Optional size of output dimension of the GNN layer. If not
            set, defaults to D, the dimensionality of the input. If different
            from the input dimension, parameter num_timesteps has to be 1.
        num_timesteps: Number of repeated applications of this message
            passing layer.
        activation_function: Type of activation function applied to the edge
            MLP outputs. (The activation inside the edge MLPs is fixed to
            ELU and not affected by this.)
        message_aggregation_function: Type of aggregation function used for
            messages.
        normalize_by_num_incoming: Flag indicating if messages should be
            scaled by 1/(number of incoming edges).
        use_target_state_as_input: Flag indicating if the edge MLP should
            consume both source and target state (True) or only source state
            (False).
        num_edge_hidden_layers: Number of hidden layers of the edge MLP.

    Returns:
        float32 tensor of shape [V, state_dim]
    """
    num_nodes = tf.shape(input=node_embeddings, out_type=tf.int32)[0]
    if state_dim is None:
        state_dim = tf.shape(input=node_embeddings, out_type=tf.int32)[1]

    # === Prepare things we need across all timesteps:
    activation_fn = get_activation(activation_function)
    message_aggregation_fn = get_aggregation_function(
        message_aggregation_function)
    edge_type_to_edge_mlp = []  # MLPs to compute the edge messages
    edge_type_to_message_targets = []  # List of tensors of message targets
    for edge_type_idx, adjacency_list_for_edge_type in enumerate(
            adjacency_lists):
        edge_type_to_edge_mlp.append(
            MLP(out_size=state_dim,
                hidden_layers=num_edge_hidden_layers,
                activation_fun=tf.nn.elu,
                name="Edge_%i_MLP" % edge_type_idx))
        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])

    # Let M be the number of messages (sum of all E):
    message_targets = tf.concat(edge_type_to_message_targets,
                                axis=0)  # Shape [M]

    cur_node_states = node_embeddings
    for _ in range(num_timesteps):
        messages_per_type = []  # list of tensors of messages of shape [E, D]
        # Collect incoming messages per edge type
        for edge_type_idx, adjacency_list_for_edge_type in enumerate(
                adjacency_lists):
            edge_sources = adjacency_list_for_edge_type[:, 0]
            edge_targets = adjacency_list_for_edge_type[:, 1]
            edge_source_states = tf.nn.embedding_lookup(
                params=cur_node_states, ids=edge_sources)  # Shape [E, D]

            edge_mlp_inputs = edge_source_states
            if use_target_state_as_input:
                edge_target_states = tf.nn.embedding_lookup(
                    params=cur_node_states, ids=edge_targets)  # Shape [E, D]
                edge_mlp_inputs = tf.concat(
                    [edge_source_states, edge_target_states],
                    axis=1)  # Shape [E, 2*D]

            messages = edge_type_to_edge_mlp[edge_type_idx](
                edge_mlp_inputs)  # Shape [E, D]

            if normalize_by_num_incoming:
                per_message_num_incoming_edges = tf.nn.embedding_lookup(
                    params=type_to_num_incoming_edges[edge_type_idx, :],
                    ids=edge_targets)  # Shape [E]
                # SMALL_NUMBER guards against division by zero.
                messages = tf.expand_dims(
                    1.0 / (per_message_num_incoming_edges + SMALL_NUMBER),
                    axis=-1) * messages

            messages_per_type.append(messages)

        all_messages = tf.concat(messages_per_type, axis=0)  # Shape [M, D]
        # Apply nonlinearity to Edge-MLP outputs as well
        all_messages = activation_fn(all_messages)  # Shape [M, D]

        # The aggregated messages are the new node states.
        cur_node_states = message_aggregation_fn(
            data=all_messages,
            segment_ids=message_targets,
            num_segments=num_nodes)  # Shape [V, D]

    return cur_node_states
def make_model(self):
    """Build placeholders, the graph model and per-task binary classification ops.

    For every task a gated-regression head yields one logit per graph. The
    loss is the mean sigmoid cross-entropy against the task's row of
    ``target_values``; predictions are ``sigmoid(logit) >= self.threshold``.

    Ops written to ``self.ops``: ``final_node_representations``, ``losses``,
    ``loss``, ``accuracy_task<id>`` and the ``sigm_*`` / ``new_computed_values``
    diagnostics. The un-suffixed diagnostics are overwritten on every loop
    iteration, so after the loop they describe the last task only. The
    ``target_mask`` placeholder is created here but not read in this function.

    NOTE(review): recall / precision / FPR / F1 have no guard against a zero
    denominator.
    """
    num_tasks = len(self.params['task_ids'])
    self.placeholders['target_values'] = tf.placeholder(
        tf.float32, [num_tasks, None], name='target_values')
    self.placeholders['target_mask'] = tf.placeholder(
        tf.float32, [num_tasks, None], name='target_mask')
    self.placeholders['num_graphs'] = tf.placeholder(tf.int32, [],
                                                     name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
        tf.float32, [], name='out_layer_dropout_keep_prob')

    with tf.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        # This does the actual graph work:
        if self.params['use_graph']:
            self.ops['final_node_representations'] = \
                self.compute_final_node_representations()
        else:
            self.ops['final_node_representations'] = tf.zeros_like(
                self.placeholders['process_raw_graphs'])

    self.ops['losses'] = []
    for (internal_id, task_id) in enumerate(self.params['task_ids']):
        with tf.variable_scope("out_layer_task%i" % task_id):
            with tf.variable_scope("regression_gate"):
                self.weights['regression_gate_task%i' % task_id] = MLP(
                    2 * self.params['hidden_size'], 1, [],
                    self.placeholders['out_layer_dropout_keep_prob'])
            with tf.variable_scope("regression"):
                self.weights['regression_transform_task%i' % task_id] = MLP(
                    self.params['hidden_size'], 1, [],
                    self.placeholders['out_layer_dropout_keep_prob'])
            computed_values, sigm_val = self.gated_regression(
                self.ops['final_node_representations'],
                self.weights['regression_gate_task%i' % task_id],
                self.weights['regression_transform_task%i' % task_id])

            task_targets = self.placeholders['target_values'][internal_id, :]

            new_computed_values = tf.nn.sigmoid(computed_values)
            new_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=computed_values, labels=task_targets))

            # Thresholded 0/1 predictions as float32. A plain cast replaces
            # the former tf.py_func round trip through numpy.
            a = tf.cast(
                tf.math.greater_equal(new_computed_values, self.threshold),
                tf.float32)
            correct_pred = tf.equal(a, task_targets)

            self.ops['new_computed_values'] = new_computed_values
            self.ops['sigm_val'] = sigm_val
            self.ops['accuracy_task%i' % task_id] = tf.reduce_mean(
                tf.cast(correct_pred, tf.float32))

            # Encode (label, prediction) as d = 2 * label + prediction:
            #   3 -> TP, 2 -> FN, 1 -> FP, 0 -> TN
            b = tf.multiply(task_targets, 2)
            c = tf.cast(a, tf.float32)
            d = tf.math.add(b, c)
            self.ops['sigm_c'] = correct_pred

            d_TP = tf.math.equal(d, 3)
            TP = tf.reduce_sum(tf.cast(d_TP, tf.float32))  # true positive
            d_FN = tf.math.equal(d, 2)
            FN = tf.reduce_sum(tf.cast(d_FN, tf.float32))  # false negative
            d_FP = tf.math.equal(d, 1)
            FP = tf.reduce_sum(tf.cast(d_FP, tf.float32))  # false positive
            d_TN = tf.math.equal(d, 0)
            TN = tf.reduce_sum(tf.cast(d_TN, tf.float32))  # true negative

            self.ops['sigm_sum'] = tf.add_n([TP, FN, FP, TN])
            self.ops['sigm_TP'] = TP
            self.ops['sigm_FN'] = FN
            self.ops['sigm_FP'] = FP
            self.ops['sigm_TN'] = TN

            R = tf.cast(tf.divide(TP, tf.add(TP, FN)), tf.float32)  # Recall
            P = tf.cast(tf.divide(TP, tf.add(TP, FP)),
                        tf.float32)  # Precision
            FPR = tf.cast(tf.divide(FP, tf.add(TN, FP)),
                          tf.float32)  # FPR: false positive rate
            D_TP = tf.add(TP, TP)
            F1 = tf.cast(tf.divide(D_TP, tf.add_n([D_TP, FP, FN])),
                         tf.float32)  # F1
            self.ops['sigm_Recall'] = R
            self.ops['sigm_Precision'] = P
            self.ops['sigm_F1'] = F1
            self.ops['sigm_FPR'] = FPR
            self.ops['losses'].append(new_loss)
    self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
def make_model(self):
    """Build placeholders, the graph model and the per-task loss ops.

    The layout of the target placeholders and of the loss depends on the
    problem selected with ``self.args['--pr']``:

    * ``'molecule'``: targets are indexed ``[task, :]`` and the loss is
      delegated to ``self.calculate_losses_for_molecules``.
    * ``'identity'``: 4-d targets, masked cross-entropy loss.
    * ``'btb'``: separate head and edge targets; the loss is the sum of a
      head and an edge cross-entropy term.
    * anything else: targets indexed ``[:, task, :]``, masked cross-entropy.

    Edge-specific results (``losses_edges`` entries and
    ``computed_values_edges``) exist only for ``'btb'`` and are recorded only
    in that mode; for the other modes ``loss_edges`` is the sum of an empty
    list. Shape remarks in the comments are the original author's notes and
    have not been verified here.
    """
    # TODO: refactor
    problem = self.args['--pr']
    is_btb = problem in ['btb']
    num_tasks = len(self.params['task_ids'])

    if problem == 'molecule':
        self.placeholders['target_values'] = tf.compat.v1.placeholder(
            tf.float32, [num_tasks, None], name='target_values')
        self.placeholders['target_mask'] = tf.compat.v1.placeholder(
            tf.float32, [num_tasks, None], name='target_mask')
    elif problem in ['identity']:
        self.placeholders['target_values'] = tf.compat.v1.placeholder(
            tf.float32, [None, None, self.num_edge_types, None],
            name='target_values')
        self.placeholders['target_mask'] = tf.compat.v1.placeholder(
            tf.float32, [self.num_edge_types, None], name='target_mask')
    elif is_btb:
        self.placeholders['target_values_head'] = tf.compat.v1.placeholder(
            tf.float32, [None, None], name='target_values')
        self.placeholders['target_mask'] = tf.compat.v1.placeholder(
            tf.float32, [self.output_size_edges, None], name='target_mask')
        self.placeholders['target_values_edges'] = tf.compat.v1.placeholder(
            tf.float32, [None, None], name='target_values')
    else:
        self.placeholders['target_values'] = tf.compat.v1.placeholder(
            tf.float32, [None, num_tasks, None], name='target_values')
        self.placeholders['target_mask'] = tf.compat.v1.placeholder(
            tf.float32, [num_tasks, None], name='target_mask')

    self.placeholders['num_graphs'] = tf.compat.v1.placeholder(
        tf.int32, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = \
        tf.compat.v1.placeholder(tf.float32, [],
                                 name='out_layer_dropout_keep_prob')

    with tf.compat.v1.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        # This does the actual graph work:
        self.ops['initial_node_representations'] = \
            self.get_initial_node_representation()
        if self.params['use_graph']:
            self.ops['final_node_representations'] = \
                self.compute_final_node_representations(
                    self.ops['initial_node_representations'])
            self.ops['second_node_representations'] = \
                self.compute_final_node_representations(
                    self.ops['initial_node_representations'], 1)
        else:
            self.ops['final_node_representations'] = tf.zeros_like(
                self.placeholders['initial_node_representation'])

    self.ops['losses'] = []
    self.ops['losses_edges'] = []
    for (internal_id, task_id) in enumerate(self.params['task_ids']):
        with tf.compat.v1.variable_scope("out_layer_task%i" % task_id):
            output_size = self.params['output_size']
            hidden = []
            with tf.compat.v1.variable_scope("regression_gate"):
                self.weights['regression_gate_task%i' % task_id] = MLP(
                    2 * self.params['hidden_size'], output_size, hidden,
                    self.placeholders['out_layer_dropout_keep_prob'])
                self.weights['regression_gate_task_edges%i' % task_id] = MLP(
                    2 * self.params['hidden_size'], self.output_size_edges,
                    [], self.placeholders['out_layer_dropout_keep_prob'])
            with tf.compat.v1.variable_scope("regression"):
                self.weights['regression_transform_task%i' % task_id] = MLP(
                    self.params['hidden_size'], output_size, [],
                    self.placeholders['out_layer_dropout_keep_prob'])
                self.weights[
                    'regression_transform_task_edges%i' % task_id] = MLP(
                        self.params['hidden_size'], self.output_size_edges,
                        [], self.placeholders['out_layer_dropout_keep_prob'])

            # BTB [b, v * o]   ID [e * v * o, b]   (o is 1 for BTB)
            computed_values = self.gated_regression(
                self.ops['final_node_representations'],
                self.ops['initial_node_representations'],
                self.weights['regression_gate_task%i' % task_id],
                self.weights['regression_transform_task%i' % task_id], None)
            if is_btb:
                computed_values_edges = self.gated_regression(
                    self.ops['final_node_representations'],
                    self.ops['initial_node_representations'],
                    self.weights['regression_gate_task_edges%i' % task_id],
                    self.weights['regression_transform_task_edges%i' %
                                 task_id],
                    None,
                    is_edge_regr=True)  # [b, v * e]

            task_target_mask = self.placeholders['target_mask'][
                internal_id, :]  # [b]
            # SMALL_NUMBER guards against division by zero.
            task_target_num = tf.reduce_sum(
                input_tensor=task_target_mask) + SMALL_NUMBER

            if problem == 'molecule':
                labels = self.placeholders['target_values'][internal_id, :]
                mask = tf.transpose(a=self.placeholders['node_mask'])
            elif problem in ['identity']:
                labels = self.placeholders['target_values']  # [o, v, e, b]
                labels = tf.transpose(a=labels,
                                      perm=[2, 1, 0, 3])  # [e, v, o, b]
                labels = tf.reshape(
                    labels,
                    [-1, self.placeholders['num_graphs']])  # [e * v * o, b]
                # node_mask is [b, e * v * o]; transposed to [e * v * o, b]
                mask = tf.transpose(a=self.placeholders['node_mask'])
            elif is_btb:
                labels = self.placeholders['target_values_head']  # [b, v * o]
                mask = self.placeholders['node_mask']  # [b, v * o]
                labels_edges = self.placeholders[
                    'target_values_edges']  # [b, v * e]
                mask_edges = self.placeholders[
                    'node_mask_edges']  # [b, v * e]
            else:
                labels = self.placeholders['target_values'][:,
                                                            internal_id, :]
                mask = tf.transpose(a=self.placeholders['node_mask'])

            if problem == 'molecule':
                self.calculate_losses_for_molecules(computed_values,
                                                    internal_id, task_id)
            else:
                if is_btb:
                    task_loss_heads = tf.reduce_sum(-tf.reduce_sum(
                        labels * tf.math.log(computed_values),
                        axis=1)) / task_target_num
                    task_loss_edges = tf.reduce_sum(-tf.reduce_sum(
                        labels_edges * tf.math.log(computed_values_edges),
                        axis=1)) / task_target_num
                    task_loss = (task_loss_heads + task_loss_edges)
                else:
                    if self.args.get('--no_labels'):
                        computed_values, labels, mask = \
                            self.reduce_edge_dimension(
                                computed_values=computed_values,
                                labels=labels,
                                mask=mask)
                    new_mask = tf.cast(mask, tf.bool)
                    masked_loss = tf.boolean_mask(
                        tensor=labels * tf.math.log(computed_values),
                        mask=new_mask)
                    task_loss = tf.reduce_sum(
                        input_tensor=-1 * masked_loss) / task_target_num

                # TODO: accuracy needs redoing; for now this op is the loss.
                self.ops['accuracy_task%i' % task_id] = task_loss
                self.ops['losses'].append(task_loss)
                self.ops['computed_values'] = computed_values
                if is_btb:
                    # These names exist only in 'btb' mode; reading them
                    # unconditionally raised NameError for other problems.
                    self.ops['losses_edges'].append(task_loss_edges)
                    self.ops['computed_values_edges'] = computed_values_edges
                self.ops['labels'] = labels
                self.ops['node_mask'] = mask if is_btb else tf.transpose(mask)
                self.ops['task_target_mask'] = task_target_mask

    self.ops['loss'] = tf.reduce_sum(input_tensor=self.ops['losses'])
    self.ops['loss_edges'] = tf.reduce_sum(
        input_tensor=self.ops['losses_edges'])
def init_STRFNet(sample_batch,
                 num_classes,
                 num_kernels=32,
                 residual_channels=[32, 32],
                 embedding_dimension=1024,
                 num_rnn_layers=2,
                 frame_rate=None,
                 bins_per_octave=None,
                 time_support=None,
                 frequency_support=None,
                 conv2d_sizes=(3, 3),
                 mlp_hiddims=[],
                 activate_out=nn.LogSoftmax(dim=1)):
    """Initialize a STRFNet for multi-class classification.

    This is a one-stop solution to create STRFNet and its variants. Which
    front end is built depends on the arguments:

    - ``time_support`` and ``frequency_support`` given: a STRF layer
      (``STRFConv`` if ``frame_rate`` and ``bins_per_octave`` are also given,
      ``GaborSTRFConv`` otherwise).
    - ``conv2d_sizes`` given: a plain ``nn.Conv2d`` layer.
    - both: a hybrid network using both layers.

    Parameters
    ----------
    sample_batch: [Batch,Time,Frequency] torch.FloatTensor
        A batch of examples that is passed once through the convolutional
        front end (without gradients) to infer the input size of the linear
        embedding layer.
    num_classes: int
        Number of classes for the classification task.

    Keyword Parameters
    ------------------
    num_kernels: int, 32
        2*num_kernels is the number of STRF/2D kernels.
        Doubling is due to the two orientations of the STRFs.
    residual_channels: list(int), [32, 32]
        Specify the number of conv2d channels for each residual block.
    embedding_dimension: int, 1024
        Dimension of the linear embedding fed to the RNN. The RNN is
        bidirectional, so the MLP receives 2*embedding_dimension features.
    num_rnn_layers: int, 2
        Number of stacked GRU layers.
    frame_rate: float, None
        Sampling rate [samples/second] / hop size [samples].
        No STRF kernels by default.
    bins_per_octave: int, None
        Frequency bins per octave in CQT sense. (TODO: extend for non-CQT rep.)
        No STRF kernels by default.
    time_support: float, None
        Number of seconds spanned by each STRF kernel.
        No STRF kernels by default.
    frequency_support: int/float, None
        If frame_rate or bins_per_octave is None, interpret as GaborSTRFConv.
            - Number of frequency bins (int) spanned by each STRF kernel.
              In this case time_support must be an int as well.
        Otherwise, interpret as STRFConv.
            - Number of octaves spanned by each STRF kernel.
        No STRF kernels by default.
    conv2d_sizes: (int, int), (3, 3)
        nn.Conv2d kernel dimensions. Even values are increased by one.
        Pass None to build no conv2d layer.
    mlp_hiddims: list(int), []
        Final MLP hidden layer dimensions. Default has no hidden layers.
    activate_out: callable, nn.LogSoftmax(dim=1)
        Activation function at the final layer. Default uses LogSoftmax
        for multi-class classification.

    Raises
    ------
    ValueError
        If neither a STRF layer nor a conv2d layer is requested.
    AssertionError
        If the Gabor variant is selected with non-int supports.
    """
    # The list defaults are shared between calls; hand private copies to the
    # sub-modules so nothing they do can leak into later calls or the caller.
    residual_channels = list(residual_channels)
    mlp_hiddims = list(mlp_hiddims)

    # --- Decide which front end to build ---------------------------------
    is_strfnet = time_support is not None and frequency_support is not None
    if is_strfnet:
        if frame_rate is not None and bins_per_octave is not None:
            kernel_type = 'wavelet'
        else:
            assert all(
                type(p) is int for p in (time_support, frequency_support)
            ), "Gabor STRF kernels need int time_support and frequency_support."
            kernel_type = 'gabor'
    is_cnn = conv2d_sizes is not None
    is_hybrid = is_strfnet and is_cnn

    if is_hybrid:
        print(f"Preparing for Hybrid STRFNet; kernel type is {kernel_type}.")
    elif is_strfnet:
        print(f"Preparing for STRFNet; kernel type is {kernel_type}.")
    elif is_cnn:
        print("Preparing for CNN.")
    else:
        raise ValueError("Insufficient parameters. Check example_STRFNet.")

    # --- Front-end layers -------------------------------------------------
    if not is_strfnet:
        strf_layer = None
    elif kernel_type == 'wavelet':
        strf_layer = STRFConv(frame_rate, bins_per_octave, time_support,
                              frequency_support, num_kernels)
    else:
        strf_layer = GaborSTRFConv(time_support, frequency_support,
                                   num_kernels)

    if is_cnn:
        d1, d2 = conv2d_sizes
        # Odd kernel sizes are required so that padding of size // 2 can be
        # applied symmetrically.
        if d1 % 2 == 0:
            d1 += 1
            print("Enforcing odd conv2d dimension.")
        if d2 % 2 == 0:
            d2 += 1
            print("Enforcing odd conv2d dimension.")
        conv2d_layer = nn.Conv2d(
            1,
            2 * num_kernels,  # Double to match the total number of STRFs
            (d1, d2),
            padding=(d1 // 2, d2 // 2))
    else:
        conv2d_layer = None

    # A hybrid front end concatenates STRF and conv2d outputs, hence 4x.
    residual_layer = ModResnet((4 if is_hybrid else 2) * num_kernels,
                               residual_channels, False)

    # --- Back end ---------------------------------------------------------
    # Infer the flattened feature size from a dry run on the sample batch.
    with torch.no_grad():
        flattened_dimension = STRFNet.cnn_forward(sample_batch, strf_layer,
                                                  conv2d_layer,
                                                  residual_layer).shape[-1]

    linear_layer = nn.Linear(flattened_dimension, embedding_dimension)
    rnn = nn.GRU(embedding_dimension,
                 embedding_dimension,
                 batch_first=True,
                 num_layers=num_rnn_layers,
                 bidirectional=True)
    mlp = MLP(2 * embedding_dimension,
              num_classes,
              hiddims=mlp_hiddims,
              activate_hid=nn.LeakyReLU(),
              activate_out=activate_out,
              batchnorm=[True] * len(mlp_hiddims))

    return STRFNet(strf_layer, conv2d_layer, residual_layer, linear_layer,
                   rnn, mlp)
def make_model(self):
    """Build placeholders, the graph model and per-task 2-class softmax ops.

    For each task a gated-regression head produces two logits per graph.
    The task loss is the mean softmax cross-entropy against the task's slice
    of ``target_values``; class ids are obtained with ``argmax`` over axis 1
    and used for accuracy, TP/TN/FP/FN counts, precision, recall and F1.

    ``self.tv``, ``self.computed_values``, ``self.labels`` and
    ``self.prediction`` are overwritten on each loop iteration, so after the
    loop they belong to the last task.
    """
    task_ids = self.params['task_ids']
    num_task_id = len(task_ids)
    target_shape = [num_task_id, None, 2 * num_task_id]

    self.placeholders['target_values'] = tf.placeholder(
        tf.float32, target_shape, name='target_values')
    self.placeholders['target_mask'] = tf.placeholder(
        tf.float32, target_shape, name='target_mask')
    self.placeholders['num_graphs'] = tf.placeholder(
        tf.int32, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
        tf.float32, [], name='out_layer_dropout_keep_prob')
    keep_prob = self.placeholders['out_layer_dropout_keep_prob']

    with tf.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        # This does the actual graph work:
        set_session(self.sess)
        if not self.params['use_graph']:
            node_repr = tf.zeros_like(
                self.placeholders['initial_node_representation'])
        else:
            node_repr = self.compute_final_node_representations()
        self.ops['final_node_representations'] = node_repr

    self.ops['losses'] = []
    for internal_id, task_id in enumerate(task_ids):
        gate_key = 'regression_gate_task%i' % task_id
        transform_key = 'regression_transform_task%i' % task_id

        with tf.variable_scope("out_layer_task%i" % task_id):
            with tf.variable_scope("regression_gate"):
                self.weights[gate_key] = MLP(
                    2 * self.params['hidden_size'], 2, [], keep_prob)
            with tf.variable_scope("regression"):
                self.weights[transform_key] = MLP(
                    self.params['hidden_size'], 2, [], keep_prob)

            computed_values = self.gated_regression(
                self.ops['final_node_representations'],
                self.weights[gate_key],
                self.weights[transform_key])
            tv = self.placeholders['target_values'][internal_id, :]

            # Class ids from one-hot targets / logits.
            labels = tf.argmax(tv, 1)
            prediction = tf.argmax(computed_values, 1)
            hits = tf.cast(tf.equal(prediction, labels), tf.float32)
            accuracy = tf.reduce_mean(hits)
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=computed_values, labels=tv)
            task_loss = tf.reduce_mean(cross_entropy)

            # Kept on the instance for inspection from outside.
            self.tv = tv
            self.computed_values = computed_values
            self.labels = labels
            self.prediction = prediction

            # Confusion counts, treating class id 1 as "positive".
            TP = tf.reduce_sum(prediction * labels)
            TN = tf.reduce_sum((1 - prediction) * (1 - labels))
            FP = tf.reduce_sum(prediction * (1 - labels))
            FN = tf.reduce_sum((1 - prediction) * labels)

            precision = TP / (TP + FP)
            recall = TP / (TP + FN)
            f1 = 2 * precision * recall / (precision + recall)

            self.ops['TP%i' % task_id] = TP
            self.ops['TN%i' % task_id] = TN
            self.ops['FP%i' % task_id] = FP
            self.ops['FN%i' % task_id] = FN
            self.ops['accuracy_task%i' % task_id] = accuracy
            self.ops['losses'].append(task_loss)
            self.ops['precision_task%i' % task_id] = precision
            self.ops['recall_task%i' % task_id] = recall
            self.ops['f1_task%i' % task_id] = f1

    self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
class BottomUpDepLM:
    """Bottom-up (shift/reduce) dependency language model built with DyNet.

    The parser stack is summarised by an LSTM. ``stack`` holds the embedding
    of every stack item and ``stack_embs`` the LSTM state after each prefix
    of the stack (``stack_embs[0]`` is the learned initial state), so a pop
    is just dropping the last entry of both lists.

    ``new_graph()`` has to be called before ``new_sent()`` / ``build_graph()``,
    which read ``self.initial_state``.
    """

    # Set (per instance) once the "terminal losses" warning has been printed.
    warned = False

    def __init__(self,
                 pc,
                 action_vocab,
                 word_vocab_size,
                 rel_vocab_size,
                 layers,
                 hidden_dim,
                 labelled=True,
                 tied=False):
        """Allocate all parameters in a fresh sub-collection of ``pc``.

        Args:
            pc: parent parameter collection.
            action_vocab: vocabulary of parser actions offering ``to_word``
                and ``len``; ``build_graph`` understands the actions
                ``'shift'``, ``'right'`` and ``'left'``.
            word_vocab_size: number of words.
            rel_vocab_size: number of relation labels.
            layers: number of stack-LSTM layers.
            hidden_dim: size of embeddings and LSTM states.
            labelled: if true, relation embeddings and a relation MLP are
                created as well.
            tied: if true, word embeddings are rows of the word MLP's last
                layer instead of a separate lookup table.
        """
        self.labelled = labelled
        self.tied = tied
        self.action_vocab = action_vocab
        self.pc = pc.add_subcollection()
        action_vocab_size = len(action_vocab)

        if not self.tied:
            self.word_embs = self.pc.add_lookup_parameters(
                (word_vocab_size, hidden_dim))

        self.action_mlp = MLP(self.pc,
                              [hidden_dim, hidden_dim, action_vocab_size])
        self.word_mlp = MLP(self.pc,
                            [hidden_dim, hidden_dim, word_vocab_size])
        self.combine_mlp = MLP(self.pc,
                               [2 * hidden_dim, hidden_dim, hidden_dim])

        self.stack_lstm = dy.LSTMBuilder(layers, hidden_dim, hidden_dim,
                                         self.pc)
        # Learned initial LSTM state: 2 vectors per layer.
        self.initial_state_params = [
            self.pc.add_parameters((hidden_dim, )) for _ in range(2 * layers)
        ]
        self.stack_embs = []

        if labelled:
            self.rel_embs = self.pc.add_lookup_parameters(
                (rel_vocab_size, hidden_dim))
            self.rel_mlp = MLP(self.pc,
                               [hidden_dim, hidden_dim, rel_vocab_size])

    def new_graph(self):
        """Prepare all MLPs and the initial-state expressions."""
        self.action_mlp.new_graph()
        self.word_mlp.new_graph()
        self.combine_mlp.new_graph()
        if self.labelled:
            self.rel_mlp.new_graph()
        self.initial_state = [
            dy.parameter(p) for p in self.initial_state_params
        ]

    def new_sent(self):
        """Reset to an empty stack whose summary is the learned initial state."""
        self.stack_embs = []
        self.stack = []
        state = self.stack_lstm.initial_state()
        state = state.set_s(self.initial_state)
        self.stack_embs.append(state)

    def set_dropout(self, r):
        """Set dropout rate ``r`` on all MLPs and the stack LSTM."""
        self.action_mlp.set_dropout(r)
        self.word_mlp.set_dropout(r)
        self.combine_mlp.set_dropout(r)
        self.stack_lstm.set_dropout(r)
        if self.labelled:
            self.rel_mlp.set_dropout(r)

    def combine(self, head, child, direction):
        """Compose ``head`` and ``child`` embeddings into one embedding.

        ``direction`` is accepted but not used by the composition.
        """
        head_and_child = dy.concatenate([head, child])
        return self.combine_mlp(head_and_child)

    def embed_word(self, word):
        """Return the embedding expression for word id ``word``."""
        if self.tied:
            # Assumes `w` holds one row per vocabulary word -- TODO confirm
            # against the MLP implementation.
            word_embs = self.word_mlp.layers[-1].w
            word_emb = dy.select_rows(word_embs, [word])
            word_emb = dy.transpose(word_emb)
        else:
            word_emb = dy.lookup(self.word_embs, word)
        return word_emb

    def embed_stack_naive(self):
        """Summarise the stack by re-running the LSTM over all items."""
        state = self.stack_lstm.initial_state()
        state = state.set_s(self.initial_state)
        for item in self.stack:
            state = state.add_input(item)
        return state.output()

    def embed_stack(self):
        """Return the cached LSTM summary of the current stack."""
        return self.stack_embs[-1].output()

    def pop(self):
        """Remove the top stack item together with its cached LSTM state."""
        self.stack.pop()
        self.stack_embs.pop()

    def push(self, v):
        """Push embedding ``v`` and cache the extended LSTM state."""
        self.stack.append(v)
        state = self.stack_embs[-1].add_input(v)
        self.stack_embs.append(state)

    def shift(self, word):
        """Push the embedding of word id ``word``."""
        self.push(self.embed_word(word))

    def reduce_right(self):
        """Replace the top two items by their composition; the top item is the head."""
        assert len(self.stack) >= 2
        head = self.stack[-1]
        child = self.stack[-2]
        self.pop()
        self.pop()
        self.push(self.combine(head, child, 'right'))

    def reduce_left(self):
        """Replace the top two items by their composition; the second item is the head."""
        assert len(self.stack) >= 2
        head = self.stack[-2]
        child = self.stack[-1]
        self.pop()
        self.pop()
        self.push(self.combine(head, child, 'left'))

    def build_graph(self, sent):
        """Return the summed action loss of a gold derivation.

        Args:
            sent: iterable of ``(action, subtype)`` id pairs; ``subtype`` is
                the word id for a shift and the relation id otherwise.

        Raises:
            ValueError: if an action is none of 'shift', 'right', 'left'.

        Only the action prediction contributes to the loss: the word loss is
        disabled (a warning is printed once) and the relation loss is built
        but not added.
        """
        losses = []
        self.new_sent()
        for action, subtype in sent:
            action_str = self.action_vocab.to_word(action)

            # predict action
            hidden_state = self.embed_stack()
            action_logits = self.action_mlp(hidden_state)
            action_nlp = dy.pickneglogsoftmax(action_logits, action)
            loss = action_nlp

            if action_str == 'shift':
                if not self.warned:
                    sys.stderr.write(
                        'WARNING: Hacked to not include terminal losses')
                    self.warned = True
                #word_logits = self.word_mlp(hidden_state)
                #word_nlp = dy.pickneglogsoftmax(word_logits, subtype)
                #loss += word_nlp
            elif self.labelled:
                rel_logits = self.rel_mlp(hidden_state)
                rel_nlp = dy.pickneglogsoftmax(rel_logits, subtype)
                #loss += rel_nlp
            losses.append(loss)

            # Do the reference action
            if action_str == 'shift':
                self.shift(subtype)
            elif action_str == 'right':
                self.reduce_right()
            elif action_str == 'left':
                self.reduce_left()
            else:
                # Was `assert '<message>'`, which is always true and so
                # silently ignored unknown actions.
                raise ValueError('Unknown action: %s' % action_str)

        return dy.esum(losses)
def sparse_rgin_layer(
        node_embeddings: tf.Tensor,
        adjacency_lists: List[tf.Tensor],
        state_dim: Optional[int],
        num_timesteps: int = 1,
        activation_function: Optional[str] = "ReLU",
        message_aggregation_function: str = "sum",
        use_target_state_as_input: bool = False,
        num_edge_MLP_hidden_layers: Optional[int] = 1,
        num_aggr_MLP_hidden_layers: Optional[int] = None,
) -> tf.Tensor:
    r"""
    Compute new graph states by neural message passing using MLPs for state updates
    and message computation.
    For this, we assume existing node states h^t_v and a list of per-edge-type adjacency
    matrices A_\ell.

    We compute new states as follows:
        h^{t+1}_v := \sigma(MLP_{aggr}(\sum_\ell \sum_{(u, v) \in A_\ell} MLP_\ell(h^t_u)))
    The learnable parameters of this are the MLPs MLP_\ell.
    This is derived from Cor. 6 of arXiv:1810.00826, instantiating the functions f, \phi
    with _separate_ MLPs. This is more powerful than the GIN formulation in Eq. (4.1) of
    arXiv:1810.00826, as we want to be able to distinguish graphs of the form
     G_1 = (V={1, 2, 3}, E_1={(1, 2)}, E_2={(3, 2)})
    and
     G_2 = (V={1, 2, 3}, E_1={(3, 2)}, E_2={(1, 2)})
    from each other. If we would treat all edges the same,
    G_1.E_1 \cup G_1.E_2 == G_2.E_1 \cup G_2.E_2 would imply that the two graphs
    become indistinguishable.
    Hence, we introduce per-edge-type MLPs, which also means that we have to drop
    the optimisation of modelling f \circ \phi by a single MLP used in the original
    GIN formulation.

    We use the following abbreviations in shape descriptions:
    * V: number of nodes
    * D: state dimension
    * L: number of different edge types
    * E: number of edges of a given edge type

    Arguments:
        node_embeddings: float32 tensor of shape [V, D], the original representation of
            each node in the graph.
        adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape
            [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge
            of type l connects node v to node u.
        state_dim: Optional size of output dimension of the GNN layer. If not set, defaults
            to D, the dimensionality of the input. If different from the input dimension,
            parameter num_timesteps has to be 1.
        num_timesteps: Number of repeated applications of this message passing layer.
        activation_function: Type of activation function used.
        message_aggregation_function: Type of aggregation function used for messages.
        use_target_state_as_input: Flag indicating if the edge MLP should consume both
            source and target state (True) or only source state (False).
        num_edge_MLP_hidden_layers: Number of hidden layers of the MLPs used to transform
            messages from neighbouring nodes. If None, the raw states are used directly.
        num_aggr_MLP_hidden_layers: Number of hidden layers of the MLPs used on the
            aggregation of messages from neighbouring nodes. If none, the aggregated messages
            are used directly.

    Returns:
        float32 tensor of shape [V, state_dim]
    """
    # NOTE: the docstring above is a raw string because its LaTeX-style
    # backslashes (\ell, \sigma, \sum, ...) are not valid Python escapes.
    num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0]
    if state_dim is None:
        # NOTE(review): this yields a tensor-valued (dynamic) dimension that is
        # then passed as `out_size` to MLP - confirm that MLP accepts that.
        state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1]

    # === Prepare things we need across all timesteps:
    activation_fn = get_activation(activation_function)
    message_aggregation_fn = get_aggregation_function(
        message_aggregation_function)

    if num_aggr_MLP_hidden_layers is not None:
        aggregation_MLP = MLP(out_size=state_dim,
                              hidden_layers=num_aggr_MLP_hidden_layers,
                              activation_fun=activation_fn,
                              name="Aggregation_MLP")  # type: Optional[MLP]
    else:
        aggregation_MLP = None

    if num_edge_MLP_hidden_layers is not None:
        # MLPs to compute the edge messages
        edge_type_to_edge_mlp = []  # type: Optional[List[MLP]]
    else:
        edge_type_to_edge_mlp = None

    edge_type_to_message_targets = []  # List of tensors of message targets
    for edge_type_idx, adjacency_list_for_edge_type in enumerate(
            adjacency_lists):
        # Both checks are kept so that a type checker can narrow both names.
        if edge_type_to_edge_mlp is not None and num_edge_MLP_hidden_layers is not None:
            edge_type_to_edge_mlp.append(
                MLP(out_size=state_dim,
                    hidden_layers=num_edge_MLP_hidden_layers,
                    activation_fun=activation_fn,
                    name="Edge_%i_MLP" % edge_type_idx))
        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])

    # Let M be the number of messages (sum of all E):
    message_targets = tf.concat(edge_type_to_message_targets,
                                axis=0)  # Shape [M]

    cur_node_states = node_embeddings
    for _ in range(num_timesteps):
        messages_per_type = []  # list of tensors of messages of shape [E, D]
        # Collect incoming messages per edge type
        for edge_type_idx, adjacency_list_for_edge_type in enumerate(
                adjacency_lists):
            edge_sources = adjacency_list_for_edge_type[:, 0]
            edge_targets = adjacency_list_for_edge_type[:, 1]
            edge_source_states = \
                tf.nn.embedding_lookup(params=cur_node_states,
                                       ids=edge_sources)  # Shape [E, D]

            edge_mlp_inputs = edge_source_states
            if use_target_state_as_input:
                edge_target_states = \
                    tf.nn.embedding_lookup(params=cur_node_states,
                                           ids=edge_targets)  # Shape [E, D]
                edge_mlp_inputs = tf.concat(
                    [edge_source_states, edge_target_states],
                    axis=1)  # Shape [E, 2*D]

            if edge_type_to_edge_mlp is not None:
                messages = edge_type_to_edge_mlp[edge_type_idx](
                    edge_mlp_inputs)  # Shape [E, D]
            else:
                messages = edge_mlp_inputs

            messages_per_type.append(messages)

        all_messages = tf.concat(messages_per_type, axis=0)  # Shape [M, D]
        if edge_type_to_edge_mlp is not None:
            # Shape [M, D] (Apply nonlinearity to Edge-MLP outputs as well)
            all_messages = activation_fn(all_messages)

        aggregated_messages = \
            message_aggregation_fn(data=all_messages,
                                   segment_ids=message_targets,
                                   num_segments=num_nodes)  # Shape [V, D]

        new_node_states = aggregated_messages
        if aggregation_MLP is not None:
            new_node_states = aggregation_MLP(new_node_states)

        # Note that the final MLP layer has no activation, so we do that here explicitly
        new_node_states = activation_fn(new_node_states)
        new_node_states = tf.contrib.layers.layer_norm(new_node_states)
        cur_node_states = new_node_states

    return cur_node_states
def make_model(self):
    """Create placeholders, the graph model and one regression head per task.

    For every entry of ``self.params['task_ids']`` a gate MLP and a transform
    MLP are stored in ``self.weights``, an op named ``accuracy_task<id>`` is
    stored in ``self.ops`` and the task loss is appended to
    ``self.ops['losses']``; ``self.ops['loss']`` is the sum of those losses.

    NOTE: in this variant both the ``accuracy_task<id>`` op and the task loss
    are the masked sum of squared differences divided by the (smoothed) number
    of unmasked targets, and two debugging outputs (``print`` and
    ``tf.Print``) are active.
    """
    num_tasks = len(self.params['task_ids'])
    self.placeholders['target_values'] = tf.placeholder(
        tf.float32, [num_tasks, None], name='target_values')
    self.placeholders['target_mask'] = tf.placeholder(
        tf.float32, [num_tasks, None], name='target_mask')
    self.placeholders['num_graphs'] = tf.placeholder(
        tf.int32, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
        tf.float32, [], name='out_layer_dropout_keep_prob')

    # NOTE(review): the scope is spelled "graph_mode" here; it is kept
    # byte-for-byte because the scope name is part of the variable names.
    with tf.variable_scope("graph_mode"):
        self.prepare_specific_graph_model()
        # This does the actual graph work:
        if self.params['use_graph']:
            node_repr = self.compute_final_node_representations()
        else:
            node_repr = tf.zeros_like(
                self.placeholders['initial_node_representation'])
        self.ops['final_node_representations'] = node_repr

    self.ops['losses'] = []
    for internal_id, task_id in enumerate(self.params['task_ids']):
        with tf.variable_scope("out_layer_task%i" % task_id):
            gate_key = 'regression_gate_task%i' % task_id
            transform_key = 'regression_transform_task%i' % task_id
            keep_prob = self.placeholders['out_layer_dropout_keep_prob']

            with tf.variable_scope("regression_gate"):
                self.weights[gate_key] = MLP(
                    2 * self.params['hidden_size'], 1, [], keep_prob)
            with tf.variable_scope("regression"):
                self.weights[transform_key] = MLP(
                    self.params['hidden_size'], 1, [], keep_prob)

            predictions = self.gated_regression(
                self.ops['final_node_representations'],
                self.weights[gate_key],
                self.weights[transform_key])
            print(predictions)

            # LOOK HERE (original author's notes, translated from German):
            # computed values -> tensor with values between 0 and 1 <- I
            #   applied a sigmoid here; actually they are not between 0 and
            #   1... -> should the sigmoid rather be left out? Then again, the
            #   values soon approach that range anyway.
            # target values -> 0 or 1: these are the classification labels.
            # As I understand it, the "accuracy" should be computed as
            #   Acc = 1 - 1/n * sum((label - prediction)^2)
            # which, mapped onto this code, is roughly
            #   accuracy_task = 1 - 1/task_target_num * tf.reduce_sum(tf.square(diff))
            targets = self.placeholders['target_values'][internal_id, :]
            raw_error = predictions - targets
            error = tf.Print(raw_error, [raw_error], "DIFF: ")

            # (Author's note: not interesting, only a None comparison.)
            mask = self.placeholders['target_mask'][internal_id, :]
            num_targets = tf.reduce_sum(mask) + SMALL_NUMBER
            error = error * mask  # Mask out unused values

            # Author's notes, translated from German:
            # `if val_acc < best_val_acc` -> val_acc is more or less the
            #   accumulation of the accuracy values as they end up in
            #   'accuracy_task', and then the model is declared to have
            #   improved. I am not sure that this is right...
            # IF abs is used with binary classification -> DMG
            # The abs puzzles me, because with classifications such as
            #   0.5, label 1 -> diff = -0.5
            #   0.5, label 0 -> diff = 0.5
            # the abs makes the two values exactly equal again, and then
            # nothing at all is gained...
            # This is actually a mean squared ERROR.
            # Previously: tf.reduce_sum(tf.abs(diff)) / task_target_num
            # Now under test:
            self.ops['accuracy_task%i' % task_id] = (
                tf.reduce_sum(tf.square(error)) / num_targets)

            # Author's note: the sign is lost here again, but that does not
            # matter for a loss, does it?
            # Previously: tf.reduce_sum(0.5 * tf.square(diff)) / task_target_num
            task_loss = tf.reduce_sum(tf.square(error)) / num_targets

            # Normalise loss to account for fewer task-specific examples in batch:
            sample_ratio = (
                self.params['task_sample_ratios'].get(task_id) or 1.0)
            task_loss = task_loss * (1.0 / sample_ratio)
            self.ops['losses'].append(task_loss)

    self.ops['loss'] = tf.reduce_sum(self.ops['losses'])
def make_model(self):
    """Create placeholders, the graph model and one regression head per task.

    For every entry of ``self.params['task_ids']`` this stores a gate MLP and
    a transform MLP in ``self.weights`` and adds three things to ``self.ops``:
    ``accuracy_task<id>`` (masked sum of absolute differences divided by the
    smoothed number of unmasked targets), ``predict_task<id>`` (the raw output
    of ``gated_regression``) and, appended to ``self.ops['losses']``, the
    masked half squared error scaled by the inverse task sample ratio.
    ``self.ops['loss']`` is the sum of all task losses.
    """
    num_tasks = len(self.params['task_ids'])
    self.placeholders['target_values'] = tf.placeholder(
        tf.float32, [num_tasks, None], name='target_values')
    self.placeholders['target_mask'] = tf.placeholder(
        tf.float32, [num_tasks, None], name='target_mask')
    self.placeholders['num_graphs'] = tf.placeholder(
        tf.int32, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.placeholder(
        tf.float32, [], name='out_layer_dropout_keep_prob')

    with tf.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        # This does the actual graph work:
        if self.params['use_graph']:
            node_repr = self.compute_final_node_representations()
        else:
            node_repr = tf.zeros_like(
                self.placeholders['initial_node_representation'])
        self.ops['final_node_representations'] = node_repr

    self.ops['losses'] = []
    for internal_id, task_id in enumerate(self.params['task_ids']):
        with tf.variable_scope("out_layer_task%i" % task_id):
            gate_key = 'regression_gate_task%i' % task_id
            transform_key = 'regression_transform_task%i' % task_id
            keep_prob = self.placeholders['out_layer_dropout_keep_prob']

            with tf.variable_scope("regression_gate"):
                self.weights[gate_key] = MLP(
                    2 * self.params['hidden_size'], 1, [], keep_prob)
            with tf.variable_scope("regression"):
                self.weights[transform_key] = MLP(
                    self.params['hidden_size'], 1, [], keep_prob)

            predictions = self.gated_regression(
                self.ops['final_node_representations'],
                self.weights[gate_key],
                self.weights[transform_key])

            # NOTE: a disabled debugging snippet used to live here. It opened
            # a tf.Session to print the predicted and the target values of the
            # batch and counted the entries where both had the same sign
            # ("number of correct predictions in this batch").

            targets = self.placeholders['target_values'][internal_id, :]
            error = predictions - targets
            mask = self.placeholders['target_mask'][internal_id, :]
            num_targets = tf.reduce_sum(mask) + SMALL_NUMBER
            error = error * mask  # Mask out unused values

            self.ops['accuracy_task%i' % task_id] = (
                tf.reduce_sum(tf.abs(error)) / num_targets)
            self.ops['predict_task%i' % task_id] = predictions

            task_loss = tf.reduce_sum(0.5 * tf.square(error)) / num_targets
            # Normalise loss to account for fewer task-specific examples in batch:
            sample_ratio = (
                self.params['task_sample_ratios'].get(task_id) or 1.0)
            task_loss = task_loss * (1.0 / sample_ratio)
            self.ops['losses'].append(task_loss)

    self.ops['loss'] = tf.reduce_sum(self.ops['losses'])