def __init__(self, args):
    super(MultiPrototypeTransductiveInference, self).__init__()
    # self.gpu_id = args.gpu_id
    self.n_way = args.n_way
    self.k_shot = args.k_shot
    self.in_channels = args.pc_in_dim
    self.n_points = args.pc_npts
    self.use_attention = args.use_attention
    self.n_subprototypes = args.n_subprototypes
    self.k_connect = args.k_connect
    self.sigma = args.sigma
    self.n_classes = self.n_way + 1  # N ways plus the background class

    self.encoder = DGCNN(args.edgeconv_widths, args.dgcnn_mlp_widths, args.pc_in_dim, k=args.dgcnn_k)
    self.base_learner = BaseLearner(args.dgcnn_mlp_widths[-1], args.base_widths)

    if self.use_attention:
        self.att_learner = SelfAttention(args.dgcnn_mlp_widths[-1], args.output_dim)
    else:
        self.linear_mapper = nn.Conv1d(args.dgcnn_mlp_widths[-1], args.output_dim, 1, bias=False)

    # Final feature: first-EdgeConv features + attention/linear output + base-learner output.
    self.feat_dim = args.edgeconv_widths[0][-1] + args.output_dim + args.base_widths[-1]
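# The k_connect / sigma pair above suggests transductive label propagation over
# a k-NN affinity graph in feature space. A minimal sketch of that graph
# construction (the actual inference code is not shown here; knn_affinity is a
# hypothetical name):
import torch

def knn_affinity(feat, k, sigma):
    # feat: (n_nodes, feat_dim) point features from the encoder.
    dist2 = torch.cdist(feat, feat).pow(2)              # squared pairwise distances
    w = torch.exp(-dist2 / (2 * sigma ** 2))            # Gaussian-kernel affinities
    topk = w.topk(k + 1, dim=-1).indices                # self plus k nearest neighbours
    mask = torch.zeros_like(w).scatter_(-1, topk, 1.0)  # sparsify to the k-NN graph
    return w * mask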
def __init__(self, image_shape, output_size):
    super().__init__()
    c, h, w = image_shape
    self.conv1 = torch.nn.Conv2d(in_channels=c, out_channels=32, kernel_size=8, stride=4, padding=0)
    self.conv2 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=3, padding=0)
    self.conv3 = torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
    # Self-attention over the 32-channel feature map produced by conv1.
    self.attention = SelfAttention(32)
    convs = [self.conv1, self.attention, self.conv2, self.conv3]
    conv_output_size = self.conv_out_size(convs, h, w)
    self.fc = torch.nn.Linear(conv_output_size, 512)
    # Policy head.
    self.pi = torch.nn.Linear(512, output_size)
    # Value-function head.
    self.value = torch.nn.Linear(512, 1)
    # Reset weights as in the Nature DQN paper.
    self.init_weights()
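# conv_out_size is defined elsewhere; a common way to implement it is to trace
# a dummy tensor through the layer stack and read off the flattened size. A
# minimal sketch under that assumption (the free-function form and the
# in_channels default are illustrative):
import torch

def conv_out_size(modules, h, w, in_channels=4):
    # Run a zero image through the stack and count the resulting elements.
    x = torch.zeros(1, in_channels, h, w)
    with torch.no_grad():
        for m in modules:
            x = m(x)
    return int(x.numel())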
def __init__(self, history_size, num_layers, units_per_layer, lr, obs_n_shape,
             act_shape, act_type, gumbel_temperature, q_network, agent_index,
             noise, use_ounoise, temporal_mode):
    """
    Implementation of the policy network, with an optional Gumbel-softmax
    activation at the final layer for discrete action spaces.
    """
    self.num_layers = num_layers
    self.lr = lr
    self.history_size = history_size
    self.obs_n_shape = obs_n_shape
    self.act_shape = act_shape
    self.act_type = act_type
    self.use_gumbel = act_type is Discrete
    self.use_ounoise = use_ounoise
    self.gumbel_temperature = gumbel_temperature
    self.q_network = q_network
    self.agent_index = agent_index
    self.clip_norm = 0.5
    self.noise = noise
    self.noise_mode = OUNoise(act_shape[0], scale=1.0)
    self.temporal_mode = temporal_mode

    self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr)

    # Set up the network structure.
    self.obs_input = tf.keras.layers.Input(shape=(self.history_size, self.obs_n_shape[agent_index][0]))

    if self.temporal_mode.lower() == "rnn":
        self.temporal_state = tf.keras.layers.GRU(units_per_layer)
    elif self.temporal_mode.lower() == "attention":
        self.temporal_state = SelfAttention(activation=tf.keras.layers.LeakyReLU(alpha=0.1))
    else:
        raise RuntimeError(
            "Temporal Information Layer should be rnn or attention but %s found!" % self.temporal_mode)

    self.hidden_layers = []
    for idx in range(num_layers):
        layer = tf.keras.layers.Dense(units_per_layer, activation='relu',
                                      name='ag{}pol_hid{}'.format(agent_index, idx))
        self.hidden_layers.append(layer)

    # Linear output for Gumbel-softmax sampling, tanh for continuous actions.
    if self.use_gumbel:
        self.output_layer = tf.keras.layers.Dense(self.act_shape, activation='linear',
                                                  name='ag{}pol_out'.format(agent_index))
    else:
        self.output_layer = tf.keras.layers.Dense(self.act_shape, activation='tanh',
                                                  name='ag{}pol_out'.format(agent_index))

    # Connect the layers.
    x = self.obs_input
    x = self.temporal_state(x)
    if self.temporal_mode.lower() == "attention":
        # SelfAttention returns the full sequence; keep the last timestep.
        x = tf.keras.layers.Lambda(lambda t: t[:, -1])(x)
    for layer in self.hidden_layers:
        x = layer(x)
    x = self.output_layer(x)
    self.model = tf.keras.Model(inputs=[self.obs_input], outputs=[x])
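# The use_gumbel flag points at Gumbel-softmax sampling for discrete actions.
# A minimal sketch of that trick (the actual sampling call site is not shown
# here): perturb the logits with Gumbel(0, 1) noise, then apply a tempered
# softmax so the sample stays differentiable.
import tensorflow as tf

def gumbel_softmax_sample(logits, temperature):
    u = tf.random.uniform(tf.shape(logits), minval=1e-8, maxval=1.0)
    gumbel = -tf.math.log(-tf.math.log(u))  # Gumbel(0, 1) noise
    return tf.nn.softmax((logits + gumbel) / temperature, axis=-1)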
def __init__(self, config):
    super(LSRPIPELINE, self).__init__()
    self.config = config
    self.finetune_emb = config.finetune_emb

    self.word_emb = nn.Embedding(config.data_word_vec.shape[0], config.data_word_vec.shape[1])
    self.word_emb.weight.data.copy_(torch.from_numpy(config.data_word_vec))
    if not self.finetune_emb:
        self.word_emb.weight.requires_grad = False

    self.ner_emb = nn.Embedding(13, config.entity_type_size, padding_idx=0)
    self.coref_embed = nn.Embedding(config.max_length, config.coref_size, padding_idx=0)

    hidden_size = config.rnn_hidden
    input_size = config.data_word_vec.shape[1] + config.coref_size + config.entity_type_size  # + char_hidden

    self.linear_re = nn.Linear(hidden_size * 2, hidden_size)
    self.linear_sent = nn.Linear(hidden_size * 2, hidden_size)
    self.self_att = SelfAttention(hidden_size)
    # Bilinear pair scorer over entity states augmented with a distance embedding.
    # (The original assigned a plain Bilinear(hidden, hidden, hidden) to self.bili
    # first, which this definition immediately overwrote; the dead assignment is dropped.)
    self.bili = torch.nn.Bilinear(hidden_size + config.dis_size, hidden_size + config.dis_size, hidden_size)
    self.dis_embed = nn.Embedding(20, config.dis_size, padding_idx=10)
    self.linear_output = nn.Linear(2 * hidden_size, config.relation_num)
    self.relu = nn.ReLU()
    self.dropout_rate = nn.Dropout(config.dropout_rate)
    self.rnn_sent = Encoder(input_size, hidden_size, config.dropout_emb, config.dropout_rate)
    self.hidden_size = hidden_size

    self.use_struct_att = config.use_struct_att
    if self.use_struct_att:
        self.structInduction = StructInduction(hidden_size // 2, hidden_size, True)

    self.dropout_gcn = nn.Dropout(config.dropout_gcn)
    self.reasoner_layer_first = config.reasoner_layer_first
    self.reasoner_layer_second = config.reasoner_layer_second
    self.use_reasoning_block = config.use_reasoning_block
    if self.use_reasoning_block:
        self.reasoner = nn.ModuleList()
        self.reasoner.append(DynamicReasoner(hidden_size, self.reasoner_layer_first, self.dropout_gcn))
        self.reasoner.append(DynamicReasoner(hidden_size, self.reasoner_layer_second, self.dropout_gcn))
def __init__(self, config):
    super(LSR, self).__init__()
    self.config = config

    self.bert = BertModel.from_pretrained('bert-base-uncased')
    print("loaded bert-base-uncased")

    hidden_size = config.rnn_hidden
    bert_hidden_size = 768
    self.linear_re = nn.Linear(bert_hidden_size, hidden_size)
    self.linear_sent = nn.Linear(hidden_size * 2, hidden_size)
    self.self_att = SelfAttention(hidden_size)
    # Bilinear pair scorer over entity states augmented with a distance embedding
    # (as in the pipeline variant above, the dead plain-Bilinear assignment is dropped).
    self.bili = torch.nn.Bilinear(hidden_size + config.dis_size, hidden_size + config.dis_size, hidden_size)
    self.dis_embed = nn.Embedding(20, config.dis_size, padding_idx=10)
    self.linear_output = nn.Linear(2 * hidden_size, config.relation_num)
    self.relu = nn.ReLU()
    self.dropout_rate = nn.Dropout(config.dropout_rate)
    # self.rnn_sent = Encoder(input_size, hidden_size, config.dropout_emb, config.dropout_rate)
    self.hidden_size = hidden_size

    self.use_struct_att = config.use_struct_att
    if self.use_struct_att:
        self.structInduction = StructInduction(hidden_size // 2, hidden_size, True)

    self.dropout_gcn = nn.Dropout(config.dropout_gcn)
    self.reasoner_layer_first = config.reasoner_layer_first
    self.reasoner_layer_second = config.reasoner_layer_second
    self.use_reasoning_block = config.use_reasoning_block
    if self.use_reasoning_block:
        self.reasoner = nn.ModuleList()
        self.reasoner.append(DynamicReasoner(hidden_size, self.reasoner_layer_first, self.dropout_gcn))
        self.reasoner.append(DynamicReasoner(hidden_size, self.reasoner_layer_second, self.dropout_gcn))
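# Both LSR variants above share the same pair-scoring head: head/tail entity
# states are each concatenated with a relative-distance embedding, combined by
# the bilinear map, then joined with a context feature for the final relation
# classifier. A standalone shape sketch (all sizes and tensor names here are
# illustrative, not the repo's):
import torch
import torch.nn as nn

hidden, dis_size, relation_num, batch = 128, 20, 97, 4
bili = nn.Bilinear(hidden + dis_size, hidden + dis_size, hidden)
dis_embed = nn.Embedding(20, dis_size, padding_idx=10)
linear_output = nn.Linear(2 * hidden, relation_num)

h_ent = torch.randn(batch, hidden)      # head entity representation
t_ent = torch.randn(batch, hidden)      # tail entity representation
ctx = torch.randn(batch, hidden)        # sentence/context feature
dis = torch.randint(0, 20, (batch,))    # bucketed head-tail distance

h = torch.cat([h_ent, dis_embed(dis)], dim=-1)  # (batch, hidden + dis_size)
t = torch.cat([t_ent, dis_embed(dis)], dim=-1)
logits = linear_output(torch.cat([ctx, bili(h, t)], dim=-1))  # (batch, relation_num)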
def __init__(self, vocab_size, hidden_size, embedding_size, bidirectional=True, embedding=None):
    super(Encoder, self).__init__()
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.embedding_size = embedding_size
    self.bidirectional = bidirectional
    # Note: the attribute is named `gru` but the layer is actually an LSTM;
    # the name is kept for compatibility with code that references it.
    self.gru = nn.LSTM(self.embedding_size, self.hidden_size,
                       bidirectional=self.bidirectional, batch_first=True)
    self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
    if embedding is not None:
        self.embedding.weight = nn.Parameter(embedding)
    self.attention = SelfAttention(hidden_size=self.hidden_size)
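# Quick shape check for the Encoder above (despite the `gru` attribute name,
# the recurrent layer is an LSTM). Sizes are arbitrary; SelfAttention is
# assumed importable from the surrounding module.
import torch

enc = Encoder(vocab_size=1000, hidden_size=64, embedding_size=50)
tokens = torch.randint(0, 1000, (8, 20))  # (batch, seq_len)
emb = enc.embedding(tokens)               # (8, 20, 50)
out, (h_n, c_n) = enc.gru(emb)            # out: (8, 20, 128) -- 2 * hidden_size, bidirectional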
def graph_net(arglist):
    I = []
    for _ in range(no_agents):
        I.append(Input(shape=(arglist.history_size, no_features)))

    outputs = []
    for i in range(no_agents):
        if arglist.temporal_mode.lower() == "rnn":
            temporal_state = GRU(arglist.no_neurons)(I[i])
        elif arglist.temporal_mode.lower() == "attention":
            temporal_state = SelfAttention(activation=tf.keras.layers.LeakyReLU(alpha=0.1))(I[i])
            # SelfAttention returns the full sequence; keep the last timestep.
            temporal_state = Lambda(lambda x: x[:, -1])(temporal_state)
        else:
            raise RuntimeError(
                "Temporal Information Layer should be rnn or attention but %s found!" % arglist.temporal_mode)

        dense = Dense(arglist.no_neurons,
                      kernel_initializer=tf.keras.initializers.he_uniform(),
                      activation=tf.keras.layers.LeakyReLU(alpha=0.1))(temporal_state)
        med_dense = Dense(arglist.no_neurons,
                          kernel_initializer=tf.keras.initializers.he_uniform(),
                          activation=tf.keras.layers.LeakyReLU(alpha=0.1))(dense)
        last_dense = Dense(no_actions,
                           kernel_initializer=tf.keras.initializers.he_uniform())(med_dense)
        outputs.append(last_dense)

    # Stack the per-agent heads into a single (batch, no_agents, no_actions) output.
    V = tf.stack(outputs, axis=1)
    model = Model(I, V)
    model._name = "final_network"
    tf.keras.utils.plot_model(model, show_shapes=True)
    return model
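# Illustrative call. no_agents, no_features and no_actions are module-level
# globals in the original; they are set here only for the sketch. Note that
# the plot_model call inside graph_net needs pydot and graphviz installed.
from types import SimpleNamespace

no_agents, no_features, no_actions = 3, 10, 5
arglist = SimpleNamespace(history_size=4, no_neurons=64, temporal_mode="rnn")
model = graph_net(arglist)  # Keras Model: [obs_1, ..., obs_n] -> (batch, no_agents, no_actions)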
def __init__(self, args):
    super(ProtoNet, self).__init__()
    self.n_way = args.n_way
    self.k_shot = args.k_shot
    self.dist_method = args.dist_method
    self.in_channels = args.pc_in_dim
    self.n_points = args.pc_npts
    self.use_attention = args.use_attention

    self.encoder = DGCNN(args.edgeconv_widths, args.dgcnn_mlp_widths, args.pc_in_dim, k=args.dgcnn_k)
    self.base_learner = BaseLearner(args.dgcnn_mlp_widths[-1], args.base_widths)
    if self.use_attention:
        self.att_learner = SelfAttention(args.dgcnn_mlp_widths[-1], args.output_dim)
    else:
        self.linear_mapper = nn.Conv1d(args.dgcnn_mlp_widths[-1], args.output_dim, 1, bias=False)
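# dist_method selects how query features are compared to the class prototypes;
# a minimal sketch of the Euclidean variant (the actual forward pass is not
# shown here; nearest_prototype_logits is a hypothetical name):
import torch

def nearest_prototype_logits(query_feat, prototypes):
    # query_feat: (n_points, feat_dim); prototypes: (n_way + 1, feat_dim).
    # Negate the distance so that closer prototypes score higher.
    return -torch.cdist(query_feat, prototypes)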