Example #1
File: mpti.py  Project: marcelomata/attMPTI
    def __init__(self, args):
        super(MultiPrototypeTransductiveInference, self).__init__()
        # self.gpu_id = args.gpu_id
        self.n_way = args.n_way
        self.k_shot = args.k_shot
        self.in_channels = args.pc_in_dim
        self.n_points = args.pc_npts
        self.use_attention = args.use_attention
        self.n_subprototypes = args.n_subprototypes
        self.k_connect = args.k_connect
        self.sigma = args.sigma

        self.n_classes = self.n_way + 1

        self.encoder = DGCNN(args.edgeconv_widths,
                             args.dgcnn_mlp_widths,
                             args.pc_in_dim,
                             k=args.dgcnn_k)
        self.base_learner = BaseLearner(args.dgcnn_mlp_widths[-1],
                                        args.base_widths)

        if self.use_attention:
            self.att_learner = SelfAttention(args.dgcnn_mlp_widths[-1],
                                             args.output_dim)
        else:
            self.linear_mapper = nn.Conv1d(args.dgcnn_mlp_widths[-1],
                                           args.output_dim,
                                           1,
                                           bias=False)

        self.feat_dim = (args.edgeconv_widths[0][-1] + args.output_dim +
                         args.base_widths[-1])
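
The SelfAttention module consumed above is defined elsewhere in the project and is not shown here. As a rough sketch of an (in_dim, out_dim) self-attention over point features of shape (B, in_dim, N), with names of my own choosing (this is not the attMPTI implementation):

import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttentionSketch(nn.Module):
    """Illustrative non-local style self-attention over point features.
    Maps (B, in_dim, N) -> (B, out_dim, N)."""
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.q = nn.Conv1d(in_dim, out_dim, 1, bias=False)  # query projection
        self.k = nn.Conv1d(in_dim, out_dim, 1, bias=False)  # key projection
        self.v = nn.Conv1d(in_dim, out_dim, 1, bias=False)  # value projection

    def forward(self, x):
        q = self.q(x).permute(0, 2, 1)                       # (B, N, out_dim)
        k = self.k(x)                                        # (B, out_dim, N)
        v = self.v(x).permute(0, 2, 1)                       # (B, N, out_dim)
        attn = F.softmax(torch.bmm(q, k) / k.shape[1] ** 0.5, dim=-1)  # (B, N, N)
        return torch.bmm(attn, v).permute(0, 2, 1)           # (B, out_dim, N)
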
Example #2
    def __init__(self, image_shape, output_size):
        super().__init__()
        c, h, w = image_shape
        self.conv1 = torch.nn.Conv2d(in_channels=c,
                                     out_channels=32,
                                     kernel_size=8,
                                     stride=4,
                                     padding=0)
        self.conv2 = torch.nn.Conv2d(in_channels=32,
                                     out_channels=64,
                                     kernel_size=4,
                                     stride=3,
                                     padding=0)
        self.conv3 = torch.nn.Conv2d(in_channels=64,
                                     out_channels=64,
                                     kernel_size=3,
                                     stride=1,
                                     padding=1)

        self.attention = SelfAttention(32)

        convs = [self.conv1, self.attention, self.conv2, self.conv3]

        conv_output_size = self.conv_out_size(convs, h, w)

        self.fc = torch.nn.Linear(conv_output_size, 512)

        self.pi = torch.nn.Linear(512, output_size)
        # value function head
        self.value = torch.nn.Linear(512, 1)
        # reset weights as in the Nature paper
        self.init_weights()
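
The conv_out_size helper used above is not included in the snippet. A common way to obtain the flattened feature size, and a plausible reading of what that helper does, is to push a dummy image through the layers; the hypothetical sketch below assumes the attention layer accepts the 4-D conv feature map as-is:

import torch

def conv_out_size_sketch(layers, h, w, in_channels):
    # Hypothetical helper (not the repo's conv_out_size): run a single
    # zero-filled image through the layers and read off the flattened size.
    x = torch.zeros(1, in_channels, h, w)
    with torch.no_grad():
        for layer in layers:
            x = layer(x)
    return int(x.numel())
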
Example #3
    def __init__(self, history_size, num_layers, units_per_layer, lr, obs_n_shape, act_shape, act_type,
                 gumbel_temperature, q_network, agent_index, noise, use_ounoise, temporal_mode):
        """
        Implementation of the policy network, with optional gumbel softmax activation at the final layer.
        """
        self.num_layers = num_layers
        self.lr = lr
        self.history_size = history_size
        self.obs_n_shape = obs_n_shape
        self.act_shape = act_shape
        self.act_type = act_type
        if act_type is Discrete:
            self.use_gumbel = True
        else:
            self.use_gumbel = False
        self.use_ounoise = use_ounoise
        self.gumbel_temperature = gumbel_temperature
        self.q_network = q_network
        self.agent_index = agent_index
        self.clip_norm = 0.5
        self.noise = noise
        self.noise_mode = OUNoise(act_shape[0], scale=1.0)
        self.temporal_mode = temporal_mode
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr)

        ### set up network structure
        self.obs_input = tf.keras.layers.Input(shape=(self.history_size, self.obs_n_shape[agent_index][0]))
        self.temporal_state = None
        if self.temporal_mode.lower() == "rnn":
            self.temporal_state = tf.keras.layers.GRU(units_per_layer)
        elif self.temporal_mode.lower() == "attention":
            self.temporal_state = SelfAttention(activation=tf.keras.layers.LeakyReLU(alpha=0.1))
        else:
            raise RuntimeError(
                "Temporal Information Layer should be rnn or attention but %s found!" % self.temporal_mode)

        self.hidden_layers = []
        for idx in range(num_layers):
            layer = tf.keras.layers.Dense(units_per_layer, activation='relu',
                                          name='ag{}pol_hid{}'.format(agent_index, idx))
            self.hidden_layers.append(layer)

        if self.use_gumbel:
            self.output_layer = tf.keras.layers.Dense(self.act_shape, activation='linear',
                                                      name='ag{}pol_out{}'.format(agent_index, idx))
        else:
            self.output_layer = tf.keras.layers.Dense(self.act_shape, activation='tanh',
                                                      name='ag{}pol_out{}'.format(agent_index, idx))

        # connect layers
        x = self.obs_input
        x = self.temporal_state(x)
        if self.temporal_mode.lower() == "attention":
            x = tf.keras.layers.Lambda(lambda x: x[:, -1])(x)
        for layer in self.hidden_layers:
            x = layer(x)
        x = self.output_layer(x)

        self.model = tf.keras.Model(inputs=[self.obs_input], outputs=[x])
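
When the action space is Discrete, the constructor sets use_gumbel, but the Gumbel-Softmax sampling itself happens outside this snippet. As a minimal, hedged sketch of straight Gumbel-Softmax sampling from the policy logits (standard technique, not necessarily this repo's exact code):

import tensorflow as tf

def gumbel_softmax_sample(logits, temperature):
    # Add Gumbel(0, 1) noise to the logits and apply a temperature-scaled
    # softmax, giving a differentiable, near-one-hot action sample.
    u = tf.random.uniform(tf.shape(logits), minval=1e-8, maxval=1.0)
    gumbel = -tf.math.log(-tf.math.log(u))
    return tf.nn.softmax((logits + gumbel) / temperature, axis=-1)
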
Example #4
    def __init__(self, config):
        super(LSRPIPELINE, self).__init__()
        self.config = config

        self.finetune_emb = config.finetune_emb

        self.word_emb = nn.Embedding(config.data_word_vec.shape[0], config.data_word_vec.shape[1])
        self.word_emb.weight.data.copy_(torch.from_numpy(config.data_word_vec))
        if not self.finetune_emb:
            self.word_emb.weight.requires_grad = False

        self.ner_emb = nn.Embedding(13, config.entity_type_size, padding_idx=0)

        self.coref_embed = nn.Embedding(config.max_length, config.coref_size, padding_idx=0)

        hidden_size = config.rnn_hidden
        input_size = config.data_word_vec.shape[1] + config.coref_size + config.entity_type_size #+ char_hidden

        self.linear_re = nn.Linear(hidden_size * 2,  hidden_size)

        self.linear_sent = nn.Linear(hidden_size * 2,  hidden_size)

        self.bili = torch.nn.Bilinear(hidden_size, hidden_size, hidden_size)

        self.self_att = SelfAttention(hidden_size)

        self.bili = torch.nn.Bilinear(hidden_size+config.dis_size,  hidden_size+config.dis_size, hidden_size)
        self.dis_embed = nn.Embedding(20, config.dis_size, padding_idx=10)

        self.linear_output = nn.Linear(2 * hidden_size, config.relation_num)

        self.relu = nn.ReLU()

        self.dropout_rate = nn.Dropout(config.dropout_rate)

        self.rnn_sent = Encoder(input_size, hidden_size, config.dropout_emb, config.dropout_rate)
        self.hidden_size = hidden_size

        self.use_struct_att = config.use_struct_att
        if self.use_struct_att:
            self.structInduction = StructInduction(hidden_size // 2, hidden_size, True)

        self.dropout_gcn = nn.Dropout(config.dropout_gcn)
        self.reasoner_layer_first = config.reasoner_layer_first
        self.reasoner_layer_second = config.reasoner_layer_second
        self.use_reasoning_block = config.use_reasoning_block
        if self.use_reasoning_block:
            self.reasoner = nn.ModuleList()
            self.reasoner.append(DynamicReasoner(hidden_size, self.reasoner_layer_first, self.dropout_gcn))
            self.reasoner.append(DynamicReasoner(hidden_size, self.reasoner_layer_second, self.dropout_gcn))
Example #5
    def __init__(self, config):
        super(LSR, self).__init__()
        self.config = config


        self.bert = BertModel.from_pretrained('bert-base-uncased')
        print("loaded bert-base-uncased")

        hidden_size = config.rnn_hidden
        bert_hidden_size = 768
        self.linear_re = nn.Linear(bert_hidden_size, hidden_size)

        self.linear_sent = nn.Linear(hidden_size * 2,  hidden_size)

        self.bili = torch.nn.Bilinear(hidden_size, hidden_size, hidden_size)

        self.self_att = SelfAttention(hidden_size)

        self.bili = torch.nn.Bilinear(hidden_size+config.dis_size,  hidden_size+config.dis_size, hidden_size)
        self.dis_embed = nn.Embedding(20, config.dis_size, padding_idx=10)

        self.linear_output = nn.Linear(2 * hidden_size, config.relation_num)

        self.relu = nn.ReLU()

        self.dropout_rate = nn.Dropout(config.dropout_rate)

        #self.rnn_sent = Encoder(input_size, hidden_size, config.dropout_emb, config.dropout_rate)
        self.hidden_size = hidden_size

        self.use_struct_att = config.use_struct_att
        if self.use_struct_att:
            self.structInduction = StructInduction(hidden_size // 2, hidden_size, True)

        self.dropout_gcn = nn.Dropout(config.dropout_gcn)
        self.reasoner_layer_first = config.reasoner_layer_first
        self.reasoner_layer_second = config.reasoner_layer_second
        self.use_reasoning_block = config.use_reasoning_block
        if self.use_reasoning_block:
            self.reasoner = nn.ModuleList()
            self.reasoner.append(DynamicReasoner(hidden_size, self.reasoner_layer_first, self.dropout_gcn))
            self.reasoner.append(DynamicReasoner(hidden_size, self.reasoner_layer_second, self.dropout_gcn))
Example #6
    def __init__(self,
                 vocab_size,
                 hidden_size,
                 embedding_size,
                 bidirectional=True,
                 embedding=None):
        super(Encoder, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.bidirectional = bidirectional
        self.gru = nn.LSTM(self.embedding_size,
                           self.hidden_size,
                           bidirectional=self.bidirectional,
                           batch_first=True)
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
        if embedding is not None:
            self.embedding.weight = nn.Parameter(embedding)

        self.attention = SelfAttention(hidden_size=self.hidden_size)
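
The Encoder's forward pass is not shown. A SelfAttention(hidden_size=...) layer of this kind is commonly used to pool the recurrent outputs over time; the sketch below is an illustrative additive-attention pooling with my own naming (note that a bidirectional LSTM actually emits 2 * hidden_size features, which the real module presumably accounts for):

import torch
import torch.nn as nn

class AttentionPoolSketch(nn.Module):
    """Illustrative additive attention pooling over RNN outputs.
    Maps (B, T, H) -> (B, H), returning the attention weights as well."""
    def __init__(self, hidden_size):
        super().__init__()
        self.score = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, outputs, mask=None):
        scores = self.score(outputs).squeeze(-1)                      # (B, T)
        if mask is not None:
            scores = scores.masked_fill(~mask, float('-inf'))
        weights = torch.softmax(scores, dim=-1)                       # (B, T)
        pooled = torch.bmm(weights.unsqueeze(1), outputs).squeeze(1)  # (B, H)
        return pooled, weights
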
Example #7
def graph_net(arglist):
    I = []
    for _ in range(no_agents):
        I.append(Input(shape=(
            arglist.history_size,
            no_features,
        )))

    outputs = []
    temporal_state = None
    for i in range(no_agents):
        if arglist.temporal_mode.lower() == "rnn":
            temporal_state = GRU(arglist.no_neurons)(I[i])
        elif arglist.temporal_mode.lower() == "attention":
            temporal_state = SelfAttention(
                activation=tf.keras.layers.LeakyReLU(alpha=0.1))(I[i])
            temporal_state = Lambda(lambda x: x[:, -1])(temporal_state)
        else:
            raise RuntimeError(
                "Temporal Information Layer should be rnn or attention but %s found!"
                % arglist.temporal_mode)
        dense = Dense(
            arglist.no_neurons,
            kernel_initializer=tf.keras.initializers.he_uniform(),
            activation=tf.keras.layers.LeakyReLU(alpha=0.1))(temporal_state)
        med_dense = Dense(
            arglist.no_neurons,
            kernel_initializer=tf.keras.initializers.he_uniform(),
            activation=tf.keras.layers.LeakyReLU(alpha=0.1))(dense)
        last_dense = Dense(
            no_actions,
            kernel_initializer=tf.keras.initializers.he_uniform())(med_dense)
        outputs.append(last_dense)

    V = tf.stack(outputs, axis=1)
    model = Model(I, V)
    model._name = "final_network"
    tf.keras.utils.plot_model(model, show_shapes=True)
    return model
Example #8
    def __init__(self, args):
        super(ProtoNet, self).__init__()
        self.n_way = args.n_way
        self.k_shot = args.k_shot
        self.dist_method = args.dist_method
        self.in_channels = args.pc_in_dim
        self.n_points = args.pc_npts
        self.use_attention = args.use_attention

        self.encoder = DGCNN(args.edgeconv_widths,
                             args.dgcnn_mlp_widths,
                             args.pc_in_dim,
                             k=args.dgcnn_k)
        self.base_learner = BaseLearner(args.dgcnn_mlp_widths[-1],
                                        args.base_widths)

        if self.use_attention:
            self.att_learner = SelfAttention(args.dgcnn_mlp_widths[-1],
                                             args.output_dim)
        else:
            self.linear_mapper = nn.Conv1d(args.dgcnn_mlp_widths[-1],
                                           args.output_dim,
                                           1,
                                           bias=False)