Example #1
    def to_tensor_dict(
            self,
            examples: List[Example],
            return_prediction_target=True) -> Dict[str, torch.Tensor]:
        from model.sequential_encoder import SequentialEncoder
        from model.graph_encoder import GraphASTEncoder

        # Lazily annotate examples that have not been pre-processed yet
        if not hasattr(examples[0], 'target_prediction_seq_length'):
            for example in examples:
                self.annotate_example(example)

        encoder_type = self.config['encoder']['type']
        if encoder_type == 'GraphASTEncoder':
            init_with_seq_encoding = \
                self.config['encoder']['init_with_seq_encoding']
            packed_graph, tensor_dict = GraphASTEncoder.to_packed_graph(
                [e.ast for e in examples],
                connections=self.config['encoder']['connections'],
                init_with_seq_encoding=init_with_seq_encoding)

            if init_with_seq_encoding:
                seq_tensor_dict = SequentialEncoder.to_tensor_dict(examples)
                tensor_dict['seq_encoder_input'] = seq_tensor_dict

            _tensors = GraphASTEncoder.to_tensor_dict(packed_graph,
                                                      self.grammar, self.vocab)
            tensor_dict.update(_tensors)
        elif encoder_type == 'SequentialEncoder':
            tensor_dict = SequentialEncoder.to_tensor_dict(examples)
        elif encoder_type == 'HybridEncoder':
            graph_config = self.config['encoder']['graph_encoder']
            packed_graph, gnn_tensor_dict = GraphASTEncoder.to_packed_graph(
                [e.ast for e in examples],
                connections=graph_config['connections'])
            gnn_tensors = GraphASTEncoder.to_tensor_dict(
                packed_graph, self.grammar, self.vocab)
            gnn_tensor_dict.update(gnn_tensors)

            seq_tensor_dict = SequentialEncoder.to_tensor_dict(examples)

            tensor_dict = {
                'graph_encoder_input': gnn_tensor_dict,
                'seq_encoder_input': seq_tensor_dict
            }
        else:
            raise ValueError(f'Unknown encoder type: {encoder_type}')

        # Attach ground-truth prediction targets during training, or when
        # explicitly requested for supervised evaluation
        if self.train or return_prediction_target:
            prediction_target = self.to_batched_prediction_target(examples)
            tensor_dict['prediction_target'] = prediction_target

        if not self.train and hasattr(examples[0], 'test_meta'):
            tensor_dict['test_meta'] = [e.test_meta for e in examples]

        tensor_dict['batch_size'] = len(examples)
        num_elements = nn_util.get_tensor_dict_size(tensor_dict)
        tensor_dict['num_elements'] = num_elements

        return tensor_dict
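
The branching above is driven by a nested configuration dictionary. As a rough, non-authoritative sketch of the shape it expects (only the keys this snippet actually reads are shown; the concrete connection names are assumptions for illustration):

# Hypothetical config sketch for to_tensor_dict; anything an individual
# encoder additionally needs (vocab paths, embedding sizes, ...) is omitted.
example_config = {
    'encoder': {
        'type': 'HybridEncoder',  # or 'GraphASTEncoder' / 'SequentialEncoder'
        # read when type == 'GraphASTEncoder'
        'connections': ['top_down', 'bottom_up'],  # assumed edge-type names
        'init_with_seq_encoding': False,
        # read when type == 'HybridEncoder'
        'graph_encoder': {
            'connections': ['top_down', 'bottom_up'],
        },
    },
}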
Example #2
    def __init__(self,
                 gnn: GatedGraphNeuralNetwork,
                 connections: List[str],
                 node_syntax_type_embedding_size: int,
                 decoder_hidden_size: int,
                 node_type_embedder: NodeTypeEmbedder,
                 node_content_embedder: SubTokenEmbedder,
                 vocab: Vocab,
                 config):
        super(GraphASTEncoder, self).__init__()

        self.connections = connections
        self.gnn = gnn

        self.vocab = vocab
        self.grammar = grammar = vocab.grammar

        # Two extra embedding slots for the synthetic variable-master and
        # master node types (their indices are defined just below)
        self.node_syntax_type_embedding = nn.Embedding(
            len(grammar.syntax_types) + 2,
            node_syntax_type_embedding_size
        )
        self.variable_master_node_type_idx = len(grammar.syntax_types)
        self.master_node_type_idx = self.variable_master_node_type_idx + 1

        self.var_node_name_embedding = \
            nn.Embedding(len(vocab.source), gnn.hidden_size, padding_idx=0)

        self.node_type_embedder = node_type_embedder
        self.node_content_embedder = node_content_embedder

        self.type_and_content_hybrid = nn.Linear(
            node_syntax_type_embedding_size
            + node_type_embedder.embeddings.embedding_dim
            + node_content_embedder.embeddings.embedding_dim,
            gnn.hidden_size,
            bias=False
        )

        self.decoder_cell_init = \
            nn.Linear(gnn.hidden_size, decoder_hidden_size)

        self.init_with_seq_encoding = config['init_with_seq_encoding']
        if self.init_with_seq_encoding:
            self.seq_encoder = SequentialEncoder.build(config['seq_encoder'])
            seq_encoding_size = config['seq_encoder']['source_encoding_size']
            if seq_encoding_size != gnn.hidden_size:
                # Project sequential variable encodings to the GNN hidden size
                self.seq_variable_encoding_to_graph_linear = nn.Linear(
                    seq_encoding_size, gnn.hidden_size)
            else:
                # Sizes already match; nn.Identity keeps the module picklable,
                # unlike a bare lambda
                self.seq_variable_encoding_to_graph_linear = nn.Identity()

        self.config: Dict = config
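
The type_and_content_hybrid layer above fuses three per-node embeddings (syntax type, node type, sub-token content) into a single vector of the GNN's hidden size. A minimal, self-contained sketch of that dimensional bookkeeping, with all sizes invented for illustration:

import torch
import torch.nn as nn

num_nodes = 7
syntax_type_emb = torch.randn(num_nodes, 64)    # node_syntax_type_embedding
node_type_emb = torch.randn(num_nodes, 128)     # node_type_embedder output
node_content_emb = torch.randn(num_nodes, 128)  # node_content_embedder output

# Mirrors type_and_content_hybrid with a hypothetical gnn.hidden_size of 256
hybrid = nn.Linear(64 + 128 + 128, 256, bias=False)
node_init = hybrid(torch.cat(
    [syntax_type_emb, node_type_emb, node_content_emb], dim=-1))
assert node_init.shape == (num_nodes, 256)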
Example #3
    def __init__(self, config):
        super(HybridEncoder, self).__init__()

        self.graph_encoder = GraphASTEncoder.build(config['graph_encoder'])
        self.seq_encoder = SequentialEncoder.build(config['seq_encoder'])

        self.hybrid_method = config['hybrid_method']
        if self.hybrid_method == 'linear_proj':
            # Project the concatenated sequential and graph encodings down to
            # a single source encoding size
            self.projection = nn.Linear(
                config['seq_encoder']['decoder_hidden_size']
                + config['graph_encoder']['gnn']['hidden_size'],
                config['source_encoding_size'],
                bias=False)
        else:
            assert self.hybrid_method == 'concat', \
                f'unknown hybrid_method: {self.hybrid_method}'

        self.config = config
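
From the two branches above: 'concat' leaves the concatenated sequential and graph encodings at their combined size, while 'linear_proj' maps them to config['source_encoding_size'] through the bias-free projection. A rough sketch of that difference (the shapes and per-variable layout are assumptions, not the repository's actual forward code):

import torch
import torch.nn as nn

seq_size, graph_size, out_size = 128, 256, 256
seq_enc = torch.randn(4, 10, seq_size)      # (batch, variables, seq encoding)
graph_enc = torch.randn(4, 10, graph_size)  # (batch, variables, graph encoding)

combined = torch.cat([seq_enc, graph_enc], dim=-1)  # 'concat': last dim is 384

projection = nn.Linear(seq_size + graph_size, out_size, bias=False)
projected = projection(combined)            # 'linear_proj': last dim is 256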
Example #4
    @classmethod
    def default_params(cls):
        return {
            "graph_encoder": GraphASTEncoder.default_params(),
            "seq_encoder": SequentialEncoder.default_params(),
            "hybrid_method": "linear_proj"
        }
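
Since HybridEncoder.__init__ (Example #3) takes the config dict directly, these defaults could in principle be passed straight through; whether the nested defaults are complete enough to build both sub-encoders depends on GraphASTEncoder.default_params() and SequentialEncoder.default_params(), which are not shown here. A hedged usage sketch:

# Hypothetical usage; 'concat' is chosen so that no projection sizes beyond
# the defaults are required by __init__.
config = HybridEncoder.default_params()
config['hybrid_method'] = 'concat'
encoder = HybridEncoder(config)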