Example #1
class CandidateNodeAnnotationModel(
    AbstractNeuralModel[Tuple[str, bool], Any, CandidateNodeAnnotationModule],
    AbstractNodeEmbedder,
):
    def __init__(self, embedding_size: int = 128, **kwargs):
        super().__init__()
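        # Reserve one embedding dimension for the boolean is_candidate flag;
        # representation_size() below adds it back.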
        self.__str_node_annotation = StrElementRepresentationModel(
            embedding_size=embedding_size - 1, **kwargs)

    def update_metadata_from(self, datapoint: Tuple[str, bool]) -> None:
        self.__str_node_annotation.update_metadata_from(datapoint[0])

    def build_neural_module(self) -> CandidateNodeAnnotationModule:
        return CandidateNodeAnnotationModule(
            node_embeddings_module=self.__str_node_annotation.build_neural_module(),
        )

    def tensorize(self, datapoint: Tuple[str, bool]) -> Tuple[Any, bool]:
        return self.__str_node_annotation.tensorize(datapoint[0]), datapoint[1]

    def initialize_minibatch(self) -> Dict[str, Any]:
        return {
            "node_data": self.__str_node_annotation.initialize_minibatch(),
            "is_candidate": []
        }

    def extend_minibatch_with(self, tensorized_datapoint: Tuple[Any, bool],
                              partial_minibatch: Dict[str, Any]) -> bool:
        continue_extending = self.__str_node_annotation.extend_minibatch_with(
            tensorized_datapoint[0], partial_minibatch["node_data"])
        partial_minibatch["is_candidate"].append(tensorized_datapoint[1])
        return continue_extending

    def finalize_minibatch(self, accumulated_minibatch_data: Dict[str, Any],
                           device: Any) -> Dict[str, Any]:
        return {
            "node_data": self.__str_node_annotation.finalize_minibatch(
                accumulated_minibatch_data["node_data"], device=device),
            "is_candidate": torch.tensor(
                accumulated_minibatch_data["is_candidate"],
                dtype=torch.float32,
                device=device),
        }

    def representation_size(self) -> int:
        return self.__str_node_annotation.representation_size() + 1
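
The model reserves one embedding dimension so the module can append the boolean is_candidate flag to each node's string embedding, which is why representation_size() reports one extra feature. A minimal sketch of that concatenation pattern with placeholder tensors (an assumption about the module's internals, not the library's code):

import torch

str_embeddings = torch.randn(4, 127)               # (num_nodes, embedding_size - 1)
is_candidate = torch.tensor([1.0, 0.0, 1.0, 0.0])  # one flag per node
node_repr = torch.cat([str_embeddings, is_candidate.unsqueeze(-1)], dim=-1)
assert node_repr.shape == (4, 128)                 # back to the full embedding_size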
Example #2
def create_var_naming_gnn_model(model_cfg):
    hidden_state_size = int(model_cfg.hidden_state_size)
    dropout = float(model_cfg.dropout)

    def create_mlp_mp_layers(num_edges: int):
        def mlp_mp_constructor():
            return MlpMessagePassingLayer(
                input_state_dimension=hidden_state_size,
                message_dimension=hidden_state_size,
                output_state_dimension=hidden_state_size,
                num_edge_types=num_edges,
                message_aggregation_function="max",
                dropout_rate=dropout,
            )

        def mlp_mp_after_res_constructor():
            return MlpMessagePassingLayer(
                input_state_dimension=2 * hidden_state_size,
                message_dimension=2 * hidden_state_size,
                output_state_dimension=hidden_state_size,
                num_edge_types=num_edges,
                message_aggregation_function="max",
                dropout_rate=dropout,
            )

        r1 = ConcatResidualLayer(hidden_state_size)
        r2 = ConcatResidualLayer(hidden_state_size)
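        # The dummy layers mark where r1/r2 capture the node state; each
        # residual then (presumably) concatenates the saved state back in,
        # which is why the layer placed right after a residual takes
        # 2 * hidden_state_size inputs.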
        return [
            r1.pass_through_dummy_layer(),
            mlp_mp_constructor(),
            mlp_mp_constructor(),
            mlp_mp_constructor(),
            r1,
            mlp_mp_after_res_constructor(),
            r2.pass_through_dummy_layer(),
            mlp_mp_constructor(),
            mlp_mp_constructor(),
            mlp_mp_constructor(),
            r2,
            mlp_mp_after_res_constructor(),
        ]

    def create_ggnn_mp_layers(num_edges: int):
        ggnn_mp = GatedMessagePassingLayer(
            state_dimension=hidden_state_size,
            message_dimension=hidden_state_size,
            num_edge_types=num_edges,
            message_aggregation_function="sum",
            dropout_rate=0.01,
        )
        r1 = MeanResidualLayer(hidden_state_size)
        r2 = MeanResidualLayer(hidden_state_size)
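        # MeanResidualLayer averages the saved and current states rather than
        # concatenating them, so the state dimension stays at hidden_state_size.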

        def global_update():
            return GruGlobalStateUpdate(
                global_graph_representation_module=WeightedSumVarSizedElementReduce(
                    hidden_state_size),
                input_state_size=hidden_state_size,
                summarized_state_size=hidden_state_size,
                dropout_rate=dropout,
            )

        return [
            r1.pass_through_dummy_layer(),
            r2.pass_through_dummy_layer(),
            ggnn_mp,
            ggnn_mp,
            ggnn_mp,
            global_update(),
            ggnn_mp,
            r1,
            ggnn_mp,
            ggnn_mp,
            ggnn_mp,
            global_update(),
            ggnn_mp,
            r2,
        ]

    if model_cfg.mp_type == "mlp":
        create_mp_layers = create_mlp_mp_layers
    elif model_cfg.mp_type == "ggnn":
        create_mp_layers = create_ggnn_mp_layers
    else:
        raise ValueError('mp_type must be in ["mlp", "ggnn"]')
    return VarNamingModel(
        gnn_model=GraphNeuralNetworkModel(
            node_representation_model=StrElementRepresentationModel(
                embedding_size=hidden_state_size,
                token_splitting="subtoken",
                vocabulary_size=int(model_cfg.gnn_vocabulary_size)),
            message_passing_layer_creator=create_mp_layers,
            max_nodes_per_graph=int(model_cfg.max_nodes_per_graph),
            max_graph_edges=int(model_cfg.max_graph_edges),
            introduce_backwards_edges=False,
            add_self_edges=False,
            stop_extending_minibatch_after_num_nodes=int(
                model_cfg.stop_extending_minibatch_after_num_nodes)),
        decoder_model=RNNDecoderModel(
            target_representation_model=StrRepresentationModel(
                embedding_size=hidden_state_size,
                token_splitting="subtoken",
                vocabulary_size=int(model_cfg.decoder_vocabulary_size)),
            create_rnn=lambda: nn.GRU(input_size=hidden_state_size,
                                      hidden_size=hidden_state_size,
                                      dropout=dropout)))
Example #3
def create_graph2class_gnn_model(hidden_state_size: int = 64, dropout_rate: float = 0.1):
    def create_ggnn_mp_layers(num_edges: int):
        ggnn_mp = GatedMessagePassingLayer(
            state_dimension=hidden_state_size,
            message_dimension=hidden_state_size,
            num_edge_types=num_edges,
            message_aggregation_function="max",
            dropout_rate=dropout_rate,
        )
        r1 = ConcatResidualLayer(hidden_state_size)
        return [
            r1.pass_through_dummy_layer(),
            ggnn_mp,
            ggnn_mp,
            ggnn_mp,
            ggnn_mp,
            ggnn_mp,
            ggnn_mp,
            ggnn_mp,
            r1,
            GatedMessagePassingLayer(
                state_dimension=2 * hidden_state_size,
                message_dimension=hidden_state_size,
                num_edge_types=num_edges,
                message_aggregation_function="max",
                dropout_rate=dropout_rate,
            ),
        ]

    def create_mlp_mp_layers(num_edges: int):
        def mlp_mp_constructor():
            return MlpMessagePassingLayer(
                input_state_dimension=hidden_state_size,
                message_dimension=hidden_state_size,
                output_state_dimension=hidden_state_size,
                num_edge_types=num_edges,
                message_aggregation_function="max",
                dropout_rate=dropout_rate,
            )

        def mlp_mp_after_res_constructor():
            return MlpMessagePassingLayer(
                input_state_dimension=2 * hidden_state_size,
                message_dimension=2 * hidden_state_size,
                output_state_dimension=hidden_state_size,
                num_edge_types=num_edges,
                message_aggregation_function="max",
                dropout_rate=dropout_rate,
            )

        r1 = ConcatResidualLayer(hidden_state_size)
        r2 = ConcatResidualLayer(hidden_state_size)
        return [
            r1.pass_through_dummy_layer(),
            mlp_mp_constructor(),
            mlp_mp_constructor(),
            mlp_mp_constructor(),
            r1,
            mlp_mp_after_res_constructor(),
            r2.pass_through_dummy_layer(),
            mlp_mp_constructor(),
            mlp_mp_constructor(),
            mlp_mp_constructor(),
            r2,
            mlp_mp_after_res_constructor(),
        ]

    return Graph2Class(
        gnn_model=GraphNeuralNetworkModel(
            node_representation_model=StrElementRepresentationModel(
                embedding_size=hidden_state_size,
                token_splitting="subtoken",
                subtoken_combination="mean",
                vocabulary_size=10000,
                min_freq_threshold=5,
                dropout_rate=dropout_rate,
            ),
            message_passing_layer_creator=create_mlp_mp_layers,
            max_nodes_per_graph=100000,
            max_graph_edges=500000,
            introduce_backwards_edges=True,
            add_self_edges=True,
            stop_extending_minibatch_after_num_nodes=120000,
            edge_dropout_rate=0.0,
        ),
        max_num_classes=100,
    )
Example #4
def run(arguments):
    if arguments["--aml"]:
        import torch
        from azureml.core.run import Run

        aml_ctx = Run.get_context()
        assert torch.cuda.is_available(), "No CUDA available. Aborting training."
    else:
        aml_ctx = None

    log_path = configure_logging(aml_ctx)
    azure_info_path = arguments.get("--azure-info", None)

    training_data_path = RichPath.create(arguments["TRAIN_DATA_PATH"],
                                         azure_info_path)
    training_data = LazyDataIterable(
        lambda: training_data_path.read_as_jsonl())

    validation_data_path = RichPath.create(arguments["VALID_DATA_PATH"],
                                           azure_info_path)
    validation_data = LazyDataIterable(
        lambda: validation_data_path.read_as_jsonl())

    model_path = Path(arguments["MODEL_FILENAME"])
    assert model_path.name.endswith(".pkl.gz"), \
        "MODEL_FILENAME must have a `.pkl.gz` suffix."

    initialize_metadata = True
    restore_path = arguments.get("--restore-path", None)
    if restore_path:
        initialize_metadata = False
        model, nn = AbstractNeuralModel.restore_model(Path(restore_path))
    else:
        embedding_size = 128
        dropout_rate = 0.1
        nn = None

        def create_mp_layers(num_edges: int):
            ggnn_mp = GatedMessagePassingLayer(
                state_dimension=embedding_size,
                message_dimension=embedding_size,
                num_edge_types=num_edges,
                message_aggregation_function="sum",
                dropout_rate=dropout_rate,
            )
            r1 = MeanResidualLayer(embedding_size)
            return [
                r1.pass_through_dummy_layer(),
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                r1,
                GatedMessagePassingLayer(
                    state_dimension=embedding_size,
                    message_dimension=embedding_size,
                    num_edge_types=num_edges,
                    message_aggregation_function="sum",
                    dropout_rate=dropout_rate,
                ),
            ]

        model = Graph2Seq(
            gnn_model=GraphNeuralNetworkModel(
                node_representation_model=StrElementRepresentationModel(
                    token_splitting="token",
                    embedding_size=embedding_size,
                ),
                message_passing_layer_creator=create_mp_layers,
            ),
            decoder=GruCopyingDecoderModel(hidden_size=128,
                                           embedding_size=256,
                                           memories_hidden_dim=embedding_size),
        )

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(arguments["--max-num-epochs"]),
        minibatch_size=int(arguments["--minibatch-size"]),
        enable_amp=arguments["--amp"],
    )
    if nn is not None:
        trainer.neural_module = nn

    trainer.register_train_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "train", model, epoch, metrics))
    trainer.register_validation_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "valid", model, epoch, metrics))

    trainer.train(
        training_data,
        validation_data,
        show_progress_bar=not arguments["--quiet"],
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
    )

    if aml_ctx is not None:
        aml_ctx.upload_file(name="model.pkl.gz",
                            path_or_stream=str(model_path))
        aml_ctx.upload_file(name="full.log", path_or_stream=log_path)