Code Example #1
File: test_config.py  Project: sudhu26/fonduer
import os

from fonduer.utils.config import get_config


def test_load_config():
    """Simple sanity check for loading feature config."""
    # Check that default is loaded
    defaults = get_config()
    assert defaults["featurization"]["textual"]["window_feature"]["size"] == 3
    assert defaults["learning"]["LSTM"]["emb_dim"] == 100
    assert defaults["learning"]["LSTM"]["bias"] is False

    # Check that file is loaded if present
    settings = get_config(os.path.dirname(__file__))
    assert settings["featurization"]["textual"]["window_feature"]["size"] == 8
    assert settings["learning"]["LSTM"]["bias"] is False

    # Check that defaults are used for unspecified settings
    assert (settings["featurization"]["tabular"]["unary_features"]
            ["get_head_ngrams"]["max"] == 2)
Code Example #2
import logging
import os

from fonduer.utils.config import get_config


def test_load_config(caplog):
    """Simple sanity check for loading feature config."""
    caplog.set_level(logging.INFO)

    # Check that default is loaded
    defaults = get_config()
    assert defaults["featurization"]["content"]["window_feature"]["size"] == 3
    assert defaults["learning"]["LSTM"]["emb_dim"] == 100
    assert defaults["learning"]["LSTM"]["host_device"] == "CPU"

    # Check that file is loaded if present
    settings = get_config(os.path.dirname(__file__))
    assert settings["featurization"]["content"]["window_feature"]["size"] == 8
    assert settings["learning"]["LSTM"]["host_device"] == "GPU"

    # Check that defaults are used for unspecified settings
    assert (
        settings["featurization"]["table"]["unary_features"]["get_head_ngrams"]["max"]
        == 2
    )
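Note that examples #1 and #2 are apparently the same test from different fonduer versions: the older schema here uses featurization.content / featurization.table and a learning.LSTM.host_device key, where example #1 uses featurization.textual / featurization.tabular and learning.LSTM.bias.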
Code Example #3
    def _update_settings(self, X):
        """
        Update the model arguments.

        :param X: The input data of the model.
        :type X: list of (candidate, features) pairs
        """

        self.logger.info("Load defalut parameters for Logistic Regression")
        config = get_config()["learning"]["LogisticRegression"]

        for key in config.keys():
            if key not in self.settings:
                self.settings[key] = config[key]

        self.settings["input_dim"] = X[1].shape[1]
Code Example #4
    def _update_settings(self, X):
        """
        Update the model arguments.

        :param X: The input data of the model.
        :type X: list of (candidate, features) pairs
        """

        self.logger.info("Load defalut parameters for LSTM")
        config = get_config()["learning"]["LSTM"]

        for key in config.keys():
            if key not in self.settings:
                self.settings[key] = config[key]

        self.settings["relation_arity"] = len(X[0][0])
        self.settings["input_dim"] = X[1].shape[1] + len(
            X[0][0]) * self.settings["hidden_dim"] * (
                2 if self.settings["bidirectional"] else 1)
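The input_dim arithmetic above is easier to read with numbers plugged in; all values below are hypothetical:

n_feature_cols = 1000    # X[1].shape[1]: width of the sparse feature matrix
relation_arity = 2       # len(X[0][0]): a binary relation has two mentions
hidden_dim = 100
bidirectional = True     # a BiLSTM emits 2 * hidden_dim per mention

input_dim = n_feature_cols + relation_arity * hidden_dim * (2 if bidirectional else 1)
assert input_dim == 1400  # 1000 + 2 * 100 * 2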
Code Example #5
    def _update_kwargs(self, X, **model_kwargs):
        """
        Update the model arguments.

        :param X: The input data of the model
        :param model_kwargs: The arguments of the model
        """
        self.logger.info("Load defalut parameters for LSTM")
        settings = get_config()["learning"]["LSTM"]

        for key in settings.keys():
            if key not in model_kwargs:
                model_kwargs[key] = settings[key]

        model_kwargs["relation_arity"] = len(X[0][0])
        model_kwargs["input_dim"] = X[0][1].shape[1] + len(X[0][0]) * model_kwargs[
            "hidden_dim"
        ] * (2 if model_kwargs["bidirectional"] else 1)

        return model_kwargs
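Note the indexing difference from examples #3 and #4: there the feature matrix is taken from X[1], treating X as a (candidates, feature-matrix) pair, while here it is X[0][1], the features of the first (candidate, features) pair.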
Code Example #6
from treedlib import (
    compile_entity_feature_generator,
    compile_relation_feature_generator,
)

from fonduer.candidates.models.span_mention import TemporarySpanMention
from fonduer.features.feature_libs.tree_structs import corenlp_to_xmltree
from fonduer.utils.config import get_config
from fonduer.utils.data_model_utils import get_left_ngrams, get_right_ngrams
from fonduer.utils.utils import get_as_dict, tokens_to_ngrams

DEF_VALUE = 1

unary_ddlib_feats = {}
unary_word_feats = {}
unary_tdl_feats = {}
binary_tdl_feats = {}
settings = get_config()


def get_content_feats(candidates):
    candidates = candidates if isinstance(candidates, list) else [candidates]
    for candidate in candidates:
        args = tuple([m.context for m in candidate.get_mentions()])
        if not isinstance(args[0], TemporarySpanMention):
            raise ValueError(
                f"Accepts Span-type arguments, {type(candidate)}-type found."
            )

        # Unary candidates
        if len(args) == 1:
            span = args[0]
            if span.sentence.is_lingual():
                get_tdl_feats = compile_entity_feature_generator()
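The snippet breaks off inside get_content_feats, but the module-level dicts declared at the top (unary_tdl_feats and friends) act as feature caches. A minimal sketch of the memoization pattern they support, with a hypothetical key and helper (fonduer's own keying and storage differ):

def memoized_feats(cache: dict, key, compute):
    # Compute a span's features once; later calls with the same key reuse them.
    if key not in cache:
        cache[key] = list(compute())
    return cache[key]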
Code Example #7
File: task.py  Project: wajdikhattel/fonduer
def create_task(
    task_names: Union[str, List[str]],
    n_arities: Union[int, List[int]],
    n_features: int,
    n_classes: Union[int, List[int]],
    emb_layer: Optional[EmbeddingModule],
    model: str = "LSTM",
    mode: str = "MTL",
) -> List[EmmentalTask]:
    """Create task from relation(s).

    :param task_names: Relation name(s). If str, a single relation; if List[str],
        multiple relations.
    :param n_arities: The arity of each relation.
    :param n_features: The multimodal feature set size.
    :param n_classes: Number of classes for each task (only classification tasks
        are supported for now).
    :param emb_layer: The embedding layer for LSTM. Not needed for the
        LogisticRegression model.
    :param model: Model name (available models: "LSTM", "LogisticRegression"),
        defaults to "LSTM".
    :param mode: Learning mode (available modes: "STL", "MTL"),
        defaults to "MTL".
    """
    if model not in ["LSTM", "LogisticRegression"]:
        raise ValueError(
            f"Unrecognized model {model}. Only support {['LSTM', 'LogisticRegression']}"
        )

    if mode not in ["STL", "MTL"]:
        raise ValueError(f"Unrecognized mode {mode}. Only support {['STL', 'MTL']}")

    config = get_config()["learning"][model]
    logger.info(f"{model} model config: {config}")

    if not isinstance(task_names, list):
        task_names = [task_names]
    if not isinstance(n_arities, list):
        n_arities = [n_arities]
    if not isinstance(n_classes, list):
        n_classes = [n_classes]

    tasks = []

    for task_name, n_arity, n_class in zip(task_names, n_arities, n_classes):
        if mode == "MTL":
            feature_module_name = "shared_feature"
        else:
            feature_module_name = f"{task_name}_feature"

        if model == "LSTM":
            module_pool = nn.ModuleDict(
                {
                    "emb": emb_layer,
                    feature_module_name: SparseLinear(
                        n_features + 1, config["hidden_dim"], bias=config["bias"]
                    ),
                }
            )
            for i in range(n_arity):
                module_pool.update(
                    {
                        f"{task_name}_lstm{i}": RNN(
                            num_classes=0,
                            emb_size=emb_layer.dim,
                            lstm_hidden=config["hidden_dim"],
                            attention=config["attention"],
                            dropout=config["dropout"],
                            bidirectional=config["bidirectional"],
                        )
                    }
                )
            module_pool.update(
                {
                    f"{task_name}_pred_head": ConcatLinear(
                        [f"{task_name}_lstm{i}" for i in range(n_arity)]
                        + [feature_module_name],
                        config["hidden_dim"] * (2 * n_arity + 1)
                        if config["bidirectional"]
                        else config["hidden_dim"] * (n_arity + 1),
                        n_class,
                    )
                }
            )

            task_flow = []
            task_flow += [
                {
                    "name": f"{task_name}_emb{i}",
                    "module": "emb",
                    "inputs": [("_input_", f"m{i}")],
                }
                for i in range(n_arity)
            ]
            task_flow += [
                {
                    "name": f"{task_name}_lstm{i}",
                    "module": f"{task_name}_lstm{i}",
                    "inputs": [(f"{task_name}_emb{i}", 0), ("_input_", f"m{i}_mask")],
                }
                for i in range(n_arity)
            ]
            task_flow += [
                {
                    "name": feature_module_name,
                    "module": feature_module_name,
                    "inputs": [
                        ("_input_", "feature_index"),
                        ("_input_", "feature_weight"),
                    ],
                }
            ]
            task_flow += [
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": None,
                }
            ]
        elif model == "LogisticRegression":
            module_pool = nn.ModuleDict(
                {
                    feature_module_name: SparseLinear(
                        n_features + 1, config["hidden_dim"], bias=config["bias"]
                    ),
                    f"{task_name}_pred_head": ConcatLinear(
                        [feature_module_name], config["hidden_dim"], n_class
                    ),
                }
            )

            task_flow = [
                {
                    "name": feature_module_name,
                    "module": feature_module_name,
                    "inputs": [
                        ("_input_", "feature_index"),
                        ("_input_", "feature_weight"),
                    ],
                },
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": None,
                },
            ]
        else:
            raise ValueError(f"Unrecognized model {model}.")

        tasks.append(
            EmmentalTask(
                name=task_name,
                module_pool=module_pool,
                task_flow=task_flow,
                loss_func=partial(loss, f"{task_name}_pred_head"),
                output_func=partial(output, f"{task_name}_pred_head"),
                scorer=Scorer(metrics=["accuracy", "precision", "recall", "f1"]),
            )
        )

    return tasks
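For reference, a hedged usage sketch of create_task with made-up values; per the docstring, the LogisticRegression model needs no embedding layer, so emb_layer can be None:

tasks = create_task(
    task_names="price_relation",  # hypothetical relation name
    n_arities=2,                  # binary relation
    n_features=10000,             # multimodal feature set size
    n_classes=2,                  # binary classification
    emb_layer=None,               # not needed for LogisticRegression
    model="LogisticRegression",
    mode="STL",
)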