Exemple #1
0
 def test_get_dictionaries(self):
     """Compare get_dictionaries output with the fixtures d1 and d2.

     A two-row dataframe with columns `from`, `rel`, `to` is built and the
     result of `get_dictionaries` is checked against `self.d1` (ent=True)
     and `self.d2` (ent=False).
     """
     rows = [['a', 'R1', 'b'], ['c', 'R2', 'd']]
     header = ['from', 'rel', 'to']
     frame = pd.DataFrame(rows, columns=header)

     entity_map = get_dictionaries(frame, ent=True)
     assert entity_map == self.d1

     relation_map = get_dictionaries(frame, ent=False)
     assert relation_map == self.d2
Exemple #2
0
    def __init__(
        self,
        df=None,
        kg=None,
        ent2ix=None,
        rel2ix=None,
        dict_of_heads=None,
        dict_of_tails=None,
    ):
        """Build the object from a dataframe or from a dict of index tensors.

        Exactly one of `df` and `kg` must be provided.

        Parameters
        ----------
        df: optional
            Dataframe read through the columns `from`, `to`, `rel` and
            `how-much`.
        kg: dict, optional
            Dict with the keys `heads`, `tails`, `relations` and
            `magnitudes`. `kg["heads"].shape[0]` is used as the number of
            facts.
        ent2ix: dict, optional
            Mapping from entities to indices. Required when building from
            `kg`; otherwise computed with `get_dictionaries(df, ent=True)`.
        rel2ix: dict, optional
            Mapping from relations to indices. Required when building from
            `kg`; otherwise computed with `get_dictionaries(df, ent=False)`.
        dict_of_heads: optional
            Stored as-is when both `dict_of_heads` and `dict_of_tails` are
            given.
        dict_of_tails: optional
            See `dict_of_heads`. If either is None, both attributes are
            reset to empty `defaultdict(set)` and `self.evaluate_dicts()`
            is called.

        Raises
        ------
        WrongArgumentsError
            If neither or both of `df` and `kg` are given, if `kg` is not a
            dict holding the required keys, or if `ent2ix`/`rel2ix` is
            missing when building from `kg`.
        SanityError
            If `self.sanity_check()` raises an AssertionError.
        """
        # Explicit checks instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently disable the validation.
        if df is None:
            if kg is None:
                raise WrongArgumentsError(
                    "Please provide at least one " "argument of `df` and kg`"
                )
            # `isinstance` + short-circuit `and` so that a non-dict `kg`
            # yields WrongArgumentsError rather than an AttributeError.
            if not (
                isinstance(kg, dict)
                and "heads" in kg
                and "tails" in kg
                and "relations" in kg
            ):
                raise WrongArgumentsError(
                    "Keys in the `kg` dict should "
                    "contain `heads`, `tails`, "
                    "`relations`."
                )
            # `magnitudes` is read below, so fail early with a clear error
            # instead of a bare KeyError.
            if "magnitudes" not in kg:
                raise WrongArgumentsError(
                    "Keys in the `kg` dict should also contain `magnitudes`."
                )
            if rel2ix is None or ent2ix is None:
                raise WrongArgumentsError(
                    "Please provide the two "
                    "dictionaries ent2ix and rel2ix "
                    "if building from `kg`."
                )
        elif kg is not None:
            raise WrongArgumentsError(
                "`df` and kg` arguments should not " "both be provided."
            )

        if ent2ix is None:
            self.ent2ix = get_dictionaries(df, ent=True)
        else:
            self.ent2ix = ent2ix

        if rel2ix is None:
            self.rel2ix = get_dictionaries(df, ent=False)
        else:
            self.rel2ix = rel2ix

        # Counts are derived from the largest index, not from the dict size.
        self.n_ent = max(self.ent2ix.values()) + 1
        self.n_rel = max(self.rel2ix.values()) + 1

        if df is not None:
            # build kg from a pandas dataframe
            self.n_facts = len(df)
            self.head_idx = tensor(df["from"].map(self.ent2ix).values).long()
            self.tail_idx = tensor(df["to"].map(self.ent2ix).values).long()
            self.relations = tensor(df["rel"].map(self.rel2ix).values).long()
            self.magnitudes = tensor(df["how-much"], dtype=float64)
        else:
            # build kg from another kg
            self.n_facts = kg["heads"].shape[0]
            self.head_idx = kg["heads"]
            self.tail_idx = kg["tails"]
            self.relations = kg["relations"]
            self.magnitudes = kg["magnitudes"]

        if dict_of_heads is None or dict_of_tails is None:
            self.dict_of_heads = defaultdict(set)
            self.dict_of_tails = defaultdict(set)
            # presumably fills the two dicts from the stored facts -- the
            # method is defined outside this block
            self.evaluate_dicts()
        else:
            self.dict_of_heads = dict_of_heads
            self.dict_of_tails = dict_of_tails

        try:
            self.sanity_check()
        except AssertionError:
            raise SanityError("Please check the sanity of arguments.")
    def __init__(self,
                 df=None,
                 kg=None,
                 ent2ix=None,
                 rel2ix=None,
                 dict_of_heads=None,
                 dict_of_tails=None,
                 dict_of_rel=None,
                 id2point=None,
                 geo=None):
        """Build the object from a dataframe or from a dict of index tensors.

        Exactly one of `df` and `kg` must be provided.

        Parameters
        ----------
        df: optional
            Dataframe read through the columns `from`, `to` and `rel`.
        kg: dict, optional
            Dict with the keys `heads`, `tails` and `relations`; an optional
            `point` entry is copied to `self.point` when present.
        ent2ix: dict, optional
            Mapping from entities to indices. Required when building from
            `kg`; otherwise computed with `get_dictionaries(df, ent=True)`.
        rel2ix: dict, optional
            Mapping from relations to indices. Required when building from
            `kg`; otherwise computed with `get_dictionaries(df, ent=False)`.
        dict_of_heads, dict_of_tails, dict_of_rel: optional
            Stored as-is when all three are given. If any is None, the three
            attributes are reset to empty `defaultdict(set)` and
            `self.evaluate_dicts()` is called.
        id2point: optional
            Stored as `self.id2point` when not None.
        geo: optional
            Stored as `self.geo`; when given together with `df` it is passed
            to `self.load_point`.

        Raises
        ------
        WrongArgumentsError
            If neither or both of `df` and `kg` are given, if `kg` is not a
            dict holding the required keys, or if `ent2ix`/`rel2ix` is
            missing when building from `kg`.
        SanityError
            If `self.sanity_check()` raises an AssertionError.
        """
        # Explicit checks instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently disable the validation.
        if df is None:
            if kg is None:
                raise WrongArgumentsError("Please provide at least one "
                                          "argument of `df` and kg`")
            # `isinstance` + short-circuit `and` so that a non-dict `kg`
            # yields WrongArgumentsError rather than an AttributeError.
            if not (isinstance(kg, dict) and 'heads' in kg
                    and 'tails' in kg and 'relations' in kg):
                raise WrongArgumentsError("Keys in the `kg` dict should "
                                          "contain `heads`, `tails`, "
                                          "`relations`.")
            if rel2ix is None or ent2ix is None:
                raise WrongArgumentsError("Please provide the two "
                                          "dictionaries ent2ix and rel2ix "
                                          "if building from `kg`.")
        elif kg is not None:
            raise WrongArgumentsError("`df` and kg` arguments should not "
                                      "both be provided.")

        if ent2ix is None:
            self.ent2ix = get_dictionaries(df, ent=True)
        else:
            self.ent2ix = ent2ix

        if rel2ix is None:
            self.rel2ix = get_dictionaries(df, ent=False)
        else:
            self.rel2ix = rel2ix

        # NOTE: `self.id2point` stays unset when `id2point` is None and no
        # `geo` is loaded below.
        if id2point is not None:
            self.id2point = id2point

        # Counts are derived from the largest index, not from the dict size.
        self.n_ent = max(self.ent2ix.values()) + 1
        self.n_rel = max(self.rel2ix.values()) + 1
        self.geo = geo

        if df is not None:
            # build kg from a pandas dataframe
            self.n_facts = len(df)
            self.head_idx = tensor(df['from'].map(self.ent2ix).values).long()
            self.tail_idx = tensor(df['to'].map(self.ent2ix).values).long()
            self.relations = tensor(df['rel'].map(self.rel2ix).values).long()
        else:
            # build kg from another kg
            self.n_facts = kg['heads'].shape[0]
            self.head_idx = kg['heads']
            self.tail_idx = kg['tails']
            self.relations = kg['relations']
            # `point` is optional; only a missing key is tolerated (the
            # previous bare `except` hid every other error as well).
            try:
                self.point = kg['point']
            except KeyError:
                pass

        if (geo is not None) and (df is not None):  # Geo
            self.entity2point, self.id2point = self.load_point(geo)
            # Rows are read positionally: element 0 and element 2 of each
            # row are looked up -- assumes the column order is
            # (from, rel, to); TODO confirm against callers.
            self.point = np.array([[
                self.entity2point[triplet[0]], self.entity2point[triplet[2]]
            ] for triplet in df.values])

        if dict_of_heads is None or dict_of_tails is None or dict_of_rel is None:
            self.dict_of_heads = defaultdict(set)
            self.dict_of_tails = defaultdict(set)
            self.dict_of_rel = defaultdict(set)
            # presumably fills the three dicts from the stored facts -- the
            # method is defined outside this block
            self.evaluate_dicts()
        else:
            self.dict_of_heads = dict_of_heads
            self.dict_of_tails = dict_of_tails
            self.dict_of_rel = dict_of_rel

        try:
            self.sanity_check()
        except AssertionError:
            raise SanityError("Please check the sanity of arguments.")
Exemple #4
0
    def __init__(self,
                 df=None,
                 kg=None,
                 ent2ix=None,
                 rel2ix=None,
                 dict_of_heads=None,
                 dict_of_tails=None):
        """Build the object from a dataframe or from a dict of index tensors.

        Exactly one of `df` and `kg` must be provided.

        Parameters
        ----------
        df: optional
            Dataframe read through the columns `from`, `to` and `rel`.
        kg: dict, optional
            Dict with the keys `heads`, `tails` and `relations`.
            `kg['heads'].shape[0]` is used as the number of facts.
        ent2ix: dict, optional
            Mapping from entities to indices. Required when building from
            `kg`; otherwise computed with `get_dictionaries(df, ent=True)`.
            Its inverse is stored as `self.ix2ent`.
        rel2ix: dict, optional
            Mapping from relations to indices. Required when building from
            `kg`; otherwise computed with `get_dictionaries(df, ent=False)`.
            Its inverse is stored as `self.ix2rel`.
        dict_of_heads, dict_of_tails: optional
            Stored as-is when both are given. If either is None, both
            attributes are reset to empty `defaultdict(set)` and
            `self.evaluate_dicts()` is called.

        Raises
        ------
        WrongArgumentsError
            If neither or both of `df` and `kg` are given, if `kg` is not a
            dict holding the required keys, or if `ent2ix`/`rel2ix` is
            missing when building from `kg`.
        SanityError
            If `self.sanity_check()` raises an AssertionError.
        """
        # Explicit checks instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently disable the validation.
        if df is None:
            if kg is None:
                raise WrongArgumentsError("Please provide at least one "
                                          "argument of `df` and kg`")
            # `isinstance` + short-circuit `and` so that a non-dict `kg`
            # yields WrongArgumentsError rather than an AttributeError.
            if not (isinstance(kg, dict) and 'heads' in kg
                    and 'tails' in kg and 'relations' in kg):
                raise WrongArgumentsError("Keys in the `kg` dict should "
                                          "contain `heads`, `tails`, "
                                          "`relations`.")
            if rel2ix is None or ent2ix is None:
                raise WrongArgumentsError("Please provide the two "
                                          "dictionaries ent2ix and rel2ix "
                                          "if building from `kg`.")
        elif kg is not None:
            raise WrongArgumentsError("`df` and kg` arguments should not "
                                      "both be provided.")

        if ent2ix is None:
            self.ent2ix = get_dictionaries(df, ent=True)
        else:
            self.ent2ix = ent2ix
        # Reverse lookup: index -> entity.
        self.ix2ent = {v: k for k, v in self.ent2ix.items()}

        if rel2ix is None:
            self.rel2ix = get_dictionaries(df, ent=False)
        else:
            self.rel2ix = rel2ix
        # Reverse lookup: index -> relation.
        self.ix2rel = {v: k for k, v in self.rel2ix.items()}

        # Counts are derived from the largest index, not from the dict size.
        self.n_ent = max(self.ent2ix.values()) + 1
        self.n_rel = max(self.rel2ix.values()) + 1

        if df is not None:
            # build kg from a pandas dataframe
            self.n_facts = len(df)
            self.head_idx = tensor(df['from'].map(self.ent2ix).values).long()
            self.tail_idx = tensor(df['to'].map(self.ent2ix).values).long()
            self.relations = tensor(df['rel'].map(self.rel2ix).values).long()
        else:
            # build kg from another kg
            self.n_facts = kg['heads'].shape[0]
            self.head_idx = kg['heads']
            self.tail_idx = kg['tails']
            self.relations = kg['relations']

        if dict_of_heads is None or dict_of_tails is None:
            self.dict_of_heads = defaultdict(set)
            self.dict_of_tails = defaultdict(set)
            # presumably fills the two dicts from the stored facts -- the
            # method is defined outside this block
            self.evaluate_dicts()
        else:
            self.dict_of_heads = dict_of_heads
            self.dict_of_tails = dict_of_tails

        try:
            self.sanity_check()
        except AssertionError:
            raise SanityError("Please check the sanity of arguments.")