Example #1
import sys
from load_wikidata2 import load_wikidata
import json
from itertools import izip
import re
wikidata, reverse_dict, prop_data, child_par_dict, wikidata_fanout_dict = load_wikidata(
)
par_child_dict = json.load(
    open('/dccstor/cssblr/vardaan/dialog-qa/par_child_dict.json'))


def parse_active_set(active_set, target):
    active_set = active_set.strip()
    anding = False
    orring = False
    notting1 = False
    notting2 = False
    if active_set.startswith('AND(') or active_set.startswith('OR('):
        if active_set.startswith('AND('):
            anding = True
            active_set = re.sub(r'^\(|\)$', '',
                                active_set.replace('AND', '', 1))
        if active_set.startswith('OR('):
            orring = True
            active_set = re.sub(r'^\(|\)$', '', active_set.replace('OR', '', 1))
        while active_set.startswith('(') and active_set.endswith(')'):
            active_set = re.sub(r'^\(|\)$', '', active_set)
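        # each pass of re.sub(r'^\(|\)$', '', s) strips at most one leading '('
        # and one trailing ')', so the loop above turns e.g. '((Q1, P1, Q2))'
        # into 'Q1, P1, Q2' before the operands are split below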
        active_set_parts = active_set.split(', ')
        active_set_part1 = active_set_parts[0].strip()
        active_set_part2 = active_set_parts[1].strip()
        if active_set_part1.startswith('NOT('):
Example #2
 def __init__(self,
              wikidata_dir,
              num_timesteps,
              program_type_vocab,
              argument_type_vocab,
              printing,
              relaxed_reward_strict,
              reward_function="jaccard",
              boolean_reward_multiplier=0.1,
              relaxed_reward_till_epoch=[-1, -1],
              unused_var_penalize_after_epoch=[1000, 100000],
              length_based_penalization=False):
     """
     wikidata_dir: directory containing the wikidata knowledge base
     num_timesteps: maximum length of the generated program
     program_type_vocab: vocabulary of operator types
     argument_type_vocab: vocabulary of argument types
     printing: boolean flag indicating whether printing should be on or off
     relaxed_reward_strict: boolean flag indicating whether the relaxed (auxiliary) reward should be used
     reward_function: reward metric to use; one of "jaccard", "recall" or "f1"
     boolean_reward_multiplier: weight applied to the reward for boolean (True/False) answers, so that the model does not bias itself towards producing them merely because they have a dense reward structure
     relaxed_reward_till_epoch: maximum epoch up to which the relaxed (auxiliary) reward stays active
     unused_var_penalize_after_epoch: epoch after which the model is penalized for producing variables in its program that it never consumes
     length_based_penalization: boolean flag indicating whether the model should be penalized for producing longer programs
     """
     np.random.seed(1)
     self.wikidata, self.reverse_wikidata, self.wikidata_type, self.reverse_wikidata_type, self.wikidata_ent_type, self.reverse_wikidata_ent_type = load_wikidata(
         wikidata_dir)
     self.argument_type_vocab = argument_type_vocab
     self.printing = printing
     self.argument_type_vocab_inv = {
         v: k
         for k, v in self.argument_type_vocab.items()
     }
     self.program_type_vocab = program_type_vocab
     self.length_based_penalization = length_based_penalization
     self.relaxed_reward_strict = relaxed_reward_strict
     self.relaxed_reward_till_epoch = relaxed_reward_till_epoch
     self.unused_var_penalize_after_epoch = unused_var_penalize_after_epoch
     self.reward_function = reward_function
     if self.reward_function not in ["jaccard", "recall", "f1"]:
         raise Exception(
             'reward function must be either jaccard or recall or f1')
     self.boolean_reward_multiplier = boolean_reward_multiplier
     self.program_type_vocab_inv = {
         v: k
         for k, v in self.program_type_vocab.items()
     }
     self.map_program_to_func = {}
     self.map_program_to_func["gen_set"] = self.execute_gen_set
     self.map_program_to_func["gen_map1"] = self.execute_gen_map1
     self.map_program_to_func["verify"] = self.execute_verify
     self.map_program_to_func[
         "set_oper_count"] = self.execute_set_oper_count
     self.map_program_to_func[
         "set_oper_union"] = self.execute_set_oper_union
     self.map_program_to_func[
         "set_oper_ints"] = self.execute_set_oper_intersec
     self.map_program_to_func["set_oper_diff"] = self.execute_set_oper_diff
     self.map_program_to_func[
         "map_oper_count"] = self.execute_map_oper_count
     self.map_program_to_func[
         "map_oper_union"] = self.execute_map_oper_union
     self.map_program_to_func[
         "map_oper_ints"] = self.execute_map_oper_intersec
     self.map_program_to_func["map_oper_diff"] = self.execute_map_oper_diff
     self.map_program_to_func[
         "select_oper_max"] = self.execute_select_oper_max
     self.map_program_to_func[
         "select_oper_min"] = self.execute_select_oper_min
     self.map_program_to_func[
         "select_oper_atleast"] = self.execute_select_oper_atleast
     self.map_program_to_func[
         "select_oper_atmost"] = self.execute_select_oper_atmost
     self.map_program_to_func[
         "select_oper_more"] = self.execute_select_oper_more
     self.map_program_to_func[
         "select_oper_less"] = self.execute_select_oper_less
     self.map_program_to_func[
         "select_oper_equal"] = self.execute_select_oper_equal
     self.map_program_to_func[
         "select_oper_approx"] = self.execute_select_oper_approx
     self.map_program_to_func["none"] = self.execute_none
     self.map_program_to_func["terminate"] = self.execute_terminate
     self.HIGH_NEGATIVE_REWARD = 1
     self.HIGHEST_NEGATIVE_REWARD = 1
     self.num_timesteps = num_timesteps
     self.rewards = None
     self.parallel = 0
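
A minimal construction sketch for the executor above; the class name SymbolicExecutor and the two vocabularies are placeholders, not taken from the source:

# hypothetical usage of the constructor shown above
program_type_vocab = {"none": 0, "gen_set": 1, "gen_map1": 2, "verify": 3, "terminate": 4}
argument_type_vocab = {"entity": 0, "relation": 1, "type": 2, "int": 3}
executor = SymbolicExecutor(wikidata_dir='/path/to/wikidata',
                            num_timesteps=10,
                            program_type_vocab=program_type_vocab,
                            argument_type_vocab=argument_type_vocab,
                            printing=True,
                            relaxed_reward_strict=False)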
Example #3
 def __init__(self,
              wikidata_dir,
              num_timesteps,
              program_type_vocab,
              argument_type_vocab,
              printing,
              terminate_prog,
              relaxed_reward_strict,
              reward_function="jaccard",
              boolean_reward_multiplier=0.1,
              relaxed_reward_till_epoch=[-1, -1],
              unused_var_penalize_after_epoch=[1000, 100000],
              length_based_penalization=False):
     np.random.seed(1)
     self.wikidata, self.reverse_wikidata, self.wikidata_type, self.reverse_wikidata_type, self.wikidata_ent_type, self.reverse_wikidata_ent_type = load_wikidata(
         wikidata_dir)
     self.argument_type_vocab = argument_type_vocab
     self.printing = printing
     self.terminate_prog = terminate_prog
     self.argument_type_vocab_inv = {
         v: k
         for k, v in self.argument_type_vocab.items()
     }
     self.program_type_vocab = program_type_vocab
     self.length_based_penalization = length_based_penalization
     self.relaxed_reward_strict = relaxed_reward_strict
     self.relaxed_reward_till_epoch = relaxed_reward_till_epoch
     self.unused_var_penalize_after_epoch = unused_var_penalize_after_epoch
     self.reward_function = reward_function
     if self.reward_function not in ["jaccard", "recall", "f1"]:
         raise Exception(
             'reward function must be either jaccard or recall or f1')
     self.boolean_reward_multiplier = boolean_reward_multiplier
     self.program_type_vocab_inv = {
         v: k
         for k, v in self.program_type_vocab.items()
     }
     self.map_program_to_func = {}
     self.map_program_to_func["gen_set"] = self.execute_gen_set
     self.map_program_to_func["gen_map1"] = self.execute_gen_map1
     self.map_program_to_func["verify"] = self.execute_verify
     self.map_program_to_func[
         "set_oper_count"] = self.execute_set_oper_count
     self.map_program_to_func[
         "set_oper_union"] = self.execute_set_oper_union
     self.map_program_to_func[
         "set_oper_ints"] = self.execute_set_oper_intersec
     self.map_program_to_func["set_oper_diff"] = self.execute_set_oper_diff
     self.map_program_to_func[
         "map_oper_count"] = self.execute_map_oper_count
     self.map_program_to_func[
         "map_oper_union"] = self.execute_map_oper_union
     self.map_program_to_func[
         "map_oper_ints"] = self.execute_map_oper_intersec
     self.map_program_to_func["map_oper_diff"] = self.execute_map_oper_diff
     self.map_program_to_func[
         "select_oper_max"] = self.execute_select_oper_max
     self.map_program_to_func[
         "select_oper_min"] = self.execute_select_oper_min
     self.map_program_to_func[
         "select_oper_atleast"] = self.execute_select_oper_atleast
     self.map_program_to_func[
         "select_oper_atmost"] = self.execute_select_oper_atmost
     self.map_program_to_func[
         "select_oper_more"] = self.execute_select_oper_more
     self.map_program_to_func[
         "select_oper_less"] = self.execute_select_oper_less
     self.map_program_to_func[
         "select_oper_equal"] = self.execute_select_oper_equal
     self.map_program_to_func[
         "select_oper_approx"] = self.execute_select_oper_approx
     self.map_program_to_func["none"] = self.execute_none
     self.map_program_to_func["terminate"] = self.execute_terminate
     self.HIGH_NEGATIVE_REWARD = 1
     self.HIGHEST_NEGATIVE_REWARD = 1
     self.num_timesteps = num_timesteps
     self.rewards = None
     self.parallel = 0
     self.printing = False  # note: overrides the printing argument assigned earlier in __init__
Example #4
    def __init__(self,
                 data_dir,
                 embed_dim=100,
                 fanout_thresh=2,
                 eval_batch=32):

        self.__embed_dim = embed_dim
        self.__initialized = True
        self.eval_batch = eval_batch

        # self.__trainable = list()

        # ********************************************************************************************
        with tf.device('/cpu'):
            wikidata, prop_data, wikidata_fanout_dict, child_par_dict = load_wikidata(
            )
            # print len(wikidata)
            wikidata_remove_list = [
                q for q in wikidata if wikidata_fanout_dict[q] <= fanout_thresh
            ]

            if fanout_thresh == 2:
                wikidata_remove_list.extend(wikidata.keys()[-100000:])

            for q in wikidata_remove_list:
                wikidata.pop(q, None)

            self.__relation_id_map = {
                pid: i
                for i, pid in enumerate(prop_data.keys())
            }
            self.__entity_id_map = {
                qid: i
                for i, qid in enumerate(wikidata.keys())
            }

            self.__id_relation_map = {
                i: pid
                for i, pid in enumerate(prop_data.keys())
            }
            self.__id_entity_map = {
                i: qid
                for i, qid in enumerate(wikidata.keys())
            }

            self.__n_entity = len(self.__entity_id_map.keys())
            self.__n_relation = len(self.__relation_id_map.keys())

            def load_triple():
                triples_arr = []

                for QID in self.__entity_id_map.keys():
                    for pid in [
                            p for p in wikidata[QID]
                            if p in self.__relation_id_map
                    ]:
                        for qid in [
                                q for q in wikidata[QID][pid]
                                if q in child_par_dict
                                and q in self.__entity_id_map
                        ]:
                            triples_arr.append([
                                self.__entity_id_map[QID],
                                self.__entity_id_map[qid],
                                self.__relation_id_map[pid]
                            ])
                            # if len(triples_arr) > 10000:
                            #     return np.asarray(triples_arr,dtype=np.int32)
                return np.asarray(triples_arr, dtype=np.int32)

            # self.__n_entity = 2900000
            # self.__n_relation = 567

            # def load_triple():
            #     triples_arr = []

            #     for i in xrange(11963): #11963105
            #         triples_arr.append([random.randint(0,self.__n_entity-1), random.randint(0,self.__n_entity-1), random.randint(0,self.__n_relation-1)])
            #                 # if len(triples_arr) > 1000000:
            #                 #     return np.asarray(triples_arr)
            #     return np.asarray(triples_arr, dtype=np.int32)

            triples_arr = load_triple()
            # shuffle the triples before taking the 70/10/20 split
            idx = np.random.permutation(np.arange(triples_arr.shape[0]))
            triples_arr = triples_arr[idx]

            self.__train_triple = triples_arr[:int(0.7 * idx.shape[0])]
            self.__valid_triple = triples_arr[int(0.7 * idx.shape[0]):int(0.8 * idx.shape[0])]
            self.__test_triple = triples_arr[int(0.8 * idx.shape[0]):]

            # ********************************************************************************************
            # with codecs.open(os.path.join(data_dir, 'entity2id.txt'), 'r', encoding='utf-8') as f:
            #     self.__n_entity = len(f.readlines())

            # with codecs.open(os.path.join(data_dir, 'entity2id.txt'), 'r', encoding='utf-8') as f:
            #     self.__entity_id_map = {x.strip().split('\t')[0]: int(x.strip().split('\t')[1]) for x in f.readlines()}
            #     self.__id_entity_map = {v: k for k, v in self.__entity_id_map.items()}

            # with codecs.open(os.path.join(data_dir, 'relation2id.txt'), 'r', encoding='utf-8') as f:
            #     self.__n_relation = len(f.readlines())

            # with codecs.open(os.path.join(data_dir, 'relation2id.txt'), 'r', encoding='utf-8') as f:
            #     self.__relation_id_map = {x.strip().split('\t')[0]: int(x.strip().split('\t')[1]) for x in f.readlines()}
            #     self.__id_relation_map = {v: k for k, v in self.__entity_id_map.items()}

            # def load_triple(file_path):
            #     with codecs.open(file_path, 'r', encoding='utf-8') as f_triple:
            #         return np.asarray([[self.__entity_id_map[x.strip().split('\t')[0]],
            #                             self.__entity_id_map[x.strip().split('\t')[1]],
            #                             self.__relation_id_map[x.strip().split('\t')[2]]] for x in f_triple.readlines()],
            #                           dtype=np.int32)

            # self.__train_triple = load_triple(os.path.join(data_dir, 'train.txt'))
            # self.__test_triple = load_triple(os.path.join(data_dir, 'test.txt'))
            # self.__valid_triple = load_triple(os.path.join(data_dir, 'valid.txt'))

            # ********************************************************************************************
            print("N_ENTITY: %d" % self.__n_entity)
            print("N_RELATION: %d" % self.__n_relation)

            print("N_TRAIN_TRIPLES: %d" % self.__train_triple.shape[0])

            print("N_TEST_TRIPLES: %d" % self.__test_triple.shape[0])

            print("N_VALID_TRIPLES: %d" % self.__valid_triple.shape[0])

            def gen_hr_t(triple_data):
                hr_t = dict()
                for h, t, r in triple_data:
                    if h not in hr_t:
                        hr_t[h] = dict()
                    if r not in hr_t[h]:
                        hr_t[h][r] = set()
                    hr_t[h][r].add(t)
                return hr_t

            def gen_tr_h(triple_data):
                tr_h = dict()
                for h, t, r in triple_data:
                    if t not in tr_h:
                        tr_h[t] = dict()
                    if r not in tr_h[t]:
                        tr_h[t][r] = set()
                    tr_h[t][r].add(h)
                return tr_h
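            # index layouts built by the two helpers above, e.g. for the triples
            # [(0, 5, 2), (0, 7, 2)] in (head, tail, relation) order:
            #   gen_hr_t -> {0: {2: {5, 7}}}             (head -> relation -> tails)
            #   gen_tr_h -> {5: {2: {0}}, 7: {2: {0}}}   (tail -> relation -> heads)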

            #self.__train_hr_t = gen_hr_t(self.__train_triple)
            #self.__train_tr_h = gen_tr_h(self.__train_triple)
            #self.__test_hr_t = gen_hr_t(self.__test_triple)
            #self.__test_tr_h = gen_tr_h(self.__test_triple)

            self.__hr_t = gen_hr_t(
                np.concatenate([
                    self.__train_triple, self.__test_triple,
                    self.__valid_triple
                ],
                               axis=0))
            self.__tr_h = gen_tr_h(
                np.concatenate([
                    self.__train_triple, self.__test_triple,
                    self.__valid_triple
                ],
                               axis=0))

        bound = 6 / math.sqrt(embed_dim)
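        # +/- 6/sqrt(embed_dim) is the uniform-initialization range used in TransE (Bordes et al., 2013)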

        # with tf.device('/cpu'):
        self.ent_embeddings = tf.get_variable(
            "ent_embedding", [self.__n_entity, embed_dim],
            initializer=tf.random_uniform_initializer(minval=-bound,
                                                      maxval=bound,
                                                      seed=345))
        # self.__trainable.append(self.ent_embeddings)

        self.rel_embeddings = tf.get_variable(
            "rel_embedding", [self.__n_relation, embed_dim],
            initializer=tf.random_uniform_initializer(minval=-bound,
                                                      maxval=bound,
                                                      seed=346))
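
A hedged sketch of how the two embedding tables above are typically consumed, as it might appear in a later method of the same class (TF1 graph mode; the placeholder names are illustrative, not from the source):

# hypothetical lookup of the embeddings defined above
head_ids = tf.placeholder(tf.int32, [None], name="head_ids")
rel_ids = tf.placeholder(tf.int32, [None], name="rel_ids")
head_vecs = tf.nn.embedding_lookup(self.ent_embeddings, head_ids)
rel_vecs = tf.nn.embedding_lookup(self.rel_embeddings, rel_ids)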
Example #5
    def __init__(self,
                 max_utter,
                 max_len,
                 start_symbol_index,
                 end_symbol_index,
                 unk_symbol_index,
                 pad_symbol_index,
                 pad_kb_symbol_index,
                 nkb_symbol_index,
                 stopwords,
                 stopwords_histogram,
                 lucene_dir,
                 transe_dir,
                 wikidata_dir,
                 glove_dir,
                 max_mem_size,
                 max_target_size,
                 vocab_max_len,
                 all_possible_ngrams,
                 cutoff=-1):
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger('prepare_data_for_hred')
        self.max_utter = max_utter
        self.max_len = max_len
        self.unknown_word_id = unk_symbol_index
        self.start_word_id = start_symbol_index
        self.pad_word_id = pad_symbol_index
        self.end_word_id = end_symbol_index
        self.kb_word_id = 4
        self.start_word_symbol = '</s>'
        self.end_word_symbol = '</e>'
        self.pad_symbol = '<pad>'
        self.unk_symbol = '<unk>'
        self.kb_word_symbol = '<kb>'
        self.pad_kb_symbol_index = pad_kb_symbol_index
        self.nkb_symbol_index = nkb_symbol_index
        self.pad_kb_symbol = '<pad_kb>'
        self.nkb_symbol = '<nkb>'
        self.cutoff = cutoff
        self.vocab_max_len = vocab_max_len
        self.all_possible_ngrams = all_possible_ngrams
        self.input = None
        self.output = None
        self.vocab_file = None
        self.vocab_dict = None
        self.response_vocab_file = None
        self.response_vocab_dict = None
        self.word_counter = None
        self.max_mem_size = max_mem_size
        self.max_target_size = max_target_size
        self.lemmatizer = nltk.WordNetLemmatizer()
        self.stemmer = nltk.stem.porter.PorterStemmer()
        self.bad_qids = set(
            ['Q184386', 'Q1541554', 'Q540955', 'Q2620241',
             'Q742391'])  #adding Yes/No
        self.bad_qids.update(
            pkl.load(open('wikidata_entities_with_digitnames.pkl')))
        self.wikidata_qid_to_name = json.load(
            open(wikidata_dir + '/items_wikidata_n.json'))
        self.use_gold_entities = False
        self.use_gold_relations = False
        self.use_gold_types = False
        self.use_direct_only = False
        #Taken from Su Nam Kim Paper...
        self.grammar = r"""
            NBAR:
                {<NN.*|JJ>*<NN.*>}  # Nouns and Adjectives, terminated with Nouns
              
            NP:
                {<NBAR>}
                {<NBAR><IN><NBAR>}  # Above, connected with in/of/etc...
        """
        self.chunker = nltk.RegexpParser(self.grammar)
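        # NBAR matches adjective/noun sequences ending in a noun (e.g. "capital city");
        # NP is a single NBAR, or two NBARs joined by a preposition (e.g. "president of France")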
        #************************************************************************#
        self.stop_set = pkl.load(open(stopwords))
        self.stop_vocab = read_file_as_dict(stopwords_histogram)
        self.ls = LuceneSearch(lucene_dir)
        self.question_parser = QuestionParser(None, self.stop_vocab,
                                              self.stop_set, self.bad_qids,
                                              self.ls,
                                              self.wikidata_qid_to_name,
                                              self.all_possible_ngrams)
        self.wikidata, self.reverse_dict, self.prop_data, self.child_par_dict, self.child_all_par_dict, self.wikidata_fanout_dict, self.par_child_dict = load_wikidata(
            wikidata_dir)
        self.id_entity_map = {
            self.pad_kb_symbol_index: self.pad_kb_symbol,
            self.nkb_symbol_index: self.nkb_symbol
        }
        self.id_entity_map.update({
            (k + 2): v
            for k, v in pkl.load(open(transe_dir +
                                      '/id_ent_map.pickle', 'rb')).iteritems()
        })

        self.id_rel_map = {
            self.pad_kb_symbol_index: self.pad_kb_symbol,
            self.nkb_symbol_index: self.nkb_symbol
        }
        self.id_rel_map.update({
            (k + 2): v
            for k, v in pkl.load(open(transe_dir +
                                      '/id_rel_map.pickle', 'rb')).iteritems()
        })

        self.entity_id_map = {v: k for k, v in self.id_entity_map.iteritems()}
        self.rel_id_map = {v: k for k, v in self.id_rel_map.iteritems()}

        self.kb_ov_idx = 1  # symbol assigned to entries out of kb or for padding to target_ids
        self.kb_rel_ov_idx = 1  # symbol assigned to entries out of kb or for padding to target_ids

        glove_model = gensim.models.KeyedVectors.load_word2vec_format(
            glove_dir + '/GoogleNews-vectors-negative300.bin', binary=True
        )  #/dccstor/cssblr/amrita/resources/glove/GoogleNews-vectors-negative300.bin', binary=True)
        vocab = glove_model.wv.vocab.keys()
        self.glove_embedding = {v: glove_model.wv[v] for v in vocab}
        print 'loaded glove embeddings'
        self.ann_rel = AnnoyIndex(300, metric='euclidean')
        self.ann_rel.load(
            'relation_linker/annoy_index_rel_noisy/glove_embedding_of_vocab.ann'
        )
        self.ann_pickle_rel = pkl.load(
            open('relation_linker/annoy_index_rel_noisy/index2rel.pkl'))
        self.ann_type = AnnoyIndex(300, metric='euclidean')
        self.ann_type.load(
            'type_linker/annoy_index_type/glove_embedding_of_vocab.ann')
        self.ann_pickle_type = pkl.load(
            open('type_linker/annoy_index_type/index2type.pkl'))
        self.types = json.load(
            open('type_linker/annoy_index_type/type_names.json'))
    def __init__(self,
                 data_dir,
                 embed_dim=100,
                 combination_method='simple',
                 dropout=0.5,
                 neg_weight=0.5,
                 n_load_triples=1000000):

        if combination_method.lower() not in ['simple', 'matrix']:
            raise NotImplementedError(
                "ProjE does not support using %s as combination method." %
                combination_method)

        self.__combination_method = combination_method

        self.__embed_dim = embed_dim
        self.__initialized = False

        self.__trainable = list()
        self.__dropout = dropout

        # ********************************************************************************************
        wikidata, reverse_dict, item_data, prop_data, wikidata_fanout_dict, child_par_dict = load_wikidata(
        )
        print len(wikidata)
        self.__n_entity = len(wikidata)

        self.__relation_id_map = {
            pid: i
            for i, pid in enumerate(prop_data.keys())
        }
        self.__entity_id_map = {
            qid: i
            for i, qid in enumerate(wikidata.keys())
        }

        self.__id_relation_map = {
            i: pid
            for i, pid in enumerate(prop_data.keys())
        }
        self.__id_entity_map = {
            i: qid
            for i, qid in enumerate(wikidata.keys())
        }

        self.__n_relation = len(prop_data)

        def load_triple():
            triples_arr = []

            for QID in wikidata:
                for pid in [p for p in wikidata[QID] if p in prop_data]:
                    for qid in [
                            q for q in wikidata[QID][pid] if
                            q in child_par_dict and q in self.__entity_id_map
                    ]:
                        triples_arr.append([
                            self.__entity_id_map[QID],
                            self.__entity_id_map[qid],
                            self.__relation_id_map[pid]
                        ])
                        if n_load_triples > 0 and len(triples_arr) > n_load_triples:
                            return np.asarray(triples_arr, dtype=np.int32)

            return np.asarray(triples_arr, dtype=np.int32)

        triples_arr = load_triple()
        # shuffle the triples before taking the 70/10/20 split
        idx = np.random.permutation(np.arange(triples_arr.shape[0]))
        triples_arr = triples_arr[idx]

        self.__train_triple = triples_arr[:int(0.7 * idx.shape[0])]
        self.__valid_triple = triples_arr[int(0.7 * idx.shape[0]):int(0.8 * idx.shape[0])]
        self.__test_triple = triples_arr[int(0.8 * idx.shape[0]):]

        # ********************************************************************************************

        # with codecs.open(os.path.join(data_dir, 'entity2id.txt'), 'r', encoding='utf-8') as f:
        #     self.__n_entity = len(f.readlines())

        # with codecs.open(os.path.join(data_dir, 'entity2id.txt'), 'r', encoding='utf-8') as f:
        #     self.__entity_id_map = {x.strip().split('\t')[0]: int(x.strip().split('\t')[1]) for x in f.readlines()}
        #     self.__id_entity_map = {v: k for k, v in self.__entity_id_map.items()}

        # with codecs.open(os.path.join(data_dir, 'relation2id.txt'), 'r', encoding='utf-8') as f:
        #     self.__n_relation = len(f.readlines())

        # with codecs.open(os.path.join(data_dir, 'relation2id.txt'), 'r', encoding='utf-8') as f:
        #     self.__relation_id_map = {x.strip().split('\t')[0]: int(x.strip().split('\t')[1]) for x in f.readlines()}
        #     self.__id_relation_map = {v: k for k, v in self.__entity_id_map.items()}

        # def load_triple(file_path):
        #     with codecs.open(file_path, 'r', encoding='utf-8') as f_triple:
        #         return np.asarray([[self.__entity_id_map[x.strip().split('\t')[0]],
        #                             self.__entity_id_map[x.strip().split('\t')[1]],
        #                             self.__relation_id_map[x.strip().split('\t')[2]]] for x in f_triple.readlines()],dtype=np.int32)

        # self.__train_triple = load_triple(os.path.join(data_dir, 'train.txt'))
        # self.__test_triple = load_triple(os.path.join(data_dir, 'test.txt'))
        # self.__valid_triple = load_triple(os.path.join(data_dir, 'valid.txt'))

        # ********************************************************************************************

        print("N_ENTITY: %d" % self.__n_entity)
        print("N_RELATION: %d" % self.__n_relation)

        print("N_TRAIN_TRIPLES: %d" % self.__train_triple.shape[0])
        print("N_TEST_TRIPLES: %d" % self.__test_triple.shape[0])
        print("N_VALID_TRIPLES: %d" % self.__valid_triple.shape[0])

        def gen_hr_t(triple_data):
            hr_t = dict()
            for h, t, r in triple_data:
                if h not in hr_t:
                    hr_t[h] = dict()
                if r not in hr_t[h]:
                    hr_t[h][r] = set()
                hr_t[h][r].add(t)

            return hr_t

        def gen_tr_h(triple_data):
            tr_h = dict()
            for h, t, r in triple_data:
                if t not in tr_h:
                    tr_h[t] = dict()
                if r not in tr_h[t]:
                    tr_h[t][r] = set()
                tr_h[t][r].add(h)
            return tr_h

        self.__train_hr_t = gen_hr_t(self.__train_triple)
        self.__train_tr_h = gen_tr_h(self.__train_triple)
        self.__test_hr_t = gen_hr_t(self.__test_triple)
        self.__test_tr_h = gen_tr_h(self.__test_triple)

        self.__hr_t = gen_hr_t(
            np.concatenate(
                [self.__train_triple, self.__test_triple, self.__valid_triple],
                axis=0))
        self.__tr_h = gen_tr_h(
            np.concatenate(
                [self.__train_triple, self.__test_triple, self.__valid_triple],
                axis=0))

        bound = 6 / math.sqrt(embed_dim)

        with tf.device('/cpu'):
            self.__ent_embedding = tf.get_variable(
                "ent_embedding", [self.__n_entity, embed_dim],
                initializer=tf.random_uniform_initializer(minval=-bound,
                                                          maxval=bound,
                                                          seed=345))
            self.__trainable.append(self.__ent_embedding)

        self.__rel_embedding = tf.get_variable(
            "rel_embedding", [self.__n_relation, embed_dim],
            initializer=tf.random_uniform_initializer(minval=-bound,
                                                      maxval=bound,
                                                      seed=346))
        self.__trainable.append(self.__rel_embedding)

        if combination_method.lower() == 'simple':
            self.__hr_weighted_vector = tf.get_variable(
                "simple_hr_combination_weights", [embed_dim * 2],
                initializer=tf.random_uniform_initializer(minval=-bound,
                                                          maxval=bound,
                                                          seed=445))
            self.__tr_weighted_vector = tf.get_variable(
                "simple_tr_combination_weights", [embed_dim * 2],
                initializer=tf.random_uniform_initializer(minval=-bound,
                                                          maxval=bound,
                                                          seed=445))
            self.__trainable.append(self.__hr_weighted_vector)
            self.__trainable.append(self.__tr_weighted_vector)
            self.__hr_combination_bias = tf.get_variable("combination_bias_hr",
                                                         initializer=tf.zeros(
                                                             [embed_dim]))
            self.__tr_combination_bias = tf.get_variable("combination_bias_tr",
                                                         initializer=tf.zeros(
                                                             [embed_dim]))

            self.__trainable.append(self.__hr_combination_bias)
            self.__trainable.append(self.__tr_combination_bias)

        else:
            self.__hr_combination_matrix = tf.get_variable(
                "matrix_hr_combination_layer", [embed_dim * 2, embed_dim],
                initializer=tf.random_uniform_initializer(minval=-bound,
                                                          maxval=bound,
                                                          seed=555))
            self.__tr_combination_matrix = tf.get_variable(
                "matrix_tr_combination_layer", [embed_dim * 2, embed_dim],
                initializer=tf.random_uniform_initializer(minval=-bound,
                                                          maxval=bound,
                                                          seed=555))
            self.__trainable.append(self.__hr_combination_matrix)
            self.__trainable.append(self.__tr_combination_matrix)
            self.__hr_combination_bias = tf.get_variable("combination_bias_hr",
                                                         initializer=tf.zeros(
                                                             [embed_dim]))
            self.__tr_combination_bias = tf.get_variable("combination_bias_tr",
                                                         initializer=tf.zeros(
                                                             [embed_dim]))

            self.__trainable.append(self.__hr_combination_bias)
            self.__trainable.append(self.__tr_combination_bias)
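
A hedged note on how the 'simple' combination variables above are typically used in ProjE-style scoring (Shi & Weninger, 2017); the actual use is not shown in this snippet:

# the [2 * embed_dim] weight vector is usually split into two halves that act as
# diagonal scalings of the head and relation embeddings, roughly
#   comb(h, r) = tanh(h * w_hr[:embed_dim] + r * w_hr[embed_dim:] + bias_hr)
# and the combined vector is then matched against candidate tail embeddings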