コード例 #1
0
ファイル: DatasetManager.py プロジェクト: NooneBug/MTNCI
    def load_entities_data(self, X_PATH, Y_PATH, ENTITIES_PATH):
        '''
        Unpickle the three base artifacts and store them on the instance
        as self.X, self.Y and self.entities.

        params:
            X_PATH: path of the pickle holding the input vectors (stored in self.X)
            Y_PATH: path of the pickle holding the labels of the input vectors (stored in self.Y)
            ENTITIES_PATH: path of the pickle holding the entities names (stored in self.entities)
        '''
        self.setup_print_times(keywords = 'entities data')

        # Attribute name -> pickle path, processed in X, Y, entities order so that
        # each attribute is bound as soon as its own file has been read.
        attribute_sources = (
            ('X', X_PATH),
            ('Y', Y_PATH),
            ('entities', ENTITIES_PATH),
        )
        for attribute_name, pickle_path in attribute_sources:
            setattr(self, attribute_name, load_data_with_pickle(pickle_path))

        self.print_loaded()
コード例 #2
0
ファイル: DatasetManager.py プロジェクト: NooneBug/MTNCI
    def load_concept_embeddings(self, CONCEPT_EMBEDDING_PATHS, data, nickel = True):
        '''
        Fill self.concept_embeddings with a 'hyperbolic' and a 'distributional' entry.

        The intended layout is a dict of dicts, for example:
            self.concept_embeddings = {'hyperbolic':     {concept_name_0: vector,
                                                          ...
                                                          concept_name_N: vector},
                                       'distributional': {concept_name_0: vector,
                                                          ...
                                                          concept_name_M: vector}}

        params:
            CONCEPT_EMBEDDING_PATHS: a list of paths, each path points to a concept embedding;
                                     index 0 is read for 'distributional', index 1 for 'hyperbolic'
            data: not used by this method
            nickel: if truthy the distributional embedding is read through self.load_nickel,
                    otherwise through load_data_with_pickle
        '''

        self.setup_print_times(keywords = 'concept embeddings')

        # Local alias of the very same dict that is published on the instance.
        embeddings = self.concept_embeddings = {}

        # The hyperbolic embedding always goes through self.load_nickel
        # (self.load_nickel2 is an alternative that is currently not used).
        embeddings['hyperbolic'] = self.load_nickel(CONCEPT_EMBEDDING_PATHS[1])
        # Normalise the concept names by removing surrounding whitespace.
        embeddings['hyperbolic'] = {
            concept_name.strip(): vector
            for concept_name, vector in embeddings['hyperbolic'].items()
        }

        # Pick the reader for the distributional embedding according to the flag.
        read_distributional = self.load_nickel if nickel else load_data_with_pickle
        embeddings['distributional'] = read_distributional(CONCEPT_EMBEDDING_PATHS[0])

        self.print_loaded()
コード例 #3
0
ファイル: DatasetManager.py プロジェクト: NooneBug/MTNCI
    def load_concept_embeddings(self, CONCEPT_EMBEDDING_PATHS, nickel = False, cleaned = False):
        '''
        load the concept embeddings and store them in self.concept_embeddings,
        for example:
            self.concept_embeddings is a dict in the format {'hyperbolic':     {concept_name_0: vector,
                                                                                ...
                                                                                concept_name_N: vector},
                                                             'distributional': {concept_name_0: vector,
                                                                                ...
                                                                                concept_name_M: vector}}
        params:
            CONCEPT_EMBEDDING_PATHS: a list of paths, each path points to a concept embedding;
                                     index 0 is the distributional embedding, index 1 the hyperbolic one
            nickel: if truthy the hyperbolic embedding is read with self.load_nickel,
                    otherwise it is unpickled with load_data_with_pickle
            cleaned: if falsy the unpickled distributional object is converted to a plain
                     dict {key: vector} by iterating its `.wv.vocab`; if truthy it is stored as loaded
        '''

        self.setup_print_times(keywords = 'concept embeddings')

        # Only the hyperbolic file (index 1) is read here. The distributional file
        # (index 0) is loaded exactly once below; unpickling every path in this
        # branch would read it twice and throw the first copy away.
        hyperbolic_path = CONCEPT_EMBEDDING_PATHS[1]
        if nickel:
            hyperbolic = self.load_nickel(hyperbolic_path)
            # hyperbolic = self.load_nickel2(hyperbolic_path)
        else:
            hyperbolic = load_data_with_pickle(hyperbolic_path)

        # Strip surrounding whitespace from the concept names.
        self.concept_embeddings = {'hyperbolic': {k.strip(): v for k, v in hyperbolic.items()}}

        distributional = load_data_with_pickle(CONCEPT_EMBEDDING_PATHS[0])

        if not cleaned:
            # transform gensim embedding in a dict {key: vector}
            # NOTE(review): `.wv.vocab` is the gensim < 4.0 API (renamed to
            # `key_to_index` in gensim 4) -- confirm against the pinned gensim version.
            distributional = {k: distributional[k] for k in distributional.wv.vocab}

        self.concept_embeddings['distributional'] = distributional

        self.print_loaded()
コード例 #4
0
ファイル: DatasetManager.py プロジェクト: NooneBug/MTNCI
 def load_raw_dataset(self, load_path):
     '''
     Unpickle the raw dataset parts and store them on the instance.

     params:
         load_path: string prefix of the pickle files; the suffixes 'X', 'Y' and
                    'entities' are appended to it as-is (no separator is added)
                    and the results are stored in self.X, self.Y and self.entities
     '''
     # Each suffix doubles as the name of the attribute that receives the data.
     for part in ('X', 'Y', 'entities'):
         setattr(self, part, load_data_with_pickle(load_path + part))
コード例 #5
0
        fraction = 1

        datasetManager.shuffle_dataset_and_sample(fraction=fraction,
                                                  in_place=True)

        datasetManager.split_data_by_unique_entities(
            exclude_min_threshold=exclude_min_threshold)
        print('Train: {} vectors, Val: {} vectors, Test: {} vectors'.format(
            len(datasetManager.Y_train), len(datasetManager.Y_val),
            len(datasetManager.Y_test)))

    elif load_dataset:
        print('... loading datasets ...')
        t = time.time()
        datasetManager.X_train = load_data_with_pickle(X_TRAIN_PATH)
        datasetManager.X_test = load_data_with_pickle(X_TEST_PATH)
        datasetManager.X_val = load_data_with_pickle(X_VAL_PATH)
        datasetManager.Y_train = load_data_with_pickle(Y_TRAIN_PATH)
        datasetManager.Y_test = load_data_with_pickle(Y_TEST_PATH)
        datasetManager.Y_val = load_data_with_pickle(Y_VAL_PATH)
        datasetManager.E_train = load_data_with_pickle(E_TRAIN_PATH)
        datasetManager.E_test = load_data_with_pickle(E_TEST_PATH)
        datasetManager.E_val = load_data_with_pickle(E_VAL_PATH)
        print('--- dataset loaded in {:.2f} seconds ---'.format(time.time() -
                                                                t))

    # datasetManager.plot_datasets()
    # datasetManager.print_statistic_on_dataset()

    print('... creating numeric dataset ...')