def __build_dictionary(self):
    print('Building dictionary')
    dict_words = []
    i = 0
    for text in self.data:
        i += 1
        print("Dictionary Step {} / {}".format(i, len(self.data)))
        words = NLP(text=text['content']).get_words_feature()
        dict_words.append(words)
    FileStore(filePath=settings.DICTIONARY_PATH).store_dictionary(dict_words)
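    # dict_words holds one word list per document; each record in self.data
    # is expected to provide a 'content' field for NLP.get_words_feature()
    # before the whole batch is handed to FileStore.store_dictionary().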
    json_test = DataLoader(dataPath=settings.DATA_TEST_PATH).get_json()
    print('Load Data to JSON Done! ', str(datetime.now()))

    # Feature Extraction
    print('Feature Extraction... ', str(datetime.now()))

    # tf-idf
    features_train, labels_train = FeatureExtraction(
        data=json_train).get_data_and_label_tfidf()
    features_test, labels_test = FeatureExtraction(
        data=json_test).get_data_and_label_tfidf()
    vectorizer = TfidfVectorizer(
        use_idf=True, min_df=0.0, max_df=1.0, ngram_range=(1, 2))
    features_train = vectorizer.fit_transform(features_train)
    features_test = vectorizer.transform(features_test)
    FileStore(filePath=settings.VECTOR_EMBEDDING).save_pickle(obj=vectorizer)
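
    # A minimal sketch of reusing the saved vectorizer at prediction time,
    # assuming the pickle written above can be loaded back with the standard
    # pickle module (FileStore's read API is not shown in this snippet):
    # import pickle
    # with open(settings.VECTOR_EMBEDDING, 'rb') as f:
    #     loaded_vectorizer = pickle.load(f)
    # new_features = loaded_vectorizer.transform(['a preprocessed document'])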

    print('Feature Extraction Done! ', str(datetime.now()))

    # Save feature extraction
    features_train_dict = get_feature_dict(
        value_features=features_train, value_labels=labels_train)
    features_test_dict = get_feature_dict(
        value_features=features_test, value_labels=labels_test)
    FileStore(filePath=settings.FEATURES_TRAIN).save_pickle(
        obj=features_train_dict)
    FileStore(filePath=settings.FEATURES_TEST).save_pickle(
        obj=features_test_dict)

    print("Store data DONE!")

    def save_model(self, filePath):
        print('Saving Model... ', str(datetime.now()))
        FileStore(filePath=filePath).save_pickle(obj=self.estimator)
        print('Save Model Done! ', str(datetime.now()))
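
    # A possible counterpart for loading the estimator back (hypothetical
    # 'load_model' helper, not part of the original code), assuming the
    # pickle written by save_model() is read back with the standard pickle
    # module:
    # def load_model(self, filePath):
    #     with open(filePath, 'rb') as f:
    #         self.estimator = pickle.load(f)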

    def get_data_and_label_tfidf(self):
        self.__build_dataset()
        return self.features, self.labels

    def read_feature(self):
        return self.data['features'], self.data['labels']

def get_feature_dict(value_features, value_labels):
    return {
        "features": value_features,
        "labels": value_labels
    }
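
# Example of the mapping get_feature_dict() returns; read_feature() above
# unpacks the same "features"/"labels" keys:
# d = get_feature_dict(value_features=features_train, value_labels=labels_train)
# X, y = d['features'], d['labels']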

if __name__ == '__main__':
    print('Reading raw data... ', str(datetime.now()))
    json_train = DataLoader(dataPath=settings.DATA_TRAIN_PATH).get_json_train()
    FileStore(filePath=settings.DATA_TRAIN_JSON, data=json_train).store_json()
    json_test = DataLoader(dataPath=settings.DATA_TEST_PATH).get_json_test(2000)
    FileStore(filePath=settings.DATA_TEST_JSON, data=json_test).store_json()
    print('Load Data to JSON Done! ', str(datetime.now()))
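
    # Each JSON record is expected to carry at least a 'content' field (the
    # dictionary builder reads text['content']); the exact label field name
    # depends on the DataLoader implementation and is not shown here.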
          
    # Load the preprocessed data
    # train_loader = FileReader(filePath=settings.DATA_TRAIN_JSON)
    # test_loader = FileReader(filePath=settings.DATA_TEST_JSON)
    # json_train = train_loader.read_json()
    # json_test = test_loader.read_json()

    # Feature Extraction
    print('Feature Extraction... ', str(datetime.now()))
    # Bag-of-words (BoW)
    # features_train, labels_train = FeatureExtraction(data=json_train).get_data_and_label_bow()
    # features_test, labels_test = FeatureExtraction(data=json_test).get_data_and_label_bow()
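
    # If the BoW path were used instead of TF-IDF, a CountVectorizer could
    # stand in for the TfidfVectorizer above (a sketch, not the original
    # pipeline):
    # from sklearn.feature_extraction.text import CountVectorizer
    # bow_vectorizer = CountVectorizer(ngram_range=(1, 2))
    # features_train_bow = bow_vectorizer.fit_transform(features_train)
    # features_test_bow = bow_vectorizer.transform(features_test)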