def prepare_data(self, dataset):
    """Extract features from *dataset* and return them as dense rows.

    A ``FeaturesExtractor`` is built from this object's configuration
    attributes (with ``sentiment_features=True``) and asked to run
    ``ExtractNumTfFeatures`` with the dictionaries from ``init_dicts()``.
    Each entry of the extractor's ``features`` is a mapping
    ``feature id -> value``; it is expanded into a list whose length is
    the largest feature id seen, with feature id ``k`` stored at column
    ``k - 1`` and every other column set to 0.

    Args:
        dataset: Passed through unchanged to ``FeaturesExtractor``.

    Returns:
        A tuple ``(X, Y)``: ``X`` is a list of dense feature rows and
        ``Y`` is the list of labels at the matching positions. Both are
        empty when the extractor produced no features.

    Raises:
        IndexError: If the extractor has fewer labels than feature maps.
    """
    trainFeaturesExtractor = FeaturesExtractor(
        self.configFileFeaturesExtractor,
        self.trainFeaturesSerializationFile,
        self.trainLabelsSerializationFile,
        self.languageModel,
        dataset,
        sentiment_features=True)
    trainFeaturesExtractor.ExtractNumTfFeatures(init_dicts())

    features = trainFeaturesExtractor.features
    labels = trainFeaturesExtractor.labels

    # Row width is the largest feature id over all items. Empty feature
    # maps (and an empty feature list) are skipped instead of letting
    # max() raise ValueError on an empty sequence.
    max_id = 0
    for feature_map in features:
        if feature_map:
            max_id = max(max_id, max(feature_map))

    X = []
    Y = []
    for index, feature_map in enumerate(features):
        row = [0] * max_id
        for feature_id, value in feature_map.items():
            # NOTE(review): ids look 1-based; an id of 0 would wrap to the
            # last column through negative indexing - confirm upstream.
            row[feature_id - 1] = value
        X.append(row)
        # Indexed (not zipped) so a features/labels length mismatch still
        # surfaces as an IndexError rather than being silently truncated.
        Y.append(labels[index])

    # Drop the extractor's references to the bulky intermediate data.
    trainFeaturesExtractor.dataset = []
    trainFeaturesExtractor.features = []
    trainFeaturesExtractor.labels = []
    return X, Y
def prepare_data(self, dataset):
    """Extract sparse features and labels from *dataset*.

    Builds a ``FeaturesExtractor`` from this object's configuration
    attributes (with ``sentiment_features=True``), runs
    ``ExtractNumTfFeatures`` with ``sparse=True`` and the dictionaries
    from ``init_dicts()``, then hands back the extractor's
    ``sparse_features`` together with its labels as a NumPy array.

    Args:
        dataset: Passed through unchanged to ``FeaturesExtractor``.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is the extractor's
        ``sparse_features`` object and ``Y`` is ``np.array`` of its
        labels.
    """
    extractor = FeaturesExtractor(
        self.configFileFeaturesExtractor,
        self.trainFeaturesSerializationFile,
        self.trainLabelsSerializationFile,
        self.languageModel,
        dataset,
        sentiment_features=True,
    )
    extractor.ExtractNumTfFeatures(sentiment_dict=init_dicts(), sparse=True)

    sparse_matrix = extractor.sparse_features
    # Copy the labels into an array before the extractor's list is reset.
    label_array = np.array(extractor.labels)

    # Reset the extractor's intermediate containers to fresh empty lists.
    for attribute in ("dataset", "features", "labels"):
        setattr(extractor, attribute, [])

    return sparse_matrix, label_array