예제 #1
0
    def prepare_data(self, dataset):
        """Extract features from ``dataset`` and return them as dense rows.

        Builds a ``FeaturesExtractor`` from this object's configuration,
        runs ``ExtractNumTfFeatures`` with the result of ``init_dicts()``,
        and expands every per-item ``{feature_id: value}`` mapping into a
        zero-filled list whose length is the largest feature id seen.

        Args:
            dataset: Passed through to ``FeaturesExtractor`` unchanged.

        Returns:
            A tuple ``(X, Y)``. ``X`` is a list of equal-length lists of
            feature values (``0`` where an item lacks a feature) and ``Y``
            is a list with the label of each item, in the same order.
            Both are empty lists when the extractor produced no items.

        Raises:
            IndexError: If the extractor exposes fewer labels than items.
        """
        extractor = FeaturesExtractor(
            self.configFileFeaturesExtractor,
            self.trainFeaturesSerializationFile,
            self.trainLabelsSerializationFile,
            self.languageModel,
            dataset,
            sentiment_features=True)
        extractor.ExtractNumTfFeatures(init_dicts())

        # Items with an empty feature mapping are skipped here because
        # max() of an empty mapping raises ValueError; such items simply
        # become all-zero rows. An empty dataset yields a width of 0.
        largest_ids = [max(item) for item in extractor.features if item]
        width = max(largest_ids) if largest_ids else 0

        labels = extractor.labels
        X = []
        Y = []
        for i, item in enumerate(extractor.features):
            row = [0] * width
            for feature_id in item:
                # Feature ids are used as 1-based column numbers.
                # NOTE(review): an id of 0 would wrap around to the last
                # column -- confirm that ids always start at 1.
                row[feature_id - 1] = item[feature_id]
            X.append(row)
            Y.append(labels[i])

        # Reset the extractor's containers now that the data was copied
        # out (presumably so the extracted data can be freed -- confirm).
        extractor.dataset = []
        extractor.features = []
        extractor.labels = []
        return X, Y
	def prepare_data(self, dataset):
		"""Extract features from ``dataset`` in the extractor's sparse form.

		Builds a ``FeaturesExtractor`` from this object's configuration and
		runs ``ExtractNumTfFeatures`` with ``sparse=True`` and the result of
		``init_dicts()`` as ``sentiment_dict``.

		Args:
			dataset: Passed through to ``FeaturesExtractor`` unchanged.

		Returns:
			A tuple ``(X, Y)``: ``X`` is the extractor's ``sparse_features``
			attribute as-is, ``Y`` is its ``labels`` wrapped in ``np.array``.
		"""
		extractor = FeaturesExtractor(
			self.configFileFeaturesExtractor,
			self.trainFeaturesSerializationFile,
			self.trainLabelsSerializationFile,
			self.languageModel,
			dataset,
			sentiment_features=True)
		extractor.ExtractNumTfFeatures(sentiment_dict=init_dicts(), sparse=True)

		# Capture both outputs before the extractor's containers are reset;
		# the label array must be built while ``labels`` is still populated.
		feature_matrix = extractor.sparse_features
		label_array = np.array(extractor.labels)

		# Reset the extractor's containers, each to its own fresh empty list.
		for attribute in ("dataset", "features", "labels"):
			setattr(extractor, attribute, [])

		return feature_matrix, label_array