    def evaluate(self, evaluation_set=None):
        """
        Evaluates the classifier on the given evaluation_set and returns an
        EvaluationResult object. If no evaluation set is provided, the
        evaluation corpus is used.
        """
        if evaluation_set is None:
            evaluation_set = list(self._evalcorpus.sents())
        # Classify every sentence, then compare predictions against the gold
        # certainty labels. 'uncertain' is treated as the positive class.
        eval_featuresets = [
            self.sentenceFeatures(sent) for sent in evaluation_set
        ]
        results = self._classifier.batch_classify(eval_featuresets)
        labels = [s.certainty for s in evaluation_set]
        tp, fp, tn, fn = 0, 0, 0, 0
        false_pos_sents = []
        false_neg_sents = []
        for (s, r, l) in zip(evaluation_set, results, labels):
            if r == 'uncertain' and r == l:
                tp += 1
            elif r == 'uncertain' and r != l:
                fp += 1
                false_pos_sents.append(s)
                debug('FP:' + s.id.encode('UTF-8') + ': ' +
                      s.string.encode('UTF-8'))
            elif r == 'certain' and r == l:
                tn += 1
            elif r == 'certain' and r != l:
                fn += 1
                false_neg_sents.append(s)
                debug('FN:' + s.id.encode('UTF-8') + ': ' +
                      s.string.encode('UTF-8'))
            else:
                raise ValueError('Unexpected label from classifier: %r' % r)
        debug('Size of evaluation set: ' + str(len(evaluation_set)))
        return EvaluationResult(tp, fp, tn, fn)
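
    # Hedged sketch, not called anywhere in this class: how the tp/fp/tn/fn
    # counts produced by evaluate() map onto precision, recall and F1, with
    # 'uncertain' as the positive class. The actual EvaluationResult class
    # (not shown in this file) may already expose these metrics.
    def _sketch_metrics(self, tp, fp, tn, fn):
        precision = float(tp) / (tp + fp) if (tp + fp) else 0.0
        recall = float(tp) / (tp + fn) if (tp + fn) else 0.0
        f1 = (2.0 * precision * recall / (precision + recall)
              if (precision + recall) else 0.0)
        return precision, recall, f1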
    def _build_bow_features(self, training_set):
        """
        Builds the feature set that will be considered by the classifier,
        using a Bag of Words approach. The features are the '_mostCommonSize'
        most common words, after leaving out the '_stopWordsSize' most common
        ones. Note that this set is dependent on the given training_set.
        """
        # Collect every token in the training set, lower-cased; use lemmas
        # instead of surface forms when bag-of-lemmas mode is enabled.
        tokens = []
        if self._useBOL:
            debug('Using word lemmas as features for the Bag of Words.')
        for sent in training_set:
            for (word, lemma, pos, chunk, ne) in sent.genia_words:
                tokens.append(lemma.lower() if self._useBOL else word.lower())
        counted_words = Counter(tokens)
        most_common_words = [
            w for w, _ in counted_words.most_common(self._mostCommonSize)
        ]
        too_common_words = [
            w for w, _ in counted_words.most_common(self._stopWordsSize)
        ]
        debug("Words excluded from dictionary:")
        debug(too_common_words)
        # The dictionary is the most common words minus the stop words.
        self._feature_words = list(
            set(most_common_words) - set(too_common_words))
        # Map each feature word to an integer index, and prepare an all-zero
        # featureset keyed by those indices.
        self._empty_feature_set = dict()
        self._empty_int_set = dict()
        for i, w in enumerate(self._feature_words):
            self._empty_feature_set[w] = i
            self._empty_int_set[i] = 0
        debug('Size of dictionary: ' + str(len(self._empty_feature_set)))
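
    # Illustrative, self-contained sketch of the vocabulary selection above
    # (hypothetical token counts; not called anywhere in this class): with a
    # dictionary size of 5 and 2 stop words, the 2 most frequent tokens are
    # subtracted from the top-5 list, leaving 3 feature words.
    def _sketch_vocab_selection(self):
        tokens = ['the', 'the', 'the', 'the', 'cell', 'cell', 'cell',
                  'may', 'may', 'suggest', 'protein']
        counts = Counter(tokens)
        top = [w for w, _ in counts.most_common(5)]    # the, cell, may, ...
        stop = [w for w, _ in counts.most_common(2)]   # the, cell
        return list(set(top) - set(stop))              # may, suggest, protein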
    def train(self, training_set=None):
        """
        Trains the Bag of Words NaiveBayes classifier on the given
        training_set, or on the training corpus if none is provided.
        """
        if training_set is None:
            training_set = list(self._corpus.sents())
        # Build the feature dictionary from the training set.
        self._build_bow_features(training_set)

        # Build a labeled featureset for each sentence.
        labeled_featuresets = [(self.sentenceFeatures(sent), sent.certainty)
                               for sent in training_set]

        debug('Size of training set: ' + str(len(labeled_featuresets)))
        # Train the Naive Bayes model on the labeled featuresets.
        self._classifier = NaiveBayesClassifier.train(labeled_featuresets)
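
    # Hedged end-to-end sketch (not called automatically; the corpus wiring
    # and evaluation-set argument are assumptions based on the methods above):
    # train on the default training corpus, then score a held-out set.
    def _sketch_train_and_evaluate(self, evaluation_set):
        self.train()  # builds the BOW features and the NaiveBayes model
        return self.evaluate(evaluation_set)  # an EvaluationResult object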