コード例 #1
0
    def accumulate_body(self):
        """
        Return the body string extracted from this mail.

        The message is fetched with ``self.get_email()`` and handed to
        ``extract_body`` using that function's default arguments.
        """
        # TODO: which part counts as the body is hardcoded here; it should be
        #       switchable via toggle commands and have a config default
        mail = self.get_email()
        return extract_body(mail)
コード例 #2
0
 def get_email(self):
     """
     Parse the message file on first use and return its extracted body.

     The file named by ``self.message.get_filename()`` is parsed and cached
     in ``self._email`` whenever ``self._email`` is still falsy. If the file
     cannot be read (``IOError``), a placeholder warning message is parsed
     and cached instead. The cached message is then passed to
     ``extract_body`` and that result is returned.

     NOTE(review): despite its name this returns the ``extract_body`` result,
     not the cached message object -- confirm that callers expect this.
     """
     filename = self.message.get_filename()
     # Built up front, exactly like the cached-path check below, so the
     # placeholder is ready if opening the file fails.
     placeholder = ("Subject: Caution!\n"
                    "Message file is no longer accessible:\n%s" % filename)
     if not self._email:
         try:
             with open(filename) as handle:
                 parsed = email.message_from_file(handle)
         except IOError:
             parsed = email.message_from_string(placeholder)
         self._email = parsed
     return extract_body(self._email)
コード例 #3
0
 def get_text_content(self):
     """
     Return what ``extract_body`` yields for this mail when it is called
     with ``types=['text/plain']``.
     """
     wanted_types = ['text/plain']
     mail = self.get_email()
     return extract_body(mail, types=wanted_types)
コード例 #4
0
ファイル: message.py プロジェクト: SuperScript/alot
 def accumulate_body(self):
     """
     Extract and return the body string of this mail.

     Delegates to ``extract_body`` on the message returned by
     ``self.get_email()``.
     """
     # TODO: toggle commands should decide which part is treated as the body
     message = self.get_email()
     return extract_body(message)
コード例 #5
0
	def read_data(self, name, type = 1):
		"""
		Parse one Reuters XML file and append its labelled text to the data set.

		:param name: file name inside the selected sub-directory of
		             ``self.path_data``
		:param type: selects the sub-directory: 1 -> ``train/``,
		             2 -> ``test/``, 3 -> ``first3/``
		:raises ValueError: if `type` is not one of 1, 2 or 3

		The file is parsed as ISO-8859-1. For every ``TEXT`` element of the
		root for which ``utils.extract_body`` yields a non-empty result, the
		label vector of the document is collected (see ``_collect_labels``).
		If at least one label was found, the vector is appended to
		``self.labels_train`` and the cleaned body text
		(``utils.stop_characters(body.text)``) to ``self.texts_train``.
		Both are appended with ``np.append`` without an axis, i.e. flattened.
		Note that the ``*_train`` attributes are the target for every `type`.
		"""
		# NOTE: `type` shadows the builtin; the name is kept because it is part
		# of the method's public signature.
		subdirs = {1: "train/", 2: "test/", 3: "first3/"}
		if type not in subdirs:
			# Previously an unknown value produced an empty path and a
			# confusing failure inside the XML parser.
			raise ValueError("unknown data set type: %r" % (type,))
		ruta = self.path_data + subdirs[type] + name
		reuters = et.parse(ruta, et.XMLParser(encoding='ISO-8859-1')).getroot()
		for text in reuters.findall("TEXT"):
			body = utils.extract_body(text)
			if body is None or body == "":
				continue
			labels_temp, all_labels = self._collect_labels(reuters)
			# Documents without any resolvable label are not stored.
			if all_labels != 0:
				self.labels_train = np.append(self.labels_train, labels_temp)
				self.texts_train = np.append(self.texts_train, utils.stop_characters(body.text))

	def _collect_labels(self, reuters):
		"""
		Build the label vector for the document rooted at `reuters`.

		Looks at every ``D`` entry below the root's ``TOPICS``, ``PLACES``,
		``PEOPLE``, ``ORGS`` and ``EXCHANGES`` elements, resolves its text with
		``utils.find_label_index`` and sets the matching position of a zero
		vector of length ``config.label_size`` to 1.0. Each resolved entry
		also increments ``self.label_examples`` at that index.

		:returns: tuple ``(label_vector, number_of_resolved_entries)``
		"""
		labels_temp = np.zeros(config.label_size)
		all_labels = 0
		for tag in ("TOPICS", "PLACES", "PEOPLE", "ORGS", "EXCHANGES"):
			for group in reuters.findall(tag):
				for a_d in group.findall("D"):
					try:
						label_index = utils.find_label_index(a_d.text)
					except ValueError:
						# find_label_index could not resolve this entry
						# (presumably an unknown label); ignore it.
						continue
					labels_temp[label_index] = 1.0
					self.label_examples[label_index] += 1
					all_labels += 1
		return labels_temp, all_labels