コード例 #1
0
ファイル: crawler.py プロジェクト: alphadx/YahooQAGeter
	def makeFinalXML(self, filtered = False):
		"""Serialize the crawled question into XML and JSON representations.

		Calls ``self.getAll`` first, then builds a ``<question>`` tree that
		holds the title, body, keywords, categories and answers, and stores
		the results on the instance:

		* ``finalXML`` -- root element of the generated tree
		* ``finalXMLString`` -- pretty-printed UTF-8 serialization of the tree,
		  including the XML declaration
		* ``finalXMLJSON`` -- result of ``xml2json.xml_to_json`` on the tree
		* ``finalXMLRAWString`` -- the same kind of serialization applied to
		  ``self.XMLRAW``

		filtered -- forwarded unchanged to ``self.getAll``.
		"""
		self.getAll(filtered = filtered)
		stampFormat = "%Y-%m-%d %H:%M:%S"

		def wrapItems(containerTag, itemTag, values):
			# One <itemTag> child per value, all under a single <containerTag>.
			container = E(containerTag)
			for value in values:
				container.append(E(itemTag, value))
			return container

		def answerNode(data, kind):
			# The best answer and the regular answers share one layout; only
			# the "type" attribute differs, so both go through this helper.
			node = E(
				"answer",
				data['text'],
				type = kind,
				user = data['user'],
				idUser = str(data['idUser']),
				id = str(data['id']),
				relativeDate = str(data['relativeDate']),
				thumbUp = str(data['thumbUp']),
				thumbDown = str(data['thumbDown']),
				topContributor = str(data['topContributor']),
				postDate = str(data['postDate'])
			)
			node.append(wrapItems("links", "link", data["links"]))
			node.append(wrapItems("images", "image", data["images"]))
			return node

		salida = E(
			"question",
			qid = self.qid,
			user = self.user[0],
			idUser = str(self.user[1]),
			pages = str(self.pageCount),
			url = self.url,
			urlCanonical = self.urlCanonical,
			canonical = self.canonical,
			date = datetime.strftime(self.date, stampFormat),
			followers = str(self.followers),
			crawledDate = datetime.strftime(self.crawledDate, stampFormat)
		)

		body = E("body", type = "container")
		for idx, texto in enumerate(self.body):
			# The first entry is tagged "body", every later one "update".
			body.append(E("question", texto, type = "body" if idx == 0 else "update"))

		title = E("title", self.title)
		keywords = wrapItems("keywords", "keyword", self.keywords)

		categories = E("categories")
		for idx, categoria in enumerate(self.categories):
			# Each entry is indexed as (id, name); level is the 1-based position.
			categories.append(E("category", categoria[1], categoryId = str(categoria[0]), level = str(idx + 1)))

		answers = E("answers")
		# bestAnswer is optional; when present it is emitted before the others.
		if hasattr(self, 'bestAnswer'):
			answers.append(answerNode(self.bestAnswer, "bestAnswer"))
		for respuesta in self.answers:
			answers.append(answerNode(respuesta, "answer"))

		for child in (title, body, keywords, categories, answers):
			salida.append(child)

		self.finalXML = salida
		self.finalXMLString = etree.tostring(salida, pretty_print = True, xml_declaration = True, encoding = 'UTF-8')
		self.finalXMLJSON = xml2json.xml_to_json(self.finalXML)
		self.finalXMLRAWString = etree.tostring(self.XMLRAW, pretty_print = True, xml_declaration = True, encoding = 'UTF-8')
コード例 #2
0
ファイル: api.py プロジェクト: ipedrazas/deep
def before_insert(resource_name, items):
    """Convert the XML ``report`` of every item to JSON, in place.

    For each element of ``items`` the value stored under ``'report'`` is
    replaced by ``xml_to_json`` applied to it, and the updated item is then
    printed to stdout.

    Args:
        resource_name: Not used by this function.
            NOTE(review): presumably required by the hook signature of the
            framework that calls this -- confirm where it is registered.
        items: Iterable of mutable mappings that each hold a ``'report'``
            entry.
    """
    for item in items:
        item['report'] = xml_to_json(item['report'])
        # Call form parses on both Python 2 and Python 3; the former print
        # statement is a SyntaxError on Python 3. Output text is unchanged.
        print(str(item))
コード例 #3
0
    def makeFinalXML(self, filtered=False):
        """Build the final XML document for this question and store its renderings.

        ``self.getAll`` is invoked first with the given ``filtered`` flag.
        A ``<question>`` root element is then assembled from the instance
        attributes (metadata attributes on the root; ``title``, ``body``,
        ``keywords``, ``categories`` and ``answers`` as children).

        Attributes written on ``self``:
            finalXML: the root element.
            finalXMLString: ``etree.tostring`` of the root (pretty-printed,
                UTF-8, with XML declaration).
            finalXMLJSON: ``xml2json.xml_to_json`` of the root.
            finalXMLRAWString: ``etree.tostring`` of ``self.XMLRAW`` with the
                same options.

        Args:
            filtered: Passed straight through to ``self.getAll``.
        """
        self.getAll(filtered=filtered)

        dateFormat = "%Y-%m-%d %H:%M:%S"

        def listElement(parentTag, childTag, texts):
            # Build <parentTag> containing one <childTag> per entry of texts.
            parent = E(parentTag)
            for text in texts:
                parent.append(E(childTag, text))
            return parent

        def answerElement(info, answerType):
            # Single definition of the <answer> layout; the best answer and
            # the ordinary answers differ only in their "type" attribute.
            element = E("answer",
                        info['text'],
                        type=answerType,
                        user=info['user'],
                        idUser=str(info['idUser']),
                        id=str(info['id']),
                        relativeDate=str(info['relativeDate']),
                        thumbUp=str(info['thumbUp']),
                        thumbDown=str(info['thumbDown']),
                        topContributor=str(info['topContributor']),
                        postDate=str(info['postDate']))
            element.append(listElement("links", "link", info["links"]))
            element.append(listElement("images", "image", info["images"]))
            return element

        salida = E("question",
                   qid=self.qid,
                   user=self.user[0],
                   idUser=str(self.user[1]),
                   pages=str(self.pageCount),
                   url=self.url,
                   urlCanonical=self.urlCanonical,
                   canonical=self.canonical,
                   date=datetime.strftime(self.date, dateFormat),
                   followers=str(self.followers),
                   crawledDate=datetime.strftime(self.crawledDate,
                                                 dateFormat))

        body = E("body", type="container")
        for position, fragment in enumerate(self.body):
            # Position 0 is emitted with type "body", the rest as "update".
            fragmentType = "update" if position else "body"
            body.append(E("question", fragment, type=fragmentType))

        title = E("title", self.title)
        keywords = listElement("keywords", "keyword", self.keywords)

        categories = E("categories")
        for position, category in enumerate(self.categories):
            # category[0] is used as the id, category[1] as the text;
            # "level" is the 1-based position in the list.
            categories.append(
                E("category",
                  category[1],
                  categoryId=str(category[0]),
                  level=str(position + 1)))

        answers = E("answers")
        # The best answer may be absent; when set it goes first.
        if hasattr(self, 'bestAnswer'):
            answers.append(answerElement(self.bestAnswer, "bestAnswer"))
        for info in self.answers:
            answers.append(answerElement(info, "answer"))

        for section in (title, body, keywords, categories, answers):
            salida.append(section)

        self.finalXML = salida
        self.finalXMLString = etree.tostring(salida,
                                             pretty_print=True,
                                             xml_declaration=True,
                                             encoding='UTF-8')
        self.finalXMLJSON = xml2json.xml_to_json(self.finalXML)
        self.finalXMLRAWString = etree.tostring(self.XMLRAW,
                                                pretty_print=True,
                                                xml_declaration=True,
                                                encoding='UTF-8')