コード例 #1
0
    def process_item(self, item, spider):
        """Persist a scraped item and update the crawler's row in `status`.

        For an ``Article`` the title and content are run through ``Filter``;
        when the filter returns a truthy result a ``RawData`` row is added and
        committed. The crawler's current title, URL and name are then
        upserted into ``status``. Any other item is treated as a
        page-progress item and only its ``current_page`` is upserted.

        Args:
            item: The scraped item. Articles are read through the keys
                ``title``, ``content``, ``spider``, ``id`` and
                ``currentURL``; other items through ``id`` and
                ``currentPage``.
            spider: The spider that produced the item (not used here).

        Returns:
            The same ``item``, so later pipeline stages still receive it.
        """
        # Imported locally so this method does not rely on a module-level
        # import of ``text``; aliased to keep it distinct from local names.
        from sqlalchemy import text as sql_text

        if isinstance(item, Article):
            extracted = Filter().filterResult(item["title"], item["content"])
            if extracted:
                # The filter result is indexed as title, org, money, date
                # parts; the date parts feed date() as year, month, day.
                parts = extracted[3]
                record = RawData(
                    title=extracted[0],
                    content=item["content"].encode("utf-8"),
                    spider=item["spider"].encode("utf-8"),
                    org=extracted[1],
                    money=extracted[2],
                    date=date(parts[0], parts[1], parts[2])
                )
                self.session.add(record)
                try:
                    self.session.commit()
                except Exception:
                    # Roll back so the session stays usable for later items.
                    self.session.rollback()
                    raise

            # Values are bound as parameters instead of being spliced into
            # the SQL, so quotes in scraped text cannot alter the statement.
            statement = sql_text(
                "INSERT INTO `status`"
                " (`crawler_id`, `current_title`, `current_url`, `name`)"
                " VALUES (:crawler_id, :title, :url, :name)"
                " ON DUPLICATE KEY UPDATE"
                " `current_title` = :title,"
                " `current_url` = :url,"
                " `name` = :name,"
                " `timestamp` = NOW()"
            )
            # UTF-8 encoding is kept so the database receives the same bytes
            # the concatenated statement used to carry.
            params = {
                "crawler_id": item["id"],
                "title": item["title"].encode("utf-8"),
                "url": item["currentURL"].encode("utf-8"),
                "name": item["spider"].encode("utf-8"),
            }
        else:
            statement = sql_text(
                "INSERT INTO `status` (`crawler_id`, `current_page`)"
                " VALUES (:crawler_id, :page)"
                " ON DUPLICATE KEY UPDATE `current_page` = :page"
            )
            params = {
                "crawler_id": item["id"],
                "page": item["currentPage"].encode("utf-8"),
            }

        engine.execute(statement, params)
        return item
コード例 #2
0
    def process_item(self, item, spider):
        """Persist a scraped item and update the crawler's row in `status`.

        For an ``Article`` the title and content are run through ``Filter``;
        when the filter returns a truthy result a ``RawData`` row is added and
        committed. The crawler's current title, URL and name are then
        upserted into ``status``. Any other item is treated as a
        page-progress item and only its ``current_page`` is upserted.

        Args:
            item: The scraped item. Articles are read through the keys
                ``title``, ``content``, ``spider``, ``id`` and
                ``currentURL``; other items through ``id`` and
                ``currentPage``.
            spider: The spider that produced the item (not used here).

        Returns:
            The same ``item``, so later pipeline stages still receive it.
        """
        # Imported locally so this method does not rely on a module-level
        # import of ``text``; aliased to keep it distinct from local names.
        from sqlalchemy import text as sql_text

        if isinstance(item, Article):
            extracted = Filter().filterResult(item["title"], item["content"])
            if extracted:
                # The filter result is indexed as title, org, money, date
                # parts; the date parts feed date() as year, month, day.
                parts = extracted[3]
                record = RawData(
                    title=extracted[0],
                    content=item["content"].encode("utf-8"),
                    spider=item["spider"].encode("utf-8"),
                    org=extracted[1],
                    money=extracted[2],
                    date=date(parts[0], parts[1], parts[2])
                )
                self.session.add(record)
                try:
                    self.session.commit()
                except Exception:
                    # Roll back so the session stays usable for later items.
                    self.session.rollback()
                    raise

            # Values are bound as parameters instead of being spliced into
            # the SQL, so quotes in scraped text cannot alter the statement.
            statement = sql_text(
                "INSERT INTO `status`"
                " (`crawler_id`, `current_title`, `current_url`, `name`)"
                " VALUES (:crawler_id, :title, :url, :name)"
                " ON DUPLICATE KEY UPDATE"
                " `current_title` = :title,"
                " `current_url` = :url,"
                " `name` = :name,"
                " `timestamp` = NOW()"
            )
            # UTF-8 encoding is kept so the database receives the same bytes
            # the concatenated statement used to carry.
            params = {
                "crawler_id": item["id"],
                "title": item["title"].encode("utf-8"),
                "url": item["currentURL"].encode("utf-8"),
                "name": item["spider"].encode("utf-8"),
            }
        else:
            statement = sql_text(
                "INSERT INTO `status` (`crawler_id`, `current_page`)"
                " VALUES (:crawler_id, :page)"
                " ON DUPLICATE KEY UPDATE `current_page` = :page"
            )
            params = {
                "crawler_id": item["id"],
                "page": item["currentPage"].encode("utf-8"),
            }

        engine.execute(statement, params)
        return item
コード例 #3
0
 def open_spider(self, spider):
     """Open a DB session and delete this spider's existing `RawData` rows.

     Args:
         spider: The spider being opened; its ``name`` selects the rows
             to delete.
     """
     # Imported locally so this method does not rely on a module-level
     # import of ``text``.
     from sqlalchemy import text as sql_text

     self.session = DBSession()
     # The name is bound as a parameter rather than spliced into the SQL,
     # so a quote in the name cannot break or alter the statement.
     engine.execute(
         sql_text("DELETE FROM `RawData` WHERE `spider` = :spider"),
         {"spider": spider.name}
     )
コード例 #4
0
 def open_spider(self, spider):
     """Open a DB session and delete this spider's existing `RawData` rows.

     Args:
         spider: The spider being opened; its ``name`` selects the rows
             to delete.
     """
     # Imported locally so this method does not rely on a module-level
     # import of ``text``.
     from sqlalchemy import text as sql_text

     self.session = DBSession()
     # The name is bound as a parameter rather than spliced into the SQL,
     # so a quote in the name cannot break or alter the statement.
     engine.execute(
         sql_text("DELETE FROM `RawData` WHERE `spider` = :spider"),
         {"spider": spider.name}
     )