예제 #1
0
    def __init__(self):
        """Load previously stored lines and reopen the store for rewriting.

        Reads every line of ``FILE_NAME`` into ``self.news``, then reopens the
        same file in ``"wb"`` mode (which truncates it) and keeps that handle
        on ``self.file``. Finally calls ``newdb.update`` once per entry of
        ``NEWSCATALOG["data"][1]["list"]`` with an empty ``newsList``.

        Raises:
            IOError/OSError: if ``FILE_NAME`` cannot be opened.
        """
        self.news = set()
        self.updatedNews = set()
        self.newsList = []

        # The context manager closes the handle even if reading fails part-way.
        with open(FILE_NAME) as f:
            for line in f:
                # Strip only the line terminator: slicing off the last
                # character unconditionally would corrupt a final line that
                # has no trailing newline.
                self.news.add(line.rstrip("\n"))

        # "wb" truncates the file; its previous contents now live only in
        # self.news.
        self.file = open(FILE_NAME, "wb")
        for s in NEWSCATALOG["data"][1]["list"]:
            newdb.update({'newCatalog': s, 'newsList': []})
예제 #2
0
    def __init__(self):
        """Load previously stored lines and reopen the store for rewriting.

        Reads every line of ``FILE_NAME`` into ``self.news``, then reopens the
        same file in ``"wb"`` mode (which truncates it) and keeps that handle
        on ``self.file``. Finally calls ``newdb.update`` once per entry of
        ``NEWSCATALOG["data"][1]["list"]`` with an empty ``newsList``.

        Raises:
            IOError/OSError: if ``FILE_NAME`` cannot be opened.
        """
        self.news = set()
        self.updatedNews = set()
        self.newsList = []

        # The context manager closes the handle even if reading fails part-way.
        with open(FILE_NAME) as f:
            for line in f:
                # Strip only the line terminator: slicing off the last
                # character unconditionally would corrupt a final line that
                # has no trailing newline.
                self.news.add(line.rstrip("\n"))

        # "wb" truncates the file; its previous contents now live only in
        # self.news.
        self.file = open(FILE_NAME, "wb")
        for s in NEWSCATALOG["data"][1]["list"]:
            newdb.update({'newCatalog': s, 'newsList': []})
예제 #3
0
    def process_item(self, item, spider):
        """Record one item in the output file and in the per-catalog batch.

        The item is serialised to UTF-8 encoded JSON (sorted keys, non-ASCII
        characters kept verbatim). The encoded bytes are added to
        ``self.updatedNews`` and written, newline-terminated, to ``self.file``.

        Items are also accumulated in ``self.currentCatalogList``. When
        ``item["newCatalog"]`` differs from ``self.currentCatalog`` (and the
        latter is not the empty string), the accumulated list is handed to
        ``newdb.update`` and a fresh list is started with this item.

        Args:
            item: mapping-like object that provides a ``"newCatalog"`` key.
            spider: not used by this method.

        Returns:
            The same ``item`` object that was passed in.

        Raises:
            KeyError: if ``item`` has no ``"newCatalog"`` key.
        """
        record = dict(item)
        # Serialise once and reuse the bytes for both the set and the file.
        encoded = json.dumps(record, ensure_ascii=False, sort_keys=True).encode('utf-8')
        self.updatedNews.add(encoded)

        catalog = item["newCatalog"]
        if self.currentCatalog != catalog and self.currentCatalog != "":
            # The catalog changed: hand over the finished batch and start anew.
            newdb.update({'newCatalog': self.currentCatalog, 'newsList': self.currentCatalogList})
            self.currentCatalogList = []
        # Always keep the current item; previously the item that triggered a
        # catalog switch was silently left out of the new batch.
        self.currentCatalogList.append(record)
        self.currentCatalog = catalog

        # b"\n" keeps the concatenation bytes-only, matching the binary-mode
        # file handle on both Python 2 and Python 3.
        self.file.write(encoded + b"\n")
        return item