コード例 #1
0
ファイル: miner.py プロジェクト: tirami/udadisi-rss
 def mine(self, text, time_created, link_url):
     """Extract terms from one post and send them to the parent category.

     A single-post batch is built from ``text`` and passed to
     ``self.send_to_parent``. After a successful send, the SHA-1 hex digest
     of ``text`` is appended to ``self.mined_posts_hashes``.

     Args:
         text: Post body handed to ``extract.extract_terms``.
         time_created: Object providing ``strftime`` (presumably a
             ``datetime`` -- confirm against the caller).
         link_url: URL of the post, forwarded to ``RssMiner.dict_of_post``.

     Returns:
         None. Any ``Exception`` raised while mining is caught and printed
         rather than propagated (best-effort behaviour).
     """
     # Local import so this method does not rely on a module-level import.
     import hashlib

     try:
         terms_dict = extract.extract_terms(text)
         stamp_format = '%Y%m%d%H%M'
         now = datetime.now().strftime(stamp_format)
         created = time_created.strftime(stamp_format)
         post = RssMiner.dict_of_post(link_url, terms_dict, created, now)
         batch = RssMiner.package_batch_to_json(self.category.id, [post])
         self.send_to_parent(self.category.parent_id, batch)
         # The original appended the builtin ``hash`` function itself, so the
         # list never contained anything that identifies a post. Store a
         # comparable digest of the text instead.
         raw = text if isinstance(text, bytes) else text.encode('utf-8')
         self.mined_posts_hashes.append(hashlib.sha1(raw).hexdigest())
     except Exception as e:
         # Parses on Python 2 and 3; on Python 2 the output matches the old
         # ``print e.message, e.args`` statement.
         print("%s %s" % (getattr(e, 'message', e), e.args))
コード例 #2
0
ファイル: miner.py プロジェクト: tirami/udadisi-rss
 def mine(self, text, time_created, link_url):
     """Mine a single post: extract its terms and forward them upstream.

     The extracted terms are wrapped in a one-post batch and handed to
     ``self.send_to_parent``. Once that succeeds, the SHA-1 hex digest of
     ``text`` is recorded in ``self.mined_posts_hashes``.

     Args:
         text: Post body passed to ``extract.extract_terms``.
         time_created: Object providing ``strftime`` (presumably a
             ``datetime`` -- confirm against the caller).
         link_url: URL of the post, forwarded to ``RssMiner.dict_of_post``.

     Returns:
         None. Every ``Exception`` is caught and printed, never re-raised.
     """
     # Imported locally so the method works even if the module does not
     # import hashlib at the top.
     import hashlib

     try:
         terms_dict = extract.extract_terms(text)
         now = datetime.now().strftime('%Y%m%d%H%M')
         created_stamp = time_created.strftime('%Y%m%d%H%M')
         post = RssMiner.dict_of_post(link_url, terms_dict, created_stamp, now)
         batch = RssMiner.package_batch_to_json(self.category.id, [post])
         self.send_to_parent(self.category.parent_id, batch)
         # Record a real digest; the previous code appended the builtin
         # ``hash`` function, which cannot identify the post.
         if isinstance(text, bytes):
             encoded = text
         else:
             encoded = text.encode('utf-8')
         self.mined_posts_hashes.append(hashlib.sha1(encoded).hexdigest())
     except Exception as e:
         # Function-call form is valid on Python 2 and 3 and prints the same
         # text as ``print e.message, e.args`` did on Python 2.
         print("%s %s" % (getattr(e, 'message', e), e.args))
コード例 #3
0
    def run(self):
        """Download every configured URL and mine pages not seen before.

        ``self.category.urls`` is a comma-separated string of URLs. For each
        one, the page text is downloaded and its SHA-1 hex digest is compared
        with ``self.mined_posts_hashes``. Unseen pages are term-extracted,
        sent to the parent category via ``self.send_to_parent``, and their
        digest is recorded; seen pages are skipped with a message.

        Returns:
            None. An ``Exception`` raised while handling one URL is caught
            and printed, and the loop continues with the next URL.
        """
        self.log("Starting mining.")
        # Tolerate spaces after commas and a trailing comma in the setting.
        urls = [u.strip() for u in self.category.urls.split(',') if u.strip()]
        for url in urls:
            try:
                visible_text, last_modified = self.download_page(url)
                # Use the hex digest string: hashlib hash objects compare by
                # identity, so the membership test below could never match.
                text_hash = hashlib.sha1(
                    visible_text.encode('utf-8')).hexdigest()
                if text_hash in self.mined_posts_hashes:
                    print("Post already mined.")
                    continue

                terms_dict = extract.extract_terms(visible_text)
                now = datetime.now().strftime('%Y%m%d%H%M')
                modified_stamp = last_modified.strftime('%Y%m%d%H%M')
                post = WebsiteMiner.dict_of_post(
                    url, terms_dict, modified_stamp, now)
                batch = WebsiteMiner.package_batch_to_json(
                    self.category.id, [post])
                self.send_to_parent(self.category.parent_id, batch)
                # The original appended the builtin ``hash`` function here
                # instead of the digest that was just computed.
                self.mined_posts_hashes.append(text_hash)
            except Exception as e:
                # Valid on Python 2 and 3; same output as the old
                # ``print e.message, e.args`` statement on Python 2.
                print("%s %s" % (getattr(e, 'message', e), e.args))
コード例 #4
0
    def run(self):
        """Mine each URL of this category once, skipping already-seen text.

        The comma-separated ``self.category.urls`` string is split into
        URLs. Each page is downloaded with ``self.download_page`` and
        identified by the SHA-1 hex digest of its UTF-8 encoded visible text.
        Pages whose digest is not yet in ``self.mined_posts_hashes`` are
        term-extracted, packaged as a one-post batch, and sent to the parent
        category; the digest is then stored.

        Returns:
            None. Failures for one URL are caught, printed, and do not stop
            processing of the remaining URLs.
        """
        self.log("Starting mining.")
        stamp_format = '%Y%m%d%H%M'
        for raw_url in self.category.urls.split(','):
            url = raw_url.strip()
            if not url:
                # Empty entries come from stray or trailing commas.
                continue
            try:
                visible_text, last_modified = self.download_page(url)
                digest = hashlib.sha1(visible_text.encode('utf-8'))
                # Compare and store the hex string. The hash object itself
                # only compares by identity, which made every page look new.
                text_hash = digest.hexdigest()
                if text_hash not in self.mined_posts_hashes:
                    terms_dict = extract.extract_terms(visible_text)
                    now = datetime.now().strftime(stamp_format)
                    modified = last_modified.strftime(stamp_format)
                    post = WebsiteMiner.dict_of_post(url, terms_dict,
                                                     modified, now)
                    batch = WebsiteMiner.package_batch_to_json(
                        self.category.id, [post])
                    self.send_to_parent(self.category.parent_id, batch)
                    # Previously the builtin ``hash`` function was appended.
                    self.mined_posts_hashes.append(text_hash)
                else:
                    print("Post already mined.")
            except Exception as e:
                # Works on Python 2 and 3; on Python 2 it prints exactly what
                # ``print e.message, e.args`` printed.
                print("%s %s" % (getattr(e, 'message', e), e.args))