Ejemplo n.º 1
0
    def write_new_file(self, url, title, source, i, time):
        """Wrap an article in the HTML template and write it to disk.

        The saved text starts with ``url``, ``title`` and the fixed id
        ``130679`` on separate lines, followed by the HTML document.  The
        file is written into ``self._dir``; once that write reports success
        the same text is written again under ``/root/Downloads/djckb/``.

        Args:
            url: Text stored as the first line of the saved file.
            title: Text used for both ``<title>`` and the ``<h1>`` heading.
            source: Text placed inside the ``article`` div.
            i: Value embedded in the file name right after ``iask_``.
            time: Text placed inside the ``time`` span.

        Returns:
            1 if a write into ``self._dir`` returned 1, otherwise 0.
        """
        document = '''
                    <html>
                        <head> 
                           <meta charset="utf-8">
                           <meta name="keywords" content="estarinfo">
                           <title>''' + title + '''</title>
                        </head> 
                        <body>
                            <h1 class="title">''' + title + '''</h1>
                            <span class="time">''' + time + '''</span>
                            <span class="source">130679</span>
                            <div class="article">''' + source + '''</div>
                        </body>
                    </html>
                '''
        page_text = url + '\n' + title + '\n130679\n\n\n\n' + document
        if self.debug:
            print('count:', self.total, ' === ', title, ' ===')

        # self._dir is still empty: call djckb_mkdir() first
        # (presumably it sets self._dir -- confirm against the class).
        if self._dir == '':
            self.djckb_mkdir()

        target = self._dir + 'iask_%s_%s.htm-2' % (i, len(self.d))
        attempts_left = 2
        while attempts_left > 0:
            attempts_left -= 1
            if crawlerfun.write_file(target, page_text, ifdisplay=0) == 1:
                # Save a second copy under /root/Downloads.
                mirror = '/root/Downloads/djckb/' + 'iask_%s_%s.htm-2' % (
                    i, len(self.d))
                crawlerfun.write_file(mirror, page_text, ifdisplay=0)
                return 1
            # The directory is sometimes deleted by the C program, so
            # recreate it before the next attempt.
            crawlerfun.mkdir(self._dir)

        return 0
Ejemplo n.º 2
0
    def write_new_file(self, url, title, source, i, time, id):
        """Wrap an article in the HTML template and write it into ``self._dir``.

        The saved text starts with ``url``, ``title`` and ``str(id)`` on
        separate lines, followed by the HTML document.  The write is tried
        at most twice; after each failed try ``crawlerfun.mkdir`` is called
        on ``self._dir``.

        Args:
            url: Text stored as the first line of the saved file.
            title: Text used for both ``<title>`` and the ``<h1>`` heading.
            source: Text placed inside the ``article`` div.
            i: Value embedded in the file name right after ``iask_``.
            time: Text placed inside the ``time`` span.
            id: Value written (via ``str``) as the third line and inside the
                ``source`` span.

        Returns:
            None in every case.
        """
        document = '''
                <html>
                    <head> 
                       <meta charset="utf-8">
                       <meta name="keywords" content="estarinfo">
                       <title>''' + title + '''</title>
                    </head> 
                    <body>
                        <h1 class="title">''' + title + '''</h1>
                        <span class="time">''' + time + '''</span>
                        <span class="source">''' + str(id) + '''</span>
                        <div class="article">''' + source + '''</div>
                    </body>
                </html>
                '''
        page_text = url + '\n' + title + '\n' + str(id) + '\n\n\n\n' + document

        if self.debug:
            print('count:', self.i, ' --- ', title)

        # self._dir is still empty: call crawl_mkdir() first
        # (presumably it sets self._dir -- confirm against the class).
        if self._dir == '':
            self.crawl_mkdir()

        target = self._dir + 'iask_%s_%s.htm-2' % (i, len(self.d))
        attempts_left = 2
        while attempts_left > 0:
            attempts_left -= 1
            status = crawlerfun.write_file(target, page_text, ifdisplay=0)
            if status == 1:
                return
            # The directory is sometimes deleted by the C program, so
            # recreate it before the next attempt.
            crawlerfun.mkdir(self._dir)
Ejemplo n.º 3
0
    def write_new_file(self, url, title, source, i, time, id):
        """Wrap an article in the HTML template and save it in two places.

        The saved text starts with ``url``, ``title`` and ``str(id)`` on
        separate lines, followed by the HTML document.  It is written into
        ``self._dir`` (tried at most twice); once that write returns 1 the
        same text is also written under ``/root/estar_save/jlwb/``, which is
        created first if it does not exist.

        Args:
            url: Text stored as the first line of the saved file.
            title: Text used for both ``<title>`` and the ``<h1>`` heading.
            source: Text placed inside the ``article`` div.
            i: Value embedded in the file name right after ``iask_``.
            time: Text placed inside the ``time`` span.
            id: Value written (via ``str``) as the third line and inside the
                ``source`` span.

        Returns:
            None in every case.
        """
        if self.debug:
            print('count:', self.i, ' --- ', title)

        content = '''
                    <html>
                        <head> 
                           <meta charset="utf-8">
                           <meta name="keywords" content="estarinfo">
                           <title>''' + title + '''</title>
                        </head> 
                        <body>
                            <h1 class="title">''' + title + '''</h1>
                            <span class="time">''' + time + '''</span>
                            <span class="source">''' + str(id) + '''</span>
                            <div class="article">''' + source + '''</div>
                        </body>
                    </html>
                '''
        page_text = url + '\n' + title + '\n' + str(id) + '\n\n\n\n' + content

        # self._dir is still empty: call jlwb_mkdir() first
        # (presumably it sets self._dir -- confirm against the class).
        if '' == self._dir:
            self.jlwb_mkdir()

        filename = self._dir + 'iask_' + str(i) + '_' + str(len(
            self.d)) + '.htm-2'
        for _ in range(2):
            if 1 == crawlerfun.write_file(filename, page_text, ifdisplay=0):
                save_path = '/root/estar_save/jlwb/'
                # exist_ok=True replaces the former exists()-then-makedirs()
                # pair: if another process created the directory between the
                # check and the call, makedirs() raised FileExistsError.
                os.makedirs(save_path, exist_ok=True)
                backup_name = save_path + 'iask_' + str(i) + '_' + str(len(
                    self.d)) + '.htm-2'
                # Save a second copy under /root/estar_save.
                crawlerfun.write_file(backup_name, page_text, ifdisplay=0)
                break
            else:
                # The directory is sometimes deleted by the C program, so
                # recreate it before the next attempt.
                crawlerfun.mkdir(self._dir)
Ejemplo n.º 4
0
    def write_new_file(self, url, title, source, i, id, time):
        """Wrap an article in the HTML template and save it in two places.

        The saved text starts with ``url``, ``title`` and ``str(id)`` on
        separate lines, followed by the HTML document.  It is written into
        ``self._dir`` (tried at most twice); once that write returns 1 the
        same text is also written under ``/root/estar_save/``.

        Args:
            url: Text stored as the first line of the saved file.
            title: Text used for the ``<h1>`` heading; also printed.
            source: Text placed inside the ``article`` div.
            i: Value embedded in the file name right after ``iask_``.
            id: Key into the local id-to-name table; must be ``1170771`` or
                ``1170773``.
            time: Text placed inside the ``time`` span.

        Returns:
            1 if a write into ``self._dir`` returned 1, otherwise 0.

        Raises:
            KeyError: If ``id`` is not one of the two known ids.
        """
        source_names = {
            1170771: '国家电网有限公司',
            1170773: '中国南方电网供应链统一服务平台',
        }

        document = '''
                    <html>
                        <head> 
                           <meta charset="utf-8">
                           <meta name="keywords" content="estarinfo">
                        </head> 
                        <body>
                            <h1 class="title">''' + title + '''</h1>
                            <span class="time">''' + time + '''</span>
                            <span class="source">''' + source_names[id] + '''</span>
                            <div class="article">''' + source + '''</div>
                        </body>
                    </html>
                '''
        page_text = url + '\n' + title + '\n' + str(id) + '\n\n\n' + document
        print(title)

        # self._dir is still empty: call guojiadianwang_mkdir() first
        # (presumably it sets self._dir -- confirm against the class).
        if self._dir == '':
            self.guojiadianwang_mkdir()

        target = self._dir + 'iask_%s_%s.htm-2' % (i, len(self.d))
        attempts_left = 2
        while attempts_left > 0:
            attempts_left -= 1
            if crawlerfun.write_file(target, page_text, ifdisplay=0) == 1:
                # Save a second copy under /root/estar_save.
                mirror = '/root/estar_save/' + 'iask_%s_%s.htm-2' % (
                    i, len(self.d))
                crawlerfun.write_file(mirror, page_text, ifdisplay=0)
                return 1
            # The directory is sometimes deleted by the C program, so
            # recreate it before the next attempt.
            crawlerfun.mkdir(self._dir)

        return 0
Ejemplo n.º 5
0
    def write_new_file(self, current_url, current_title, page_source, i):
        """Write one crawled page into ``self._dir``.

        The saved text is ``current_url``, ``current_title`` and a literal
        ``0`` on separate lines, a run of newlines, then ``page_source``.
        The write is tried at most twice; after each failed try
        ``crawlerfun.mkdir`` is called on ``self._dir``.

        Args:
            current_url: Text stored as the first line of the saved file.
            current_title: Text stored as the second line of the saved file.
            page_source: Page text appended after the header lines.
            i: Value embedded in the file name before ``.html``.

        Returns:
            1 if a write into ``self._dir`` returned 1, otherwise 0.
        """
        ok = 0
        # Assemble the record: URL, title and a "0" line, then the page.
        page_text = current_url + '\n' + current_title + '\n0\n\n\n\n' + page_source

        # self._dir is still empty: call baijiahao_mkdir() first
        # (presumably it sets self._dir -- confirm against the class).
        if '' == self._dir:
            self.baijiahao_mkdir()

        # File name layout: iask_<ts>_<i>.html
        # The original expression was `'iask_' + +str(ts)`; the stray unary
        # plus on a str raised TypeError on every call.
        # NOTE(review): `ts` is not defined in this method -- presumably a
        # module-level timestamp; confirm it exists where this class lives.
        filename = self._dir + 'iask_' + str(ts) + '_' + str(i) + '.html'
        for _ in range(2):
            if 1 == crawlerfun.write_file(filename, page_text, ifdisplay=0):
                ok = 1
                break
            else:
                # The directory is sometimes deleted by the C program, so
                # recreate it before the next attempt.
                crawlerfun.mkdir(self._dir)

        return ok