コード例 #1
0
    def _load_list(self):
        """Rebuild ``self._list`` from the list file found in ``self._path_dir``.

        The file is read line by line. A line starting with ``LOTW-ID: ``
        opens a new record; lines starting with ``LOTW-DATE: ``,
        ``LOTW-TITLE: `` and ``LOTW-URL: `` supply the other fields of the
        current record. Every finished record is passed to
        ``self._add_to_list(release_id, date, title, url)``. Lines with any
        other prefix are ignored.

        A file without any ``LOTW-ID: `` line (for example an empty file)
        produces no records; the end-of-file flush is skipped instead of
        failing with ``UnboundLocalError`` on the never-assigned fields.
        """
        self._list = []

        filename = common.get_list_file_name()
        path = os.path.join(self._path_dir, filename)

        with open(path) as f:
            have_record = False  # no record has been started yet
            for line in f:
                if line.startswith('LOTW-ID: '):  # a new record begins
                    if have_record:  # emit the record that just ended
                        self._add_to_list(release_id, date, title, url)
                    release_id = line[9:].strip()
                    have_record = True
                elif line.startswith('LOTW-DATE: '):
                    date = line[11:].strip()
                elif line.startswith('LOTW-TITLE: '):
                    title = line[12:].strip()
                elif line.startswith('LOTW-URL: '):
                    url = line[10:].strip()
            # NOTE: fields are not reset between records, so a record that
            # omits DATE/TITLE/URL reuses the previous record's value (and
            # still fails if the very first record omits one).
            if have_record:  # flush the last record, if there was one
                self._add_to_list(release_id, date, title, url)
コード例 #2
0
ファイル: releases.py プロジェクト: ic4f/phd-code
    def _load(self, company_id):
        """Load the text and metadata files belonging to ``company_id``.

        The base directory is the ``ROOT_ORIGINAL`` config value joined with
        the ``FORMATTED_PR`` config value. Inside it, the text file is named
        ``str(company_id)`` and the metadata file is named by
        ``common.get_list_file_name(company_id)``. The text is loaded first
        via ``self._load_text`` and its result is then handed, together with
        the metadata path, to ``self._load_meta``.
        """
        config = ConfigReader()
        root_dir = config.get('ROOT_ORIGINAL')
        formatted_subdir = config.get('FORMATTED_PR')
        base_dir = os.path.join(root_dir, formatted_subdir)

        meta_name = common.get_list_file_name(company_id)
        text_path = os.path.join(base_dir, str(company_id))
        meta_path = os.path.join(base_dir, meta_name)

        loaded_text = self._load_text(text_path)
        self._load_meta(meta_path, loaded_text)
コード例 #3
0
    def _write_files(self):
        """Clean the buffered list and text fragments and write both files.

        ``self._sb_list`` and ``self._sb_text`` are each joined into a single
        string and passed through ``DataCleaner('utf-8').clean``. The cleaned
        list goes to the file named by
        ``common.get_list_file_name(self._company_id)`` and the cleaned text
        to the file named ``self._company_id``; both live in
        ``self._output_path``. The list file is written first.
        """
        list_name = common.get_list_file_name(self._company_id)
        list_target = os.path.join(self._output_path, list_name)
        text_target = os.path.join(self._output_path, self._company_id)

        cleaner = DataCleaner('utf-8')
        list_content = cleaner.clean(''.join(self._sb_list))
        text_content = cleaner.clean(''.join(self._sb_text))

        # Same order as before: list file first, then the text file.
        outputs = ((list_target, list_content), (text_target, text_content))
        for target, content in outputs:
            with open(target, 'w') as out:
                out.write(content)
コード例 #4
0
    def _write_files(self, sb_list, sb_text):
        """Write the joined list and text fragments to ``self._output_path``.

        Args:
            sb_list: string fragments of the list file; joined and then
                passed through ``self._dc.clean`` before being written.
            sb_text: string fragments of the text file; joined and written
                as they are.

        The list file is named by
        ``common.get_list_file_name(self._company_id)`` and the text file is
        named ``self._company_id``. The list file is written first.
        """
        list_name = common.get_list_file_name(self._company_id)
        # TODO: refactor (marker carried over from the original code)
        list_target = os.path.join(self._output_path, list_name)
        text_target = os.path.join(self._output_path, self._company_id)

        list_content = self._dc.clean(''.join(sb_list))
        # Not cleaned here: the original notes it was cleaned in _format_body().
        text_content = ''.join(sb_text)

        outputs = ((list_target, list_content), (text_target, text_content))
        for target, content in outputs:
            with open(target, 'w') as out:
                out.write(content)
コード例 #5
0
ファイル: crawler.py プロジェクト: ic4f/phd-code
 def _write_list(self):
     """Write the fragments in ``self._sb``, joined, to the list file.

     The file is named by ``common.get_list_file_name()`` and is created
     (or overwritten) inside ``self._path_dir``.
     """
     base_dir = self._path_dir
     file_name = common.get_list_file_name()
     target = os.path.join(base_dir, file_name)
     with open(target, 'w') as out:
         content = ''.join(self._sb)
         out.write(content)