import codecs
import logging
import os
import re
from shutil import copy2
from urllib import request
from urllib.parse import quote, urljoin, urlparse
from uuid import uuid4

import requests
import yaml
from yaml import Dumper
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)


def download_file(url, dest=None, filename=None):
    """Download `url` to a local file, logging progress along the way."""
    u = request.urlopen(url)
    if not filename:
        filename = 'downloaded.file'
    if dest:
        filename = os.path.join(dest, filename)
    with open(filename, 'wb') as fp:
        meta = u.info()
        # Python 2 exposes getheaders(); Python 3 exposes get_all()
        meta_func = meta.getheaders if hasattr(meta, 'getheaders') else meta.get_all
        meta_length = meta_func('Content-Length')
        file_size = None
        if meta_length:
            file_size = int(meta_length[0])
        logger.info('Downloading: {0} Bytes: {1}'.format(url, file_size))

        file_size_dl = 0
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            file_size_dl += len(buffer)
            fp.write(buffer)
            status = '{0:16}'.format(file_size_dl)
            if file_size:
                status += ' [{0:6.2f}%]'.format(file_size_dl * 100 / file_size)
            status += chr(13)  # carriage return so terminals overwrite the line
            logger.info(status)
    return filename
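# A minimal usage sketch for download_file; the URL and paths below are
# placeholders, not part of the original project:
#
#   saved = download_file('https://example.com/archive.zip',
#                         dest='.', filename='archive.zip')
#   logger.info('saved to %s', saved)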
def dump_file(self, filename, existed_check=False):
    if existed_check and os.path.exists(filename):
        logger.info('already generated file (%s) => (%s) (%s)'
                    % (filename, self.folder, self.title))
        return
    with open(filename, 'w+') as fp:
        cson.dump(self._data, fp)
def find_inner_link(self):
    """Scan every markdown note for :note: links and report broken targets."""
    link_pattern = r'\[([\w ]+)[\]|/]\(:note:([\w]+)\)'
    for storage, folder, note in self.boostnote.walk_note():
        if note.type == NoteType.SNIPPET_NOTE:
            continue
        matches = re.findall(link_pattern, note.content)
        for idx, (link_name, cson) in enumerate(matches):
            if cson in storage.notes:
                logger.info('%02d : %s ==> %s' % (idx, cson, storage.notes[cson]))
            else:
                logger.info('%02d : %s ==> broken link' % (idx, cson))
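# The pattern above matches Boostnote inner links such as
# "[My Note](:note:abc123)"; the title and key here are made-up examples:
#
#   >>> re.findall(r'\[([\w ]+)[\]|/]\(:note:([\w]+)\)',
#   ...            '[My Note](:note:abc123)')
#   [('My Note', 'abc123')]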
def moniwiki_page_attach(wiki_root: str, wiki_title: str, wiki_text: str):
    """Yield (idx, markup, name, url) for each attachment a wiki page references."""
    pds_base = moniwiki_pds_url(wiki_root, wiki_title)
    patterns = [
        # [[Clip(name)]] macro -> name.png
        (r'(\[\[[cC][lL][iI][pP]\()([a-zA-Z0-9]+)(\)\]\])', lambda x: x[1] + '.png'),
        # attachment:filename.ext (Korean filenames included)
        (r'(attachment:)([a-zA-Z0-9_+\-.\[\]@가-힣]+\.[a-z0-9]+)', lambda x: x[1]),
    ]
    for pattern, rep in patterns:
        matches = re.findall(pattern, wiki_text)
        for idx, match in enumerate(matches):
            full_url = urljoin(pds_base + '/', quote(rep(match)))
            if url_exists(full_url):
                yield idx, ''.join(match), rep(match), full_url
            else:
                logger.info('not found: %s' % full_url)
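# Usage sketch tying moniwiki_page_attach to download_file above; the helper
# name and the dest folder are illustrative assumptions, not project API.
def fetch_page_attachments(wiki_root, wiki_title, wiki_text, dest='attachments'):
    os.makedirs(dest, exist_ok=True)
    # each hit yields (index, original markup, attachment name, resolved URL)
    for idx, markup, name, full_url in moniwiki_page_attach(wiki_root, wiki_title,
                                                            wiki_text):
        download_file(full_url, dest=dest, filename=name)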
def convert_contents(cls, contents: str, method=[]) -> str:
    """Apply cls.note_replace rules; an empty `method` list applies every rule."""
    old_contents = contents
    for key, from_rep, to_rep in cls.note_replace:
        if method == []:
            # flags must be passed by keyword: the 4th positional argument
            # of re.sub is `count`, not `flags`
            old_contents = re.sub(from_rep, to_rep, old_contents, flags=re.UNICODE)
        elif key in method:
            if cls.verbose:
                for match in re.findall(from_rep, old_contents):
                    logger.info(match)
            old_contents = re.sub(from_rep, to_rep, old_contents, flags=re.UNICODE)
    return old_contents
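# Hypothetical shape of the cls.note_replace table convert_contents iterates:
# (key, from_pattern, to_replacement) tuples. The entries below are
# illustrative assumptions, not the project's real rules.
note_replace_example = [
    ('bold', r"'''([^']+)'''", r'**\1**'),   # moniwiki bold   -> markdown bold
    ('italic', r"''([^']+)''", r'*\1*'),     # moniwiki italic -> markdown italic
]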
def do_rename_file(self, name_pattern='{path}\\notes\\{folder_name}_{title}.cson'):
    """Rename every note file according to a pattern built from its metadata."""
    for storage, folder, note in self.boostnote.walk_note():
        named_dict = {
            'folder_key': folder.key,
            'folder_name': folder.name,
            'path': storage._path,
        }
        named_dict.update(note._data)
        logger.info(f'old={note.filename}')
        note.filename = name_pattern.format(**named_dict)
        logger.info(f'new={note.filename}')
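# Usage sketch (hypothetical instance; a POSIX-style pattern works the same
# way, since the keys all come from the note metadata):
#
#   tool.do_rename_file(name_pattern='{path}/notes/{folder_name}_{title}.cson')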
def __init__(self, wiki_root_url):
    WikiImpoter.__init__(self, wiki_root_url)
    parse_object = urlparse(wiki_root_url)
    root_url = '%s://%s' % (parse_object.scheme, parse_object.netloc)
    self.wiki_path = wiki_root_url.replace(root_url, '')
    full_url = root_url + self.wiki_path + 'wiki.php/%s'

    # crawl the TitleIndex page to enumerate every wiki title
    r = requests.get(full_url % 'TitleIndex')
    soup = BeautifulSoup(r.text, 'html.parser')
    no = 0
    for item in soup.find_all('li'):
        wiki_title = self.get_wiki_title(item)
        if skip_page(wiki_title):
            continue
        # fetch the raw wiki text of each page
        page_url = full_url % quote(wiki_title) + '?action=raw'
        wiki_text = get_wiki_text_from_url(page_url)
        if empty_or_notfound(wiki_text):
            logger.info('wiki_url is empty(%s)' % page_url)
            continue
        self.append_source(wiki_title, {
            'wiki_title': wiki_title,
            'wiki_url': page_url,
            'contents': wiki_text,
            'uuid': str(uuid4()),
            'links': {},
            'images': {},
        })
        no += 1
    self.init_source()
    for key, value in self.sources.items():
        self.init_wiki(value)
    logger.info('%d files loaded' % no)
def check(self):
    """Dry run: report what the replacement patterns would match; change nothing."""
    for storage, folder, note in self.boostnote.walk_note():
        for from_rep, to_rep in self._note_replace:
            matches = re.findall(from_rep, note.content)  # , re.DOTALL)
            if self.verbose and len(matches) > 0:
                logger.info('\n<storage=%s folder=%s note=%s(%s)>'
                            % (storage._path, folder.name, note.title, note.uuid))
                logger.info('=> %s ... %d matches found'
                            % (from_rep.replace('\n', '\\n'), len(matches)))
                for idx, match in enumerate(matches):
                    logger.info('%02d : %s' % (idx, match))
def do_update(self):
    """Apply every replacement pattern to every note, logging changed lines."""
    for storage, folder, note in self.boostnote.walk_note():
        content = note.content
        for from_rep, to_rep in self._note_replace:
            content = re.sub(from_rep, to_rep, content)
        if content != note.content:
            logger.info('%s note changed(%s)=>(%s)'
                        % (note.title, from_rep, to_rep.strip()))
            if self.verbose:
                # diff against the original before overwriting it; comparing
                # the new content with itself would never show a change
                old = note.content.split('\n')
                for old_line, new_line in zip(old, content.split('\n')):
                    if old_line != new_line:
                        logger.info('  ' + old_line)
                        logger.info('=>' + new_line)
            note.content = content
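# Typical workflow sketch: check() is the dry run that only reports matches,
# do_update() rewrites the notes. The instance name is hypothetical:
#
#   replacer.verbose = True
#   replacer.check()      # inspect what the patterns would touch
#   replacer.do_update()  # apply the replacements in place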
def to_normal_md(s: Storage, n: Note, export_to_path,
                 export_attach_method=AttachPathType.LinkRelativePath) -> bool:
    """Export a Boostnote markdown note to normal markdown.

    1. rewrite :storage and :note links
    2. insert front-matter metadata
    """
    # prepare the export folder and target filename
    export_folder_path = os.path.join(export_to_path, s.folders[n.folder].name)
    if not os.path.exists(export_folder_path):
        os.mkdir(export_folder_path)
    filename = normalize_filename(n.title)
    md_storage = os.path.join(export_folder_path, filename)

    # split front-matter and content
    note_metadata, content = split_frontmeta_and_content(n.content)

    # rewrite (:storage/<uuid>/<file>) attachment links
    rx_storage = re.compile(r'\(:storage[\\|/]([a-z0-9\-]+)[\\|/]([\w\.]+)\)',
                            re.MULTILINE)
    for match in rx_storage.finditer(content):
        uuid, linkname = match.groups()
        link_source_path = os.path.join(s._path, 'attachments', uuid, linkname)
        # resolve the link target according to the attachment method
        if export_attach_method == AttachPathType.LinkRelativePath:
            link_target_path = os.path.join(
                uuid_to_md_path(s, export_to_path, uuid), linkname)
        elif export_attach_method == AttachPathType.CopyToMarkdownSubPath:
            link_target_path = os.path.join(md_storage, linkname)
        elif export_attach_method == AttachPathType.CopyToMarkdownPath:
            link_target_path = os.path.join(export_folder_path, linkname)
        else:
            raise ValueError('unsupported export_attach_method')
        if not os.path.exists(link_target_path):
            if not os.path.exists(os.path.dirname(link_target_path)):
                os.mkdir(os.path.dirname(link_target_path))
            if not os.path.exists(link_source_path):
                logger.info(f'missing file {n.title}/{linkname}')
            else:
                copy2(link_source_path, link_target_path)
        # use '/'-separated relative paths; '\\' breaks markdown links on Windows
        link_target_path = os.path.relpath(link_target_path, export_folder_path)
        link_target_path = link_target_path.replace('\\', '/')
        content = content.replace(match.group(), f'({link_target_path})')

    # rewrite (:note:<uuid>) links to the exported markdown paths
    rx_note_link = re.compile(r'\(:note:([a-z0-9\-]+)\)')
    for match in rx_note_link.finditer(content):
        uuid = match.groups()[0]
        link_target_path = uuid_to_md_path(s, export_to_path, uuid) + '.md'
        content = content.replace(match.group(), f'({link_target_path})')

    # merge the note's tags into the front-matter keywords; copy first so the
    # note's own tag list is not mutated
    keywords = list(n.tags)
    if 'tags' in note_metadata:
        keywords.extend(note_metadata['tags'])
    if keywords:
        note_metadata['keywords'] = list(set(keywords))

    with codecs.open(os.path.join(export_folder_path, filename + '.md'),
                     'w', 'utf-8') as fp:
        if note_metadata != {}:
            yaml_string = yaml.dump(note_metadata, Dumper=Dumper,
                                    default_flow_style=False, allow_unicode=True)
            fp.write('\n'.join(['---', yaml_string[:-1], '---\n']))
        fp.write(content)
    return True
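# Export sketch: walk every markdown note and export it with the default
# relative-link attachment handling. `boostnote` is the same object the
# methods above iterate with walk_note(); the output path is a placeholder.
def export_all(boostnote, export_to_path='export'):
    os.makedirs(export_to_path, exist_ok=True)
    for storage, folder, note in boostnote.walk_note():
        if note.type == NoteType.SNIPPET_NOTE:
            continue  # snippet notes have no single markdown body to export
        to_normal_md(storage, note, export_to_path)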