def import_comments_for_page(page_id):
    json_file_path = os.path.join(utils.DATA_DIR, 'comments', page_id + '.json')
    if not os.path.isfile(json_file_path):
        logger.debug('no file %s' % json_file_path)
        return
    with open(json_file_path, 'r') as comments_file:
        comments = json.loads(comments_file.read())
    pages = utils.load_pages()
    for comment in comments:
        # Prefix the imported comment with an {info} macro recording its provenance.
        # The Chinese text reads: "This document was imported from the old wiki,
        # original URL / original creator / original last modifier".
        prefix = u"""{info}\n本文档从旧 Wiki 导入,原 URL:%s\n\n原创建人:%s %s\n\n原最后更新人:%s %s\n{info}\n\n""" % (
            comment['url'],
            comment['creator'],
            dateutil.parser.parse(comment['created']).strftime('%Y-%m-%d %H:%M:%S'),
            comment['modifier'],
            dateutil.parser.parse(comment['modified']).strftime('%Y-%m-%d %H:%M:%S'),
        )
        comment['modified'] = dateutil.parser.parse(comment['modified'])
        new_confluence_api.addComment({
            'pageId': find_page_title_to_page_id(pages, comment['pageId']),
            'title': comment['title'],
            'content': fetch_convert_content(prefix, comment['content']),
            'created': dateutil.parser.parse(comment['created']),
            'creator': comment['creator'],
        })
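
# The comments export format is not documented in this module; judging from the keys the
# loop above reads, each record in <DATA_DIR>/comments/<page_id>.json is assumed to look
# roughly like this (keys taken from the code, values purely illustrative):
#
#     {
#         "pageId": "123456",
#         "title": "Re: deployment notes",
#         "content": "<p>original comment body</p>",
#         "url": "http://old-wiki.example.com/pages/viewpage.action?pageId=123456",
#         "creator": "alice",
#         "created": "2015-03-02T10:15:00+08:00",
#         "modifier": "bob",
#         "modified": "2015-03-03T09:00:00+08:00"
#     }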

def test():
    import importer
    from api import old_confluence_api, new_confluence_api
    import exporter

    pages = utils.load_pages()
    ordered_pages = utils.sort_pages(pages)
    exporter.dump_page('4358662')

def batch_import(name, func, *args, **kwargs):
    pages = utils.load_pages()
    ordered_pages = utils.sort_pages(pages)
    success_count = 0
    fail_count = 0
    for page in ordered_pages:
        try:
            func(page['id'], *args, **kwargs)
        except Exception as e:
            logger.debug(e)
            fail_count += 1
            continue
        time.sleep(0.1)
        success_count += 1
        logger.info('import %s, page: %s, title: %s, s/f/t: %d/%d/%d' % (
            name, page['id'], page['title'],
            success_count, fail_count, success_count + fail_count))
    logger.info('import %s, s/f/t: %d/%d/%d' % (
        name, success_count, fail_count, success_count + fail_count))
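
# Usage sketch (not part of the original file): batch_import only needs a callable that
# accepts an old page id, so the per-page importers defined in this module can be passed
# in directly, e.g.
#
#     batch_import('comments', import_comments_for_page)
#     batch_import('attachments', import_attachments_for_page)
#
# Note that batch_import swallows per-page failures (it logs them at debug level and
# keeps going), whereas batch_dump below re-raises, aborting the run on the first error.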

def import_pages():
    pages = utils.load_pages()
    ordered_pages = utils.sort_pages(pages)
    success_count = 0
    fail_count = 0
    SKIP_TO_ID = None
    for page in ordered_pages:
        if SKIP_TO_ID is not None:
            if page['id'] == SKIP_TO_ID:
                SKIP_TO_ID = None
            else:
                continue
        try:
            old_parent_id_title = dict([(x['id'], x['title']) for x in pages])
            if page['parentId'] not in old_parent_id_title and page['parentId'] != '0':
                logger.error('No old parent, title: %s, old page id: %s' % (
                    page['title'], page['parentId']))
                return
            if page['parentId'] == '0':
                new_parent_id = '0'
            else:
                try:
                    new_parent_id = find_page_title_to_page_id(pages, page['parentId'])
                    # new_parent_id = new_confluence_api.getPage(NEW_SPACE_KEY, old_parent_id_title[
                    #     page['parentId']])['id']
                except Exception as e:
                    raise ValueError('cannot locate %s, e: %s' % (page['id'], e))
            import_page(page['id'], new_parent_id)
        except xmlrpc.client.Fault as e:
            if ('Transaction rolled back because it has been marked as rollback-only'
                    in e.faultString):
                logger.info('duplicate, page id: %s, title: %s' % (page['id'], page['title']))
            else:
                fail_count += 1
                logger.error('import error, page_id: %s, title: %s, e: %s' % (
                    page['id'], page['title'], e))
                raise e
        # time.sleep(0.01)
        success_count += 1
        logger.info('import %s, page: %s, title: %s, s/f/t: %d/%d/%d' % (
            'page', page['id'], page['title'],
            success_count, fail_count, success_count + fail_count))
    logger.info('import %s, s/f/t: %d/%d/%d' % (
        'page', success_count, fail_count, success_count + fail_count))

def import_attachments_for_page(page_id):
    json_file_path = os.path.join(utils.DATA_DIR, 'attachments', page_id + '.json')
    pages = utils.load_pages()
    if not os.path.isfile(json_file_path):
        logger.debug('no file %s' % json_file_path)
        return
    with open(json_file_path, 'r') as attachment_file:
        attachments = json.loads(attachment_file.read())
    for attachment in attachments:
        with open(os.path.join(utils.DATA_DIR, 'attachments',
                               page_id + '_contents', attachment['id']), 'rb') as content_file:
            attachment_bin = content_file.read()
        new_confluence_api.addAttachment(
            find_page_title_to_page_id(pages, attachment['pageId']),
            {
                'fileName': attachment['fileName'],
                'contentType': attachment['contentType'],
                # Append the import date and original author to the attachment comment;
                # the Chinese text reads "import date: ..., original author: ...".
                'comment': attachment['comment'] + ' | 导入日:%s,原作者: %s' % (
                    dateutil.parser.parse(attachment['created']).strftime('%Y-%m-%d %H:%M:%S'),
                    attachment['creator'],
                ),
            },
            attachment_bin)
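
# On-disk layout assumed by import_attachments_for_page, inferred from the paths it
# opens (the placeholder names are illustrative):
#
#     <DATA_DIR>/attachments/<page_id>.json             # list of attachment metadata records
#     <DATA_DIR>/attachments/<page_id>_contents/<id>    # raw bytes of each attachment
#
# Each metadata record is expected to carry at least: id, pageId, fileName, contentType,
# comment, created and creator.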

def batch_dump(name, func):
    pages = utils.load_pages()
    ordered_pages = utils.sort_pages(pages)
    success_count = 0
    fail_count = 0
    # is_skip = True
    for page in ordered_pages:
        # if page['id'] == '13730404':
        #     is_skip = False
        # if is_skip:
        #     continue
        try:
            func(page['id'])
        except Exception as e:
            logger.error('dump %s fail, page id: %s' % (name, page['id']))
            fail_count += 1
            raise e
        time.sleep(0.01)
        success_count += 1
        logger.info('dump %s, page: %s, title: %s, s/f/t: %d/%d/%d' % (
            name, page['id'], page['title'],
            success_count, fail_count, success_count + fail_count))
    logger.info('dump %s, s/f/t: %d/%d/%d' % (
        name, success_count, fail_count, success_count + fail_count))
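
# End-to-end driver sketch. This function does not exist in the original code, and the
# three-step ordering (dump, recreate pages, then re-attach comments and attachments) is
# an assumption; it only reuses names already present in this project (exporter.dump_page
# from test() plus the importers above).
def migrate_space():
    import exporter

    batch_dump('pages', exporter.dump_page)                     # export every page from the old wiki
    import_pages()                                               # recreate the page tree on the new wiki
    batch_import('comments', import_comments_for_page)           # then re-attach comments
    batch_import('attachments', import_attachments_for_page)     # and binary attachments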