import io
import json
import logging
from zipfile import ZipFile

from django.apps import apps
from django.contrib.contenttypes.models import ContentType
from wagtail.core.models import Page  # wagtail.models on Wagtail >= 3.0
from wagtail.documents.models import Document
from wagtail.images.models import Image

# Helpers assumed to be available in this module or from the
# wagtailimportexport package: update_page_references, functions.unzip_contents,
# list_existing_pages, get_fileobject, create_fileobject.


def import_pages(import_data, parent_page):
    """
    Take a JSON export of part of a source site's page tree
    and create those pages under the parent page
    """
    pages_by_original_path = {}
    pages_by_original_id = {}

    # First create the base Page records; these contain no foreign keys, so this allows us to
    # build a complete mapping from old IDs to new IDs before we go on to importing the
    # specific page models, which may require us to rewrite page IDs within foreign keys / rich
    # text / streamfields.
    page_content_type = ContentType.objects.get_for_model(Page)

    for (i, page_record) in enumerate(import_data['pages']):
        # build a base Page instance from the exported content
        # (so that we pick up its title and other core attributes)
        page = Page.from_serializable_data(page_record['content'])
        original_path = page.path
        original_id = page.id

        # clear id and treebeard-related fields so that they get
        # reassigned when we save via add_child
        page.id = None
        page.path = None
        page.depth = None
        page.numchild = 0
        page.url_path = None
        page.content_type = page_content_type
        if i == 0:
            parent_page.add_child(instance=page)
        else:
            # Child pages are created in the same sibling path order as the
            # source tree because the export is ordered by path
            parent_path = original_path[:-(Page.steplen)]
            pages_by_original_path[parent_path].add_child(instance=page)

        pages_by_original_path[original_path] = page
        pages_by_original_id[original_id] = page

    for (i, page_record) in enumerate(import_data['pages']):
        # Get the page model of the source page by app_label and model name.
        # The content type ID of the source page is not in general the same
        # between the source and destination sites, but the page model needs
        # to exist on both.
        # Raises LookupError if there is no matching model.
        model = apps.get_model(page_record['app_label'], page_record['model'])

        specific_page = model.from_serializable_data(
            page_record['content'], check_fks=False, strict_fks=False)
        base_page = pages_by_original_id[specific_page.id]
        specific_page.page_ptr = base_page
        specific_page.__dict__.update(base_page.__dict__)
        specific_page.content_type = ContentType.objects.get_for_model(model)
        update_page_references(specific_page, pages_by_original_id)
        specific_page.save()

    return len(import_data['pages'])
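
# A minimal usage sketch (not part of the original module): import_pages
# expects a dict with a 'pages' list whose records each carry 'app_label',
# 'model' and 'content' keys, as read above. The export file name and the
# parent-page lookup below are hypothetical placeholders.
def example_import(parent_page, export_path='export.json'):
    with open(export_path) as f:
        import_data = json.load(f)
    return import_pages(import_data, parent_page)


# e.g. example_import(Page.objects.get(slug='home'))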
def import_pages(import_data, parent_page):
    """
    Overwrite of the wagtailimportexport `import_pages` function to handle
    generic CSVs.

    The standard `import_pages` assumes that your pages will have a pk from
    the exported JSON files. It does not facilitate the idea that the pages
    you import will be new pages.
    """
    pages_by_original_id = {}

    # First create the base Page records; these contain no foreign keys, so this allows us to
    # build a complete mapping from old IDs to new IDs before we go on to importing the
    # specific page models, which may require us to rewrite page IDs within foreign keys / rich
    # text / streamfields.
    page_content_type = ContentType.objects.get_for_model(Page)

    for page_record in import_data['pages']:
        # build a base Page instance from the exported content
        # (so that we pick up its title and other core attributes)
        page = Page.from_serializable_data(page_record['content'])

        # clear id and treebeard-related fields so that
        # they get reassigned when we save via add_child
        page.id = None
        page.path = None
        page.depth = None
        page.numchild = 0
        page.url_path = None
        page.content_type = page_content_type
        parent_page.add_child(instance=page)

        # Custom code to add the new pk back into the original page record.
        page_record['content']['pk'] = page.pk

        # page.id now holds the newly assigned pk, so this mapping is keyed
        # by the new ids, matching the pk written back above.
        pages_by_original_id[page.id] = page

    for page_record in import_data['pages']:
        # Get the page model of the source page by app_label and model name.
        # The content type ID of the source page is not in general the same
        # between the source and destination sites, but the page model needs
        # to exist on both.
        # Raises LookupError if there is no matching model.
        model = apps.get_model(page_record['app_label'], page_record['model'])

        specific_page = model.from_serializable_data(
            page_record['content'], check_fks=False, strict_fks=False)
        base_page = pages_by_original_id[specific_page.id]
        specific_page.page_ptr = base_page
        specific_page.__dict__.update(base_page.__dict__)
        specific_page.content_type = ContentType.objects.get_for_model(model)
        update_page_references(specific_page, pages_by_original_id)
        specific_page.save()

    return len(import_data['pages'])
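
# A sketch of feeding CSV-derived rows through the overwrite above (the app
# label, model name, and field names are hypothetical placeholders). Because
# the first pass writes each newly assigned pk back into
# page_record['content']['pk'], the second pass can resolve its base Page
# even though the source rows never had pks on this site.
def import_rows_from_csv(rows, parent_page):
    import_data = {
        'pages': [
            {
                'app_label': 'myapp',
                'model': 'csvpage',
                'content': {
                    'pk': None,  # no meaningful source pk; reassigned on import
                    'title': row['title'],
                    'slug': row['slug'],
                },
            }
            for row in rows
        ],
    }
    return import_pages(import_data, parent_page)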
def import_page(uploaded_archive, parent_page, overwrites={}):
    """
    Imports uploaded_archive as children of parent_page.

    Arguments:
    uploaded_archive -- A file object, which includes content.json and the media objects.
    parent_page -- Page object, where the page(s) will be imported to.

    Returns:
    numpages -- Integer number of pages that were successfully imported.
    numfails -- Integer number of pages that failed to import.
    message -- String message to report any warning/issue.
    """
    # Read the zip archive and load it as 'payload'.
    payload = io.BytesIO(uploaded_archive.read())

    # Open the zip archive.
    with ZipFile(payload, 'r') as zf:
        try:
            # Open content.json and load it into the contents dictionary.
            with zf.open('content.json') as mf:
                contents = json.loads(mf.read().decode('utf-8-sig'))

            # First create the base Page records; these contain no foreign keys, so this allows us to
            # build a complete mapping from old IDs to new IDs before we go on to importing the
            # specific page models, which may require us to rewrite page IDs within foreign keys / rich
            # text / streamfields.
            page_content_type = ContentType.objects.get_for_model(Page)

            # Unzip all the files in the zip directory.
            contents_mapping = functions.unzip_contents(zf)

            # Get the list of pages to skip (none when overwriting).
            existing_pages = list_existing_pages(contents) if not overwrites else []

            # Dictionaries mapping original paths/ids to the newly created pages.
            pages_by_original_path = {}
            pages_by_original_id = {}

            # Loop through all the pages.
            for (i, page_record) in enumerate(contents):
                new_field_datas = {}

                # Skip the existing pages.
                if i in existing_pages:
                    continue

                # Reassign document IDs.
                for (fieldname, filedata) in page_record["documents"].items():
                    new_field_datas[fieldname] = None

                    # Skip if the document is set to null.
                    if not filedata:
                        continue

                    # Reuse a matching local file if one exists, otherwise create it.
                    local_file_query = get_fileobject(
                        filedata["file"]["name"].split("/")[-1], Document)
                    local_file_id = local_file_query if local_file_query else create_fileobject(
                        filedata["title"], contents_mapping[filedata["file"]["name"]], Document)
                    new_field_datas[fieldname] = local_file_id

                # Reassign image IDs.
                for (fieldname, filedata) in page_record["images"].items():
                    new_field_datas[fieldname] = None

                    # Skip if the image is set to null.
                    if not filedata:
                        continue

                    local_file_query = get_fileobject(
                        filedata["file"]["name"].split("/")[-1], Image)
                    local_file_id = local_file_query if local_file_query else create_fileobject(
                        filedata["title"], contents_mapping[filedata["file"]["name"]], Image)
                    new_field_datas[fieldname] = local_file_id

                # Overwrite image and document IDs.
                for (field, new_value) in new_field_datas.items():
                    page_record['content'][field] = new_value

                # Misc. overwrites.
                for (field, new_value) in overwrites.items():
                    page_record['content'][field] = new_value

                # Create the page instance.
                page = Page.from_serializable_data(page_record['content'])
                original_path = page.path
                original_id = page.id

                # Clear id and treebeard-related fields so that they get
                # reassigned when we save via add_child.
                page.id = None
                page.path = None
                page.depth = None
                page.numchild = 0
                page.url_path = None
                page.content_type = page_content_type

                # Handle children of the imported page(s). The first record is
                # the root of the imported tree and is attached to parent_page.
                if i == 0:
                    parent_page.add_child(instance=page)
                else:
                    # Child pages are created in the same sibling path order as the
                    # source tree because the export is ordered by path
                    parent_path = original_path[:-(Page.steplen)]
                    pages_by_original_path[parent_path].add_child(instance=page)

                pages_by_original_path[original_path] = page
                pages_by_original_id[original_id] = page

                # Get the page model of the source page by app_label and model name.
                # The content type ID of the source page is not in general the same
                # between the source and destination sites, but the page model needs
                # to exist on both.
                try:
                    model = apps.get_model(page_record['app_label'], page_record['model'])
                except LookupError:
                    message = ("Importing file failed because the model "
                               + page_record['model']
                               + " does not exist on this environment.")
                    logging.error(message)
                    return (0, 1, message)

                specific_page = model.from_serializable_data(
                    page_record['content'], check_fks=False, strict_fks=False)
                base_page = pages_by_original_id[specific_page.id]
                specific_page.page_ptr = base_page
                specific_page.__dict__.update(base_page.__dict__)
                specific_page.content_type = ContentType.objects.get_for_model(model)
                update_page_references(specific_page, pages_by_original_id)
                specific_page.save()

            return (len(contents) - len(existing_pages), len(existing_pages), "")

        except LookupError as e:
            # If content.json does not exist in the archive, zf.open raises
            # KeyError (a LookupError subclass); report the error and
            # terminate the import.
            logging.error("Importing file failed because file does not exist: " + str(e))
            return (0, 1, "File does not exist: " + str(e))

    # Fallback; not normally reached.
    return (0, 1, "")
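
# A sketch of wiring import_page into an upload view (the form field name,
# POST parameter, and template below are hypothetical placeholders).
from django.shortcuts import render


def import_view(request):
    parent_page = Page.objects.get(id=request.POST['parent_id'])
    numpages, numfails, message = import_page(request.FILES['archive'], parent_page)
    return render(request, 'import_results.html', {
        'numpages': numpages,
        'numfails': numfails,
        'message': message,
    })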