from django.apps import apps
from django.contrib.contenttypes.models import ContentType
from wagtail.core.models import Page

# `update_page_references` is defined alongside this function in
# wagtailimportexport and is assumed to be in scope.


def import_pages(import_data, parent_page):
    """
    Take a JSON export of part of a source site's page tree
    and create those pages under the given parent page.
    """
    pages_by_original_path = {}
    pages_by_original_id = {}

    # First create the base Page records; these contain no foreign keys, so this allows us to
    # build a complete mapping from old IDs to new IDs before we go on to importing the
    # specific page models, which may require us to rewrite page IDs within foreign keys / rich
    # text / streamfields.
    page_content_type = ContentType.objects.get_for_model(Page)
    for (i, page_record) in enumerate(import_data['pages']):
        # build a base Page instance from the exported content (so that we pick up its title and other
        # core attributes)
        page = Page.from_serializable_data(page_record['content'])
        original_path = page.path
        original_id = page.id

        # clear id and treebeard-related fields so that they get reassigned when we save via add_child
        page.id = None
        page.path = None
        page.depth = None
        page.numchild = 0
        page.url_path = None
        page.content_type = page_content_type
        if i == 0:
            parent_page.add_child(instance=page)
        else:
            # Child pages are created in the same sibling order as the source
            # tree because the export is ordered by path. treebeard encodes
            # each ancestor as a fixed-width (Page.steplen) segment of `path`,
            # so stripping the last segment yields the parent's original path.
            parent_path = original_path[:-(Page.steplen)]
            pages_by_original_path[parent_path].add_child(instance=page)

        pages_by_original_path[original_path] = page
        pages_by_original_id[original_id] = page

    for (i, page_record) in enumerate(import_data['pages']):
        # Get the page model of the source page by app_label and model name
        # The content type ID of the source page is not in general the same
        # between the source and destination sites but the page model needs
        # to exist on both.
        # Raises LookupError exception if there is no matching model
        model = apps.get_model(page_record['app_label'], page_record['model'])

        specific_page = model.from_serializable_data(page_record['content'],
                                                     check_fks=False,
                                                     strict_fks=False)
        base_page = pages_by_original_id[specific_page.id]
        specific_page.page_ptr = base_page
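        # Copy the db-assigned fields (id, path, depth, url_path, ...) from the
        # saved base Page onto the specific instance, so saving it below reuses
        # the row created in the first loop instead of adding a new page.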
        specific_page.__dict__.update(base_page.__dict__)
        specific_page.content_type = ContentType.objects.get_for_model(model)
        update_page_references(specific_page, pages_by_original_id)
        specific_page.save()

    return len(import_data['pages'])
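A minimal usage sketch for the function above; the filename and the parent
page id are placeholders, not part of the original module.

import json

from wagtail.core.models import Page

# Hypothetical invocation: load a JSON export of the expected shape
# {'pages': [{'content': ..., 'app_label': ..., 'model': ...}, ...]}
with open('export.json') as f:
    import_data = json.load(f)

parent_page = Page.objects.get(id=2)  # placeholder parent
num_imported = import_pages(import_data, parent_page)
print(num_imported, 'pages imported')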
Example #2
def import_pages(import_data, parent_page):
    """
    Override of the wagtailimportexport `import_pages` function to handle
    generic CSVs. The standard `import_pages` assumes that the pages carry a
    pk from the exported JSON files; it does not handle the case where the
    imported pages are entirely new pages.
    """

    pages_by_original_id = {}

    # First create the base Page records; these contain no foreign keys, so this allows us to
    # build a complete mapping from old IDs to new IDs before we go on to importing the
    # specific page models, which may require us to rewrite page IDs within foreign keys / rich
    # text / streamfields.
    page_content_type = ContentType.objects.get_for_model(Page)

    for page_record in import_data['pages']:
        # build a base Page instance from the exported content
        # (so that we pick up its title and other core attributes)
        page = Page.from_serializable_data(page_record['content'])

        # clear id and treebeard-related fields so that
        # they get reassigned when we save via add_child
        page.id = None
        page.path = None
        page.depth = None
        page.numchild = 0
        page.url_path = None
        page.content_type = page_content_type
        parent_page.add_child(instance=page)

        # Custom code: write the newly assigned pk back into the page record so
        # that the second loop, which deserializes page_record['content'] again,
        # restores a specific page whose id matches the mapping key below.
        page_record['content']['pk'] = page.pk

        pages_by_original_id[page.id] = page

    for page_record in import_data['pages']:
        # Get the page model of the source page by app_label and model name
        # The content type ID of the source page is not in general the same
        # between the source and destination sites but the page model needs
        # to exist on both.
        # Raises LookupError exception if there is no matching model
        model = apps.get_model(page_record['app_label'], page_record['model'])

        specific_page = model.from_serializable_data(page_record['content'],
                                                     check_fks=False,
                                                     strict_fks=False)
        base_page = pages_by_original_id[specific_page.id]
        specific_page.page_ptr = base_page
        specific_page.__dict__.update(base_page.__dict__)
        specific_page.content_type = ContentType.objects.get_for_model(model)
        update_page_references(specific_page, pages_by_original_id)
        specific_page.save()

    return len(import_data['pages'])
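Both variants above delegate id rewriting to update_page_references, which
ships with wagtailimportexport. The sketch below is not the library's
implementation; it is a simplified, hypothetical illustration that only
remaps plain ForeignKey fields pointing at Page (rich text and StreamField
rewriting are omitted).

from django.db import models

from wagtail.core.models import Page


def update_page_references_sketch(page, pages_by_original_id):
    """Hypothetical, simplified stand-in for update_page_references."""
    for field in page._meta.get_fields():
        # Only concrete ForeignKeys to Page (sub)classes are remapped, and the
        # multi-table-inheritance parent link is left alone.
        if not isinstance(field, models.ForeignKey):
            continue
        if not issubclass(field.related_model, Page) or field.name == 'page_ptr':
            continue
        original_id = getattr(page, field.attname)
        if original_id in pages_by_original_id:
            setattr(page, field.attname, pages_by_original_id[original_id].id)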
Example #3
import io
import json
import logging
from zipfile import ZipFile

from django.apps import apps
from django.contrib.contenttypes.models import ContentType
from wagtail.core.models import Page
from wagtail.documents.models import Document
from wagtail.images.models import Image

# `functions.unzip_contents`, `get_fileobject`, `create_fileobject`,
# `list_existing_pages` and `update_page_references` are project-local helpers
# assumed to be in scope.


def import_page(uploaded_archive, parent_page, overwrites=None):
    """
    Imports uploaded_archive as children of parent_page.

    Arguments:
    uploaded_archive -- A file object containing a zip archive with content.json
    and the media objects.
    parent_page -- Page object under which the page(s) will be imported.

    Returns:
    numpages -- Number of pages that were successfully imported.
    numfails -- Number of pages that failed to import.
    message -- String message reporting any warning/issue.
    """

    # Normalise the default so callers do not share a mutable dict.
    overwrites = overwrites or {}

    # Read the zip archive and load as 'payload'.
    payload = io.BytesIO(uploaded_archive.read())

    # Open zip archive.
    with ZipFile(payload, 'r') as zf:
        try:
            # Open content.json and load it as the list of page records.
            with zf.open('content.json') as mf:
                contents = json.loads(mf.read().decode('utf-8-sig'))

                # First create the base Page records; these contain no foreign keys, so this allows us to
                # build a complete mapping from old IDs to new IDs before we go on to importing the
                # specific page models, which may require us to rewrite page IDs within foreign keys / rich
                # text / streamfields.
                page_content_type = ContentType.objects.get_for_model(Page)

                # Unzip all the files in the zip directory.
                contents_mapping = functions.unzip_contents(zf)

                # Indices of pages that already exist and should be skipped;
                # skipping is disabled when overwrites are supplied.
                existing_pages = list_existing_pages(
                    contents) if not overwrites else []

                # Dictionaries to store original paths.
                pages_by_original_path = {}
                pages_by_original_id = {}

                # Loop through all the pages.
                for (i, page_record) in enumerate(contents):

                    new_field_datas = {}

                    # Skip the existing pages.
                    if i in existing_pages:
                        continue

                    # Reassign document IDs.
                    for (fieldname,
                         filedata) in page_record["documents"].items():

                        new_field_datas[fieldname] = None

                        # Skip if the document is set to null.
                        if not filedata:
                            continue

                        # Reuse the id of an existing document with the same
                        # filename, or create one from the unzipped archive.
                        local_file_query = get_fileobject(
                            filedata["file"]["name"].split("/")[-1], Document)

                        local_file_id = local_file_query or create_fileobject(
                            filedata["title"],
                            contents_mapping[filedata["file"]["name"]],
                            Document)

                        new_field_datas[fieldname] = local_file_id

                    # Reassign image IDs.
                    for (fieldname, filedata) in page_record["images"].items():

                        new_field_datas[fieldname] = None

                        # Skip if the image is set to null.
                        if not filedata:
                            continue

                        # Same lookup-or-create logic for images.
                        local_file_query = get_fileobject(
                            filedata["file"]["name"].split("/")[-1], Image)

                        local_file_id = local_file_query or create_fileobject(
                            filedata["title"],
                            contents_mapping[filedata["file"]["name"]], Image)

                        new_field_datas[fieldname] = local_file_id

                    # Overwrite image and document IDs
                    for (field, new_value) in new_field_datas.items():
                        page_record['content'][field] = new_value

                    # Misc. overwrites
                    for (field, new_value) in overwrites.items():
                        page_record['content'][field] = new_value

                    # Create page instance.
                    page = Page.from_serializable_data(page_record['content'])

                    original_path = page.path
                    original_id = page.id

                    # Clear id and treebeard-related fields so that they get reassigned when we save via add_child
                    page.id = None
                    page.path = None
                    page.depth = None
                    page.numchild = 0
                    page.url_path = None
                    page.content_type = page_content_type

                    # Attach the page to the correct parent: the first record
                    # is the subtree root; later records hang off pages created
                    # earlier in this loop.
                    if i == 0:
                        parent_page.add_child(instance=page)
                    else:
                        # Child pages are created in the same sibling path order as the
                        # source tree because the export is ordered by path
                        parent_path = original_path[:-(Page.steplen)]
                        pages_by_original_path[parent_path].add_child(
                            instance=page)

                    pages_by_original_path[original_path] = page
                    pages_by_original_id[original_id] = page

                    # Get the page model of the source page by app_label and model name
                    # The content type ID of the source page is not in general the same
                    # between the source and destination sites but the page model needs
                    # to exist on both.
                    try:
                        model = apps.get_model(page_record['app_label'],
                                               page_record['model'])
                    except LookupError:
                        error = ("Importing file failed because the model " +
                                 page_record['model'] +
                                 " does not exist on this environment.")
                        logging.error(error)
                        return (0, 1, error)

                    specific_page = model.from_serializable_data(
                        page_record['content'],
                        check_fks=False,
                        strict_fks=False)

                    base_page = pages_by_original_id[specific_page.id]
                    specific_page.page_ptr = base_page
                    specific_page.__dict__.update(base_page.__dict__)
                    specific_page.content_type = ContentType.objects.get_for_model(
                        model)
                    update_page_references(specific_page, pages_by_original_id)
                    specific_page.save()

            return (len(contents) - len(existing_pages), len(existing_pages),
                    "")

        except LookupError as e:
            # zf.open raises KeyError (a subclass of LookupError) when
            # content.json is missing from the archive; report and stop.
            logging.error(
                "Importing file failed because file does not exist: " + str(e))
            return (0, 1, "File does not exist: " + str(e))

    return (0, 1, "")
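A hypothetical call of the archive-based variant above, assuming a zip
produced by the matching export (content.json plus media files); the filename
and parent id are placeholders.

import logging

from wagtail.core.models import Page

parent_page = Page.objects.get(id=2)  # placeholder parent
with open('export.zip', 'rb') as archive:
    numpages, numfails, message = import_page(archive, parent_page)
if message:
    logging.warning(message)
print(numpages, 'imported,', numfails, 'failed')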