Ejemplo n.º 1
0
def create_article(bustopic, title="Root", article_kwargs=None, content="", user_message="", request=None):
    """Create an article with a first revision and attach it to ``bustopic``.

    Args:
        bustopic: Object that receives the new article on its ``article``
            attribute and is then saved.
        title: Accepted for interface compatibility.
            NOTE(review): this value is not used anywhere in the function;
            confirm whether it was meant to be stored on the revision.
        article_kwargs: Optional keyword arguments forwarded to ``Article``.
            ``None`` (the default) means "no extra arguments".
        content: Text stored as the revision's ``content``.
        user_message: Text stored as the revision's ``user_message``.
        request: When truthy, the revision is populated through
            ``set_from_request(request)``; otherwise the IP address is set to
            ``None`` and the user comes from ``get_default_user()``.

    Returns:
        The ``ArticleRevision`` that was created.
    """
    # A ``None`` default avoids sharing one dict object between calls.
    article = Article(**(article_kwargs or {}))

    revision = ArticleRevision()
    revision.content = content
    revision.user_message = user_message
    revision.deleted = False

    if request:
        revision.set_from_request(request)
    else:
        revision.ip_address = None
        revision.user = get_default_user()

    article.add_revision(revision, save=True)
    article.save()

    # Link the freshly created article to the topic and persist the link.
    bustopic.article = article
    bustopic.save()
    return revision
Ejemplo n.º 2
0
    def import_page(self, api, site, page, current_site, url_root,
                    user_matching, replace_existing):
        """Import one wiki page as an article and register it under a slug.

        The slug is derived from ``page.urltitle`` and made unique against
        ``self.articles_worked_on``. If a ``URLPath`` with that slug already
        exists, the page is skipped unless ``replace_existing`` is true, in
        which case the old article is deleted first. Revisions are built from
        the last two entries returned by ``page.getHistory()``, visited in
        reverse order, and the final revision's content is then replaced by
        the page's current wiki text converted to Markdown.

        ``api`` and ``site`` are accepted but not used in this method.
        ``user_matching`` maps history user names to user primary keys.

        Side effects: updates ``self.articles_worked_on``,
        ``self.matching_old_link_new_link`` and ``self.articles_imported``.
        """
        import pypandoc

        # Keep only printable characters in the title and the slug.
        title = only_printable(page.title)
        unquoted = urllib.unquote(page.urltitle)
        urltitle = slugify(only_printable(unquoted)[:50])

        # Append an increasing number until the slug is unused in this run.
        suffix = 1
        while urltitle in self.articles_worked_on:
            title = only_printable(page.title) + " " + str(suffix)
            numbered = (urllib.unquote(page.urltitle))[:47] + " " + str(suffix)
            urltitle = only_printable(slugify(numbered))
            suffix += 1

        self.articles_worked_on.append(urltitle)

        print("Working on %s (%s)" % (title, urltitle))

        # Look for an already imported path with the same slug.
        try:
            existing_path = URLPath.objects.get(slug=urltitle)

            old_url = existing_path.article.get_absolute_url()
            self.matching_old_link_new_link[page.title] = old_url

            if not replace_existing:
                print("\tAlready existing, skipping...")
                return

            print("\tDestorying old version of the article")
            existing_path.article.delete()

        except URLPath.DoesNotExist:
            pass

        # Build the new article from the selected history entries.
        article = Article()

        for entry in reversed(page.getHistory()[-2:]):

            try:
                if entry['user'] in user_matching:
                    matched_pk = user_matching[entry['user']]
                    user = get_user_model().objects.get(pk=matched_pk)
                else:
                    user = get_user_model().objects.get(
                        username=entry['user'])
            except get_user_model().DoesNotExist:
                print(
                    '\tCannot found user with username=%s. Use --user-matching "%s:<user_pk>" to manualy set it'
                    % (entry['user'], entry['user']))
                user = None

            revision = ArticleRevision()
            revision.content = pypandoc.convert(entry['*'], 'md', 'mediawiki')
            revision.title = title
            revision.user = user
            revision.owner = user

            article.add_revision(revision, save=True)

            # The timestamp is assigned after add_revision and saved again.
            revision.created = entry['timestamp']
            revision.save()

        # Overwrite the last revision's content with the current wiki text
        # (fetched with both getWikiText flags set), minus tags and the
        # __NOEDITSECTION__ / __NOTOC__ markers.
        # TODO: do the same for the history revisions?
        current_text = striptags(page.getWikiText(True, True).decode('utf-8'))
        current_text = current_text.replace('__NOEDITSECTION__', '')
        current_text = current_text.replace('__NOTOC__', '')
        revision.content = pypandoc.convert(current_text, 'md', 'mediawiki')
        revision.save()

        article.save()

        new_path = URLPath.objects.create(site=current_site,
                                          parent=url_root,
                                          slug=urltitle,
                                          article=article)
        article.add_object_relation(new_path)

        new_url = new_path.article.get_absolute_url()
        self.matching_old_link_new_link[page.title] = new_url

        self.articles_imported.append((article, revision))
Ejemplo n.º 3
0
    def import_page(self, api, site, page, current_site, url_root, user_matching, replace_existing):
        """Import one wiki page as an article and register it under a slug.

        The slug comes from ``page.urltitle`` and is made unique against
        ``self.articles_worked_on``. When a ``URLPath`` with that slug already
        exists the page is skipped, unless ``replace_existing`` is true, in
        which case the existing article is deleted and re-imported.

        ``api`` and ``site`` are accepted but not used in this method.
        ``user_matching`` maps history user names to user primary keys.
        Returns ``None``; results are recorded on ``self.articles_worked_on``,
        ``self.matching_old_link_new_link`` and ``self.articles_imported``.
        """

        import pypandoc

        # Filter titles to avoid strange characters.
        title = only_printable(page.title)
        urltitle = slugify(only_printable(urllib.unquote(page.urltitle))[:50])

        added = 1

        # Append an increasing number until the slug is unused in this run.
        while urltitle in self.articles_worked_on:
            title = only_printable(page.title) + " " + str(added)
            urltitle = only_printable(slugify((urllib.unquote(page.urltitle))[:47] + " " + str(added)))
            added += 1

        self.articles_worked_on.append(urltitle)

        print "Working on %s (%s)" % (title, urltitle)

        # Check if the URL path already exists
        try:
            urlp = URLPath.objects.get(slug=urltitle)

            # Record the existing article's URL even when the page is skipped.
            self.matching_old_link_new_link[page.title] = urlp.article.get_absolute_url()

            if not replace_existing:
                print "\tAlready existing, skipping..."
                return

            print "\tDestorying old version of the article"
            urlp.article.delete()

        except URLPath.DoesNotExist:
            pass

        # Create article
        article = Article()

        # Use the last two entries of the history list, in reverse order.
        for history_page in page.getHistory()[-2:][::-1]:

            # Resolve the author: explicit mapping first, then by username.
            try:
                if history_page['user'] in user_matching:
                    user = get_user_model().objects.get(pk=user_matching[history_page['user']])
                else:
                    user = get_user_model().objects.get(username=history_page['user'])
            except get_user_model().DoesNotExist:
                print "\tCannot found user with username=%s. Use --user-matching \"%s:<user_pk>\" to manualy set it" % (history_page['user'], history_page['user'], )
                user = None

            article_revision = ArticleRevision()
            article_revision.content = pypandoc.convert(history_page['*'], 'md', 'mediawiki')
            article_revision.title = title
            article_revision.user = user
            article_revision.owner = user

            article.add_revision(article_revision, save=True)

            # The timestamp is assigned after add_revision and saved again.
            article_revision.created = history_page['timestamp']
            article_revision.save()

        # Update the latest revision's content WITH expanded templates.
        # NOTE(review): article_revision is only bound if the loop above ran
        # at least once - confirm getHistory() never returns an empty list.
        # TODO: do the same for the history revisions?
        article_revision.content = pypandoc.convert(striptags(page.getWikiText(True, True).decode('utf-8')).replace('__NOEDITSECTION__', '').replace('__NOTOC__', ''), 'md', 'mediawiki')
        article_revision.save()

        article.save()

        upath = URLPath.objects.create(site=current_site, parent=url_root, slug=urltitle, article=article)
        article.add_object_relation(upath)

        self.matching_old_link_new_link[page.title] = upath.article.get_absolute_url()

        self.articles_imported.append((article, article_revision))
Ejemplo n.º 4
0
    def import_page(
        self,
        api,
        site,
        page,
        current_site,
        url_root,
        user_matching,
        replace_existing,
    ):
        """Import one wiki page as a new article holding a single revision.

        The slug is built from ``page.title`` after transliterating Nordic
        letters, and is made unique against ``self.articles_worked_on``. When
        a ``URLPath`` with that slug already exists the page is skipped,
        unless ``replace_existing`` is true, in which case the existing
        article is deleted and re-imported.

        Args:
            api: Not used in this method.
            site: Not used in this method.
            page: Wiki page exposing ``title``, ``urltitle``, ``getHistory()``
                and ``getWikiText()``.
            current_site: Passed as ``site`` when creating the ``URLPath``.
            url_root: Passed as ``parent`` when creating the ``URLPath``.
            user_matching: Mapping from history user name to user primary key.
            replace_existing: Whether to delete and re-import an article whose
                slug already exists.

        Returns:
            None. Results are recorded on ``self.articles_worked_on``,
            ``self.matching_old_link_new_link`` and ``self.articles_imported``.
        """
        # Transliterate Nordic letters to ASCII in a single pass before the
        # title is filtered and slugified.
        nordic_to_ascii = str.maketrans({
            "ø": "o",
            "æ": "ae",
            "å": "a",
            "Ø": "O",
            "Æ": "AE",
            "Å": "A",
        })
        title = page.title
        urltitle = only_printable(title.translate(nordic_to_ascii))
        urltitle = slugify(only_printable(urllib.parse.unquote(urltitle))[:50])

        added = 1

        # Append an increasing number until the slug is unused in this run.
        # NOTE(review): this branch starts from page.urltitle without the
        # transliteration applied above - confirm that is intended.
        while urltitle in self.articles_worked_on:
            title = only_printable("{} {}".format(page.title, added))
            urltitle = slugify("{} {}".format(
                only_printable(urllib.parse.unquote(page.urltitle))[:47],
                added))

            added += 1

        self.articles_worked_on.append(urltitle)

        print("Working on {} ({})".format(title, urltitle))
        print(url_root)
        print(urltitle)
        print()
        # Check if the URL path already exists
        try:
            urlp = URLPath.objects.get(slug=urltitle)

            # Record the existing article's URL even when the page is skipped.
            self.matching_old_link_new_link[
                page.title] = urlp.article.get_absolute_url()

            if not replace_existing:
                print("\tAlready existing, skipping...")
                return

            print("\tDestorying old version of the article")
            urlp.article.delete()

        except URLPath.DoesNotExist:
            pass

        # Create article
        article = Article()

        history_page = page.getHistory()[0]

        # Resolve the author: explicit mapping first, then by username.
        # ``user`` starts as None so every failure path leaves it bound.
        user = None
        try:
            if history_page['user'] in user_matching:
                user = get_user_model().objects.get(
                    pk=user_matching[history_page['user']])
            else:
                user = get_user_model().objects.get(
                    username=history_page['user'])
        except get_user_model().DoesNotExist:
            user = None
        except Exception:
            # Best effort: report the problem and import without an author
            # instead of failing later on an unbound ``user``.
            print("Couldn't find user. Something is weird.")
            user = None

        # Convert the wiki text with the project's refactor() helper, then
        # drop whole lines that contain a [[...Category...]] or
        # [[...Kategori...]] link.
        content = refactor(page.getWikiText())
        content = re.sub(r"\[\[.*(Category|Kategori).*\]\]\n", "", content)

        article_revision = ArticleRevision()
        article_revision.content = content
        article_revision.title = title
        article_revision.user = user
        article_revision.owner = user

        article.add_revision(article_revision, save=True)

        # The timestamp is assigned after add_revision and saved again.
        article_revision.created = history_page['timestamp']
        article_revision.save()

        article.save()

        upath = URLPath.objects.create(site=current_site,
                                       parent=url_root,
                                       slug=urltitle,
                                       article=article)
        article.add_object_relation(upath)

        self.matching_old_link_new_link[
            page.title] = upath.article.get_absolute_url()

        self.articles_imported.append((article, article_revision))
Ejemplo n.º 5
0
    def import_page(
            self,
            api,
            site,
            page,
            current_site,
            url_root,
            user_matching,
            replace_existing,
            ):
        """Import one wiki page as a new article holding a single revision.

        The slug is built from ``page.title`` after transliterating Nordic
        letters, and is made unique against ``self.articles_worked_on``. When
        a ``URLPath`` with that slug already exists the page is skipped,
        unless ``replace_existing`` is true, in which case the existing
        article is deleted and re-imported.

        Args:
            api: Not used in this method.
            site: Not used in this method.
            page: Wiki page exposing ``title``, ``urltitle``, ``getHistory()``
                and ``getWikiText()``.
            current_site: Passed as ``site`` when creating the ``URLPath``.
            url_root: Passed as ``parent`` when creating the ``URLPath``.
            user_matching: Mapping from history user name to user primary key.
            replace_existing: Whether to delete and re-import an article whose
                slug already exists.

        Returns:
            None. Results are recorded on ``self.articles_worked_on``,
            ``self.matching_old_link_new_link`` and ``self.articles_imported``.
        """
        # Transliterate Nordic letters to ASCII in a single pass before the
        # title is filtered and slugified.
        transliteration = str.maketrans({
            "ø": "o",
            "æ": "ae",
            "å": "a",
            "Ø": "O",
            "Æ": "AE",
            "Å": "A",
        })
        title = page.title
        urltitle = only_printable(title.translate(transliteration))
        urltitle = slugify(only_printable(urllib.parse.unquote(urltitle))[:50])

        added = 1

        # Append an increasing number until the slug is unused in this run.
        # NOTE(review): this branch starts from page.urltitle without the
        # transliteration applied above - confirm that is intended.
        while urltitle in self.articles_worked_on:
            title = only_printable("{} {}".format(page.title, added))
            urltitle = slugify(
                "{} {}".format(only_printable(urllib.parse.unquote(page.urltitle))[:47], added)
            )

            added += 1

        self.articles_worked_on.append(urltitle)

        print("Working on {} ({})".format(title, urltitle))
        print(url_root)
        print(urltitle)
        print()
        # Check if the URL path already exists
        try:
            urlp = URLPath.objects.get(slug=urltitle)

            # Record the existing article's URL even when the page is skipped.
            self.matching_old_link_new_link[
                page.title] = urlp.article.get_absolute_url()

            if not replace_existing:
                print("\tAlready existing, skipping...")
                return

            print("\tDestorying old version of the article")
            urlp.article.delete()

        except URLPath.DoesNotExist:
            pass

        # Create article
        article = Article()

        history_page = page.getHistory()[0]

        # Resolve the author: explicit mapping first, then by username.
        # ``user`` starts as None so every failure path leaves it bound.
        user = None
        try:
            if history_page['user'] in user_matching:
                user = get_user_model().objects.get(
                    pk=user_matching[
                        history_page['user']])
            else:
                user = get_user_model().objects.get(
                    username=history_page['user'])
        except get_user_model().DoesNotExist:
            user = None
        except Exception:
            # Best effort: report the problem and import without an author
            # instead of failing later on an unbound ``user``.
            print("Couldn't find user. Something is weird.")
            user = None

        # Convert the wiki text with the project's refactor() helper, then
        # drop whole lines that contain a [[...Category...]] or
        # [[...Kategori...]] link.
        content = refactor(page.getWikiText())
        content = re.sub(r"\[\[.*(Category|Kategori).*\]\]\n", "", content)

        article_revision = ArticleRevision()
        article_revision.content = content
        article_revision.title = title
        article_revision.user = user
        article_revision.owner = user

        article.add_revision(article_revision, save=True)

        # The timestamp is assigned after add_revision and saved again.
        article_revision.created = history_page['timestamp']
        article_revision.save()

        article.save()

        upath = URLPath.objects.create(
            site=current_site,
            parent=url_root,
            slug=urltitle,
            article=article)
        article.add_object_relation(upath)

        self.matching_old_link_new_link[
            page.title] = upath.article.get_absolute_url()

        self.articles_imported.append((article, article_revision))