Example #1
0
    def migrate_pages(self, urls, root_url):
        """Download every URL and store it as a page inside a matching folder tree.

        For each URL the path between ``root_url`` and the file name is turned
        into nested folders (via ``self.__create_folder``), starting again from
        the initial context. The document is then downloaded, parsed for its
        title, scrubbed, and handed to ``self.__create_page``.

        Failures do not abort the run: connection and parse problems are
        recorded as messages in ``self.__errors`` and the URL is skipped. Note
        that the folders for a URL are created before the download is
        attempted, so they exist even when the download fails.

        :param urls: iterable of URL strings to migrate.
        :param root_url: prefix stripped from each URL (by length) before the
            remaining path is split into folder names.
        """
        for url in urls:
            # Each URL is placed relative to the initial context
            # (attribute name spelled as it is defined elsewhere).
            self.__current_context = self.__inital_context

            # Path segments after root_url, without the trailing file name and
            # without the empty segments caused by leading/doubled slashes.
            path_parts = url[len(root_url):].split('/')[:-1]
            folders = [part for part in path_parts if part]

            for folder in folders:
                self.__create_folder(folder)
                self.__current_context = self.__current_context[folder]

            try:
                downloaded_html = urllib.urlopen(url)
            except IOError:
                self.__errors.append("Error connecting to: " + url)
                continue

            # Always release the connection, even if reading fails.
            try:
                html = downloaded_html.read()
            finally:
                downloaded_html.close()

            reader = HTMLReader()
            try:
                reader.feed(html)
            except HTMLParseError:
                # Unparseable documents are reported and no page is created.
                self.__errors.append("Error parsing: " + url)
            else:
                title = reader.get_title()
                filename = url.split('/')[-1]
                cleaned_html = scrubHTML(html, raise_error=False)
                self.__create_page(filename, title, cleaned_html)
Example #2
0
def safe_html(html):
    """Return ``html`` after running it through ``scrubHTML``.

    ``raise_error=False`` is forwarded to the scrubber; presumably this makes
    it tolerate problematic markup instead of raising -- confirm against
    ``scrubHTML``.
    """
    scrubbed = scrubHTML(html, raise_error=False)
    return scrubbed
Example #3
0
    def get_citation_string(real_self):
        """
        Build the citation line for a review of a journal.

        >>> from mock import Mock
        >>> at_mock = Mock()
        >>> at_mock.get = lambda x: None
        >>> at_mock.customCitation = ''
        >>> at_mock.title = "Plone Mag♥"
        >>> at_mock.reviewAuthors = [{'firstname' : 'Cillian♥', 'lastname'  : 'de Roiste♥'}]
        >>> at_mock.yearOfPublication = '2009♥'
        >>> at_mock.officialYearOfPublication = '2010♥'
        >>> at_mock.publisher = 'SYSLAB.COM GmbH♥'
        >>> at_mock.placeOfPublication = 'München♥'
        >>> at_mock.volumeNumber = '1♥'
        >>> at_mock.issueNumber = '3♥'
        >>> at_mock.get_issue_title = lambda :'Open Source Mag 1♥'
        >>> at_mock.get_volume_title = lambda :'Open Source Mag Vol 1♥'
        >>> at_mock.get_publication_title = lambda :'Open Source♥'
        >>> at_mock.portal_url = lambda :'http://www.syslab.com'
        >>> at_mock.UID = lambda :'12345'
        >>> at_mock.canonical_uri = ''
        >>> at_mock.page_start_end_in_print = '11-21'
        >>> review = ReviewJournalNoMagic(at_mock)
        >>> review.directTranslate = lambda m: m.default
        >>> review.get_citation_string()
        u'de Roiste\u2665, Cillian\u2665: review of: Plone Mag\u2665, 1\u2665, 3\u2665 (2010\u2665/2009\u2665), in: Open Source\u2665, Open Source Mag Vol 1\u2665, Open Source Mag 1\u2665, p. 11-21, <a href="http://syslab.com/r/12345">http://syslab.com/r/12345</a>'


        A non-empty ``customCitation`` takes precedence: it is scrubbed,
        decoded from UTF-8 and returned as is. Otherwise the citation is
        assembled according to this schema:

        [reviewer last name], [reviewer first name]: review of: [title of the reviewed journal], [number], [issue number (official year/year of publication)], in: [journal title], [number], [issue number (official year/year of publication)], p.[pageStart]-[pageEnd] recensio URL.

        The years of the magazine article that reviews the other magazine do
        not exist, so the "in:" part consists only of the publication, volume
        and issue titles.

        If the object has a truthy ``canonical_uri``, the translated
        ``label_downloaded_via_recensio`` message is used in place of the
        UUID URL.
        """
        magic = real_self.magic
        if magic.customCitation:
            return scrubHTML(magic.customCitation).decode('utf8')

        details_formatter = getFormatter(u', ', u', ', u' ')
        year_part = getFormatter('/')(magic.officialYearOfPublication,
                                      magic.yearOfPublication)
        # Wrap the combined years in parentheses, or drop them entirely.
        if year_part:
            year_part = '(' + year_part + ')'
        else:
            year_part = None
        reviewed_item = details_formatter(
            magic.title, magic.volumeNumber, magic.issueNumber, year_part)

        # Titles of the publication / volume / issue that contain the review.
        containing_mag = getFormatter(', ',  ', ')(
            magic.get_publication_title(),
            magic.get_volume_title(),
            magic.get_issue_title())

        location = real_self.getUUIDUrl()
        if getattr(magic, "canonical_uri", False): #3102
            location = real_self.directTranslate(
                Message(u"label_downloaded_via_recensio", "recensio"))

        reviewer_names = get_formatted_names(
            u' / ', ', ', magic.reviewAuthors, lastname_first = True)

        # Translated connecting words, keyed by their %-format placeholder.
        labels = {}
        for key, msgid, fallback in (
                ('review_of', u"text_review_of", "review of:"),
                ('in', u"text_in", "in:"),
                ('page', u"text_pages", "p."),
                (':', u"text_colon", ":"),
                ):
            labels[key] = real_self.directTranslate(
                Message(msgid, "recensio", default=fallback))

        citation_formatter = getFormatter(
            u'%(:)s %(review_of)s ' % labels,
            ', %(in)s ' % labels,
            ', %(page)s ' % labels,
            u', ')
        # Only the three assembled text parts are escaped; the page range and
        # the location are passed through unchanged.
        return citation_formatter(
            escape(reviewer_names), escape(reviewed_item),
            escape(containing_mag), magic.page_start_end_in_print,
            location)
Example #4
0
def safe_html(html):
    """Strip dangerous elements such as <script/> from the given html.

    The return value is still html: the markup is kept, it is not turned
    into entity replacements like '&gt;'.

    """
    scrubbed = scrubHTML(html, raise_error=False)
    return scrubbed
    def get_citation_string(real_self):
        """
        Either return the custom citation or the generated one
        >>> from mock import Mock
        >>> at_mock = Mock()
        >>> at_mock.customCitation = ''
        >>> at_mock.get = lambda x: None
        >>> at_mock.formatted_authors_editorial = u"Gerken\u2665, Patrick\u2665 / Pilz, Alexander"
        >>> at_mock.title = "Plone 4.0♥?"
        >>> at_mock.subtitle = "Das Benutzerhandbuch♥"
        >>> at_mock.reviewAuthors = [{'firstname' : 'Cillian♥', 'lastname' : 'de Roiste♥'}]
        >>> at_mock.yearOfPublication = '2009♥'
        >>> at_mock.publisher = 'SYSLAB.COM GmbH♥'
        >>> at_mock.placeOfPublication = 'München♥'
        >>> at_mock.get_issue_title = lambda :'Open Source Mag 1♥'
        >>> at_mock.get_volume_title = lambda :'Open Source Mag Vol 1♥'
        >>> at_mock.get_publication_title = lambda :'Open Source♥'
        >>> at_mock.portal_url = lambda :'http://www.syslab.com'
        >>> at_mock.UID = lambda :'12345'
        >>> at_mock.canonical_uri = ''
        >>> at_mock.page_start_end_in_print = '11-21'
        >>> review = ReviewMonographNoMagic(at_mock)
        >>> review.directTranslate = lambda m: m.default
        >>> review.get_citation_string()
        u'de Roiste\u2665, Cillian\u2665: review of: Gerken\u2665, Patrick\u2665 / Pilz, Alexander, Plone 4.0\u2665? Das Benutzerhandbuch\u2665, M\\xfcnchen\u2665: SYSLAB.COM GmbH\u2665, 2009\u2665, in: Open Source\u2665, Open Source Mag Vol 1\u2665, Open Source Mag 1\u2665, p. 11-21, <a href="http://syslab.com/r/12345">http://syslab.com/r/12345</a>'


        Original spec:

        [reviewer last name], [reviewer first name]: review of: [work author last name], [work author first name], [work title]. [work subtitle], [place of publication]: [publisher], [year], in: [journal title], [number], [issue number (year of publication)], p.[pageStart]-[pageEnd] recensio URL.

        There can be several work authors; they are then all listed,
        separated by ' / '.
        Note: the counted year has been removed.
        For the journal title, number and issue number, the titles of the
        magazine, volume and issue objects that contain the review are used.
        The generated string does not append a year after the issue title.

        Müller, Klaus: review of: Meier, Hans, Geschichte des Abendlandes. Ein Abriss, München: Oldenbourg, 2010, in: Zeitschrift für Geschichte, 39, 3 (2008/2009), www.recensio.net/##

        A non-empty ``customCitation`` takes precedence: it is scrubbed,
        decoded from UTF-8 and returned as is. If the object has a truthy
        ``canonical_uri``, the translated ``label_downloaded_via_recensio``
        message is used in place of the UUID URL.
        """
        self = real_self.magic
        if self.customCitation:
            return scrubHTML(self.customCitation).decode("utf8")

        # Translated connecting words, keyed by their %-format placeholder.
        args = {
            "review_of": real_self.directTranslate(Message(u"text_review_of", "recensio", default="review of:")),
            "in": real_self.directTranslate(Message(u"text_in", "recensio", default="in:")),
            "page": real_self.directTranslate(Message(u"text_pages", "recensio", default="p.")),
            ":": real_self.directTranslate(Message(u"text_colon", "recensio", default=":")),
        }

        # A title that already ends in punctuation is joined to the subtitle
        # with a plain space; otherwise a full stop is inserted. The truth
        # test keeps an empty title from raising IndexError on title[-1].
        title = self.title
        if title and title[-1] in "!?:;.,":
            title_subtitle = getFormatter(u" ")
        else:
            title_subtitle = getFormatter(u". ")
        rev_details_formatter = getFormatter(u", ", u", ", u"%(:)s " % args, u", ")
        rezensent_string = get_formatted_names(u" / ", ", ", self.reviewAuthors, lastname_first=True)
        authors_string = self.formatted_authors_editorial
        title_subtitle_string = title_subtitle(title, self.subtitle)
        item_string = rev_details_formatter(
            authors_string, title_subtitle_string, self.placeOfPublication, self.publisher, self.yearOfPublication
        )

        # NOTE: an earlier "(year)" string was computed here but never used;
        # it was dropped because decoding the year could raise for a missing
        # or non-ASCII unicode value without affecting the result.

        mag_number_formatter = getFormatter(u", ", u", ")
        mag_number_string = mag_number_formatter(
            self.get_publication_title(), self.get_volume_title(), self.get_issue_title()
        )

        location = real_self.getUUIDUrl()
        if getattr(self, "canonical_uri", False):  # 3102
            location = real_self.directTranslate(Message(u"label_downloaded_via_recensio", "recensio"))

        citation_formatter = getFormatter(
            u"%(:)s %(review_of)s " % args, ", %(in)s " % args, ", %(page)s " % args, u", "
        )

        # Only the three assembled text parts are escaped; the page range and
        # the location are passed through unchanged.
        citation_string = citation_formatter(
            escape(rezensent_string),
            escape(item_string),
            escape(mag_number_string),
            self.page_start_end_in_print,
            location,
        )

        return citation_string