Beispiel #1
0
 def _split_entry_text(self, text):
     """Yield ``(title, dates)`` pairs parsed from the main entry text.

     Text containing newlines: the first line is the title and every
     following line yields one pair with that title. Text without
     newlines: ``entry_split_re`` separates the title from the rest.

     In both cases only the part preceding the first
     ``entry_split_price_re`` match is kept as the dates & times info,
     and both items of the pair go through ``clean_whitespace``.
     """
     if '\n' not in text:
         # NOTE: the unpacking needs the split to give exactly two pieces,
         # otherwise ValueError is raised.
         heading, rest = self.entry_split_re.split(text, maxsplit=1)
         when = self.entry_split_price_re.split(rest, maxsplit=1)[0]
         yield clean_whitespace(heading), clean_whitespace(when)
         return

     lines = iter(text.split('\n'))
     heading = next(lines)  # str.split always gives at least one item
     for line in lines:
         when = self.entry_split_price_re.split(line, maxsplit=1)[0]
         yield clean_whitespace(heading), clean_whitespace(when)
Beispiel #2
0
 def _split_entry_text(self, text):
     """Yield ``(title, dates)`` pairs parsed from the main entry text.

     Text containing newlines: the first line is the title and every
     following line yields one pair with that title. Text without
     newlines: ``entry_split_re`` separates the title from the rest.

     In both cases only the part preceding the first
     ``entry_split_price_re`` match is kept as the dates & times info,
     and both items of the pair go through ``clean_whitespace``.
     """
     if '\n' not in text:
         # NOTE: the unpacking needs the split to give exactly two pieces,
         # otherwise ValueError is raised.
         heading, rest = self.entry_split_re.split(text, maxsplit=1)
         when = self.entry_split_price_re.split(rest, maxsplit=1)[0]
         yield clean_whitespace(heading), clean_whitespace(when)
         return

     lines = iter(text.split('\n'))
     heading = next(lines)  # str.split always gives at least one item
     for line in lines:
         when = self.entry_split_price_re.split(line, maxsplit=1)[0]
         yield clean_whitespace(heading), clean_whitespace(when)
Beispiel #3
0
    def _extract_entry_text(self, entry):
        """Collects the title and details text of a film entry.

        The title is built from the given STRONG element and its siblings
        (sometimes a film entry actually consists of multiple STRONG
        elements, because someone made the text bold by selecting multiple
        parts of it and pushing the button in a WYSIWYG editor). The details
        come from ``_extract_entry_tail_text``.

        Returns a ``(title, details)`` tuple; the title is stripped and the
        details are passed through ``clean_whitespace``.
        """
        before = self._extract_entry_siblings_text(entry, 'previous')
        # keep whitespace here so it survives into the title
        own = entry.text_content(whitespace=True) or ''
        after = self._extract_entry_siblings_text(entry, 'next')
        title = before + own + after

        details = self._extract_entry_tail_text(entry)

        return title.strip(), clean_whitespace(details)
Beispiel #4
0
    def _extract_entry_text(self, entry):
        """Collects the title and details text of a film entry.

        The title is built from the given STRONG element and its siblings
        (sometimes a film entry actually consists of multiple STRONG
        elements, because someone made the text bold by selecting multiple
        parts of it and pushing the button in a WYSIWYG editor). The details
        come from ``_extract_entry_tail_text``.

        Returns a ``(title, details)`` tuple; the title is stripped and the
        details are passed through ``clean_whitespace``.
        """
        before = self._extract_entry_siblings_text(entry, 'previous')
        # keep whitespace here so it survives into the title
        own = entry.text_content(whitespace=True) or ''
        after = self._extract_entry_siblings_text(entry, 'next')
        title = before + own + after

        details = self._extract_entry_tail_text(entry)

        return title.strip(), clean_whitespace(details)
Beispiel #5
0
    def text_content(self, whitespace=False):
        """Returns text content, by default with normalized whitespace.

        With ``whitespace=False`` (the default) the text from the parent
        class is passed through ``clean_whitespace``.

        With ``whitespace=True`` the text is returned as-is, except that a
        newline is inserted after every ``<br>`` descendant before the text
        is collected. The tree is modified only temporarily: the original
        tails of the ``<br>`` elements are put back afterwards, even if
        collecting the text raises.
        """
        if not whitespace:
            # replace all whitespace with single spaces
            text = super(HTMLElement, self).text_content()
            return clean_whitespace(text)

        # Remember the original tails so they can be restored exactly
        # (including None) instead of being reconstructed by slicing.
        saved_tails = [(br, br.tail) for br in self.xpath('.//br')]
        try:
            # add newline after every <br>
            for br, tail in saved_tails:
                br.tail = '\n' + (tail or '')

            # get the text
            return super(HTMLElement, self).text_content()
        finally:
            # remove added newlines, no matter what happened above
            for br, tail in saved_tails:
                br.tail = tail
Beispiel #6
0
    def text_content(self, whitespace=False):
        """Returns text content, by default with normalized whitespace.

        With ``whitespace=False`` (the default) the text from the parent
        class is passed through ``clean_whitespace``.

        With ``whitespace=True`` the text is returned as-is, except that a
        newline is inserted after every ``<br>`` descendant before the text
        is collected. The tree is modified only temporarily: the original
        tails of the ``<br>`` elements are put back afterwards, even if
        collecting the text raises.
        """
        if not whitespace:
            # replace all whitespace with single spaces
            text = super(HTMLElement, self).text_content()
            return clean_whitespace(text)

        # Remember the original tails so they can be restored exactly
        # (including None) instead of being reconstructed by slicing.
        saved_tails = [(br, br.tail) for br in self.xpath('.//br')]
        try:
            # add newline after every <br>
            for br, tail in saved_tails:
                br.tail = '\n' + (tail or '')

            # get the text
            return super(HTMLElement, self).text_content()
        finally:
            # remove added newlines, no matter what happened above
            for br, tail in saved_tails:
                br.tail = tail
Beispiel #7
0
 def text_content(self, whitespace=False):
     """Returns text content of the element.

     Whitespace is normalized with ``clean_whitespace`` unless
     ``whitespace`` is truthy, in which case the text obtained from the
     parent class is returned untouched.
     """
     raw = super(HTMLElement, self).text_content()
     return raw if whitespace else clean_whitespace(raw)
Beispiel #8
0
 def text_content(self, whitespace=False):
     """Returns text content of the element.

     Whitespace is normalized with ``clean_whitespace`` unless
     ``whitespace`` is truthy, in which case the text obtained from the
     parent class is returned untouched.
     """
     raw = super(HTMLElement, self).text_content()
     return raw if whitespace else clean_whitespace(raw)