Python paragraphs Examples

Programming Language: Python

Namespace/Package Name: html2text

Method/Function: paragraphs

Examples at hotexamples.com: 2

Python paragraphs – 2 examples found. These are the top-rated real-world Python examples of html2text.paragraphs extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: models.py Project: iangow/hal

    def _markers(self, window=3):
        """Build a per-paragraph DataFrame and flag one run of paragraphs.

        The document text is split with ``paragraphs``.  Each paragraph gets
        the value produced by ``matches(pars, self.BIO_WORDS)`` and, for every
        director last name, a density score (characters covered by the name
        divided by paragraph length).  Both signals are spread over a centred
        rolling window, multiplied, and smoothed into ``mark``.  Consecutive
        truthy marks are grouped, and one non-zero group is flagged.

        Args:
            window: Size, in paragraphs, of the centred rolling window used
                by all three rolling operations.

        Returns:
            A ``pandas.DataFrame`` with one row per paragraph containing the
            columns ``bio_word_match``, ``text``, one column per last name,
            ``one_name``, ``name``, ``bio``, ``mark``, ``group`` and ``flag``.
        """
        text = self.text().decode('utf-8')
        pars = paragraphs(text)
        df = pd.DataFrame({
            'bio_word_match': matches(pars, self.BIO_WORDS),
            'text': pars,
        })

        last_names = self._director_last_names()
        for k in last_names:
            # A last name equal to an existing column would overwrite it.
            assert k not in df
            needle = k.lower()
            weight = float(len(k))

            # Bind the per-name values as defaults so the function does not
            # depend on the loop variable after this iteration.
            def density(s, needle=needle, weight=weight):
                # An empty paragraph cannot contain the name; return 0
                # instead of dividing by a zero length.
                if not s:
                    return 0.0
                return weight * float(s.lower().count(needle)) / float(len(s))

            df[k] = df.text.map(density)
        # Best density across all last names for each paragraph.
        df['one_name'] = df[last_names].apply(max, 1)

        # Spread each last-name hit over a centred window of `window` paragraphs
        df['name'] = pd.rolling_max(df.one_name, window=window, center=True)

        # Spread each bio-word hit over a centred window of `window` paragraphs
        df['bio'] = pd.rolling_max(df.bio_word_match, window=window, center=True)

        # Multiply them together and smooth the product over the same window
        # NOTE(review): centred rolling windows presumably leave NaN at the
        # edges, and NaN is truthy in the loop below -- confirm this is the
        # intended treatment of the first/last paragraphs.
        df['mark'] = pd.rolling_mean(df.name * df.bio, window=window, center=True)

        # Label each run of consecutive truthy marks with the row index at
        # which the run started; 0 means "not in a group".  For i == 0,
        # s[i-1] reads the last element, which is still 0 at that point.
        s = np.zeros(len(df.mark))
        for i, flag in enumerate(df.mark):
            if flag:
                if s[i-1]:
                    s[i] = s[i-1]
                else:
                    s[i] = i
        df['group'] = s
        s = df.group.value_counts()
        # NOTE(review): this relies on Series.argmax returning the index
        # label (the group id) rather than a position -- verify against the
        # installed pandas version.
        g = s[s.index != 0].argmax()
        df['flag'] = df.group == g

        return df

Example #2

Show file

File: models.py Project: iangow/hal

def matching_paragraphs(text, last_names):
    """Return the paragraphs of ``text`` that mention any of ``last_names``.

    Names are matched literally (regex metacharacters are escaped) and
    case-insensitively, anywhere inside a paragraph.

    Args:
        text: Document text; it is split into paragraphs by ``paragraphs``.
        last_names: Iterable of last-name strings.  Empty names are ignored.

    Returns:
        A list of the matching paragraphs, in the order ``paragraphs``
        produces them.  The list is empty when no usable name is given.
    """
    # Escape every name so characters such as "." or "(" are matched
    # literally instead of being interpreted as regex syntax.
    escaped = [re.escape(name) for name in last_names if name]
    if not escaped:
        # An empty alternation would match every paragraph.
        return []
    pattern = re.compile('|'.join(escaped), re.IGNORECASE)
    return [p for p in paragraphs(text) if pattern.search(p)]