Beispiel #1
0
    def save(self, *args, **kwargs):
        """Fill empty fields from the page at ``self.url`` and save the article.

        The page is fetched with Goose. ``title``, ``newspaper`` and
        ``content`` are only overwritten when they are currently falsy.
        After the article itself has been saved, a ``Photo`` pointing at the
        extraction's top image is saved as well; that step is best effort and
        a failure is logged instead of aborting the save.

        All positional and keyword arguments are forwarded to the parent
        ``save`` so that caller-supplied options are honoured.
        """
        import logging

        from goose import Goose
        from text.blob import TextBlob

        article = Goose().extract(url=self.url)

        # Language detection is best effort; fall back to English on failure.
        try:
            lang = TextBlob(article.title).detect_language()
        except Exception:
            lang = 'en'

        # NOTE(review): this language-aware extractor is built but never used;
        # ``article`` above comes from the default-configured Goose. Either
        # re-extract with ``g`` or drop the language detection -- confirm the
        # intent. ('paper_class' may also be a typo for another config key.)
        g = Goose({'use_meta_language': False, 'target_language': lang,
                   'paper_class': 'soup'})

        if not self.title:
            self.title = article.title
        if not self.newspaper:
            self.newspaper = article.domain
        if not self.content:
            self.content = article.cleaned_text

        # Save the article first so the Photo below links to a stored row.
        super(Article, self).save(*args, **kwargs)

        top_image = getattr(article, 'top_image', None)
        src = getattr(top_image, 'src', None)
        if src:
            try:
                layout = Photo()
                layout.url = src
                layout.article = self
                layout.save()
            except Exception:
                # The photo is optional: record the failure, keep the article.
                logging.getLogger(__name__).exception(
                    "Could not save top image for article %s", self.url)
Beispiel #2
0
 def save(self, *args, **kwargs):
     """Derive missing ``content``, ``summary`` and ``spin_summary``, then save.

     * ``content``: when empty, the ``content`` of every related article
       (``self.article.all()``) is concatenated, each followed by ``'\\r'``,
       and typographic dashes/quotes are replaced by ASCII equivalents.
     * ``summary``: when empty, the content is written to a temporary file
       and summarised with ``ots.OTS``; the file is always removed again.
     * ``spin_summary``: when empty, it is produced from the summary by
       ``get_text_synonymizer``.

     Positional and keyword arguments are forwarded to the parent ``save``.
     """
     if not self.content:
         content = ''.join(a.content + '\r' for a in self.article.all())
         content = content.replace(u'\u2013', '-')   # en dash
         content = content.replace(u'\u201c', '"')   # left double quote
         content = content.replace(u'\u201d', '"')   # right double quote
         content = content.replace(u'\u2019', "'")   # right single quote
         # NOTE(review): str.replace is literal, so this only removes the
         # four characters "[\d]"; if a regex was intended, use re.sub.
         content = content.replace('[\\d]', "")
         # Assign after normalising so the cleaned text is what gets stored.
         self.content = content
     if not self.summary:
         # NOTE(review): this value is overwritten by the OTS result below.
         self.summary = get_summary(self.content, self.compression)
         from text.blob import TextBlob
         # Detect the language from the first line; default to English.
         try:
             lang = TextBlob(self.content.split('\n', 1)[0]).detect_language()
         except Exception:
             lang = 'en'

         o = ots.OTS(lang, self.compression)
         filename = u'text' + str(self.id) + '.txt'
         try:
             # Binary mode: the payload is already UTF-8 encoded bytes.
             with open(filename, 'wb') as f:
                 f.write(self.content.encode("utf-8"))
             o.parse(filename, 60)
         finally:
             # Remove the scratch file even when writing or parsing failed.
             try:
                 os.remove(filename)
             except OSError:
                 pass
         self.summary = str(o)
     if not self.spin_summary:
         self.spin_summary = get_text_synonymizer(self.summary)
     super(Syntesis, self).save(*args, **kwargs)
Beispiel #3
0
# TextBlob cheat sheet. `TextBlob` and `Word` are not defined in this snippet
# and must already be in scope. Apart from the single print() call, every
# result below is a bare expression whose value is discarded when this runs
# as a script (the values only show up in an interactive session).

# Spelling correction: print the blob after correct() has been applied.
a = TextBlob("I aem one oef a kind cheat")
print(a.correct())

# Spell-check a single word; the returned suggestions are not used here.
t = Word('Astonised')
t.spellcheck()

# Word counts: look up how often 'github' occurs in the text.
# NOTE(review): the TextBlob docs describe this lookup as case-insensitive,
# so "github" and "Github" would both be counted -- confirm.
text = TextBlob(
    "One time there was this pycheat in github. And it was Github where all the cheats were"
)
text.word_counts['github']

# Language detection and translation to Spanish ("es").
# NOTE(review): both calls presumably need network access and were removed
# from recent TextBlob releases -- verify against the installed version.
text = TextBlob("Frankly!Can I get a cup of coffe con leche in plaza Mayor?")
text.detect_language()
text.translate(to="es")

# Parsing

text.parse()

# Slice the blob with [0:7].
# NOTE(review): per the TextBlob docs blobs slice like strings, so this takes
# the first 7 characters, not the first 7 words.
text[0:7]

# Upper-case and lower-case versions of the text.
text.upper()
text.lower()

# Search the text for the substring "leche".
text.find("leche")
Beispiel #4
0
# TextBlob cheat sheet (continuation). `TextBlob`, `Word` and `w` are not
# defined in this snippet and must already be in scope. Apart from the single
# print() call, every result below is a bare expression whose value is
# discarded when this runs as a script.

# Read the `lemma` attribute of `w`; `w` is bound before this excerpt.
w.lemma

# Spelling correction: print the blob after correct() has been applied.
a = TextBlob("I aem one oef a kind cheat")
print(a.correct())

# Spell-check a single word; the returned suggestions are not used here.
t = Word('Astonised')
t.spellcheck()

# Word counts: look up how often 'github' occurs in the text.
# NOTE(review): the TextBlob docs describe this lookup as case-insensitive,
# so "github" and "Github" would both be counted -- confirm.
text = TextBlob("One time there was this pycheat in github. And it was Github where all the cheats were")
text.word_counts['github']

# Language detection and translation to Spanish ("es").
# NOTE(review): both calls presumably need network access and were removed
# from recent TextBlob releases -- verify against the installed version.
text = TextBlob("Frankly!Can I get a cup of coffe con leche in plaza Mayor?")
text.detect_language()
text.translate( to="es")

# Parsing

text.parse()

# Slice the blob with [0:7].
# NOTE(review): per the TextBlob docs blobs slice like strings, so this takes
# the first 7 characters, not the first 7 words.
text[0:7]

# Upper-case and lower-case versions of the text.
text.upper()
text.lower()

# Search the text for the substring "leche".
text.find("leche")