Example 1
    def getNews(self):
      se = Searcher()
      field = se.getall()
      adj = self.getAdj()
      res = Resources()
      classifier = self.getClassifier(adj)
      stopwords = nltk.corpus.stopwords.words('portuguese')

      for a in field: # get every news item indexed in Whoosh
        try:
          n_id = int(float(field[a]["id"]))
          n_title = field[a]["title"]
          n_link = field[a]["link"]
          n_content = field[a]["content"]
          try:
            namesread = codecs.open("libra/politics/management/commands/utils/resources/names_plainText.txt", 'r', 'latin-1')
            # Join title and body with a space so their boundary words don't run together
            entidades = res.encontraNomes(n_title + " " + n_content, namesread)
            print entidades
          except UnicodeEncodeError:
            print "Error while finding entities"
            entidades = []

          news = News.objects.create_Simple_News(n_id, n_title, n_link, n_content)
          Future(self.insereBD, entidades, news, adj, classifier, stopwords)
        except UnicodeEncodeError:
          print "Encoding error here"
Example 2
def single_news(request, n_id):
    template = 'entity_content.html'
    news = News.objects.get(id=n_id)
    lista_pessoas = list(news.entities.all())

    n_title = news.title
    n_content = news.content
    path = os.path.join(SITE_ROOT, "politics/management/commands/utils/resources/names_plainText.txt")
    namesread = codecs.open(path, 'r', encoding='latin-1')
    res = Resources()
    entities = res.encontraNomes(n_title + " " + n_content, namesread)
    
    context = {'news': news, 'lista_pessoas': lista_pessoas, 'entidades': entities}
    return render(request, template, context)
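For single_news to receive n_id, the project's URLconf needs a matching route. Here is a minimal sketch in Django 1.x style to match the code above; the module path politics.views and the URL prefix are assumptions, since the actual routes aren't shown.

from django.conf.urls import url
from politics import views  # assumed module path

urlpatterns = [
    # Capture the numeric id from the URL and pass it to the view as n_id.
    url(r'^news/(?P<n_id>\d+)/$', views.single_news, name='single_news'),
]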
Example 3
    def parseFeeds(self):
        # print "esta aqui"
        # print Searcher().getcount()
        adj = self.getAdj()
        res = Resources()
        classifier = self.getClassifier(adj)
        hit_list = [ "http://feeds.jn.pt/JN-Politica", "http://feeds.dn.pt/DN-Politica", "http://economico.sapo.pt/rss/politica" ] # list of feeds to pull down
        future_calls = [feedparser.parse(rss_url) for rss_url in hit_list]
        # block until they are all in (a no-op here: feedparser.parse is synchronous)
        feeds = list(future_calls)
        print feeds
        entries = []
        for feed in feeds:
            entries.extend(feed[ "items" ])

        for e in entries:
            n_title = e['title']
            # print "about to add", n_title
            n_link = e['link']
            # keep only the text before the first HTML tag in the summary
            summary = re.split("<", e['summary'])
            n_content = summary[0]

            try:
                entidades = res.encontraNomes(n_title + " " + n_content, '')
            except UnicodeEncodeError:
                print "Error while finding entities"
                entidades = []

            try:
                # indexer.clean_index("libra/politics/indexdir")
                thisid = indexer.index_my_news(n_title, n_link, n_content, "libra/politics/indexdir", False)
                n_id = thisid
                print "searcher", Searcher().getcount(), thisid
                if thisid:
                    news = News.objects.create_Simple_News(n_id, n_title, n_link, n_content)
                    Future(self.insereBD, entidades, news, adj, classifier)
            except TypeError:
                print n_title
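Both getNews and parseFeeds hand the slow classification step to Future(fn, *args), whose implementation is also not shown. Below is a minimal thread-based sketch consistent with how it is called here; this is an assumption about the real class.

import threading

class Future(object):
    # Hypothetical sketch: the constructor starts fn(*args) on a daemon thread,
    # so callers like parseFeeds return immediately.
    def __init__(self, fn, *args):
        self._result = None
        self._done = threading.Event()
        def run():
            self._result = fn(*args)
            self._done.set()
        thread = threading.Thread(target=run)
        thread.setDaemon(True)
        thread.start()

    def __call__(self):
        # Block until the background call finishes, then return its result.
        self._done.wait()
        return self._result

The "block until they are all in" comment in parseFeeds hints that the original fetched feeds concurrently with such a class, e.g. future_calls = [Future(feedparser.parse, u) for u in hit_list] followed by feeds = [f() for f in future_calls], where calling each future blocks until its feed has arrived.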