예제 #1
0
def analyzeAll():
    """Analyze every article returned by the "article/notAnalized" view.

    The view rows are walked one after another in a plain loop, and
    analyzeArticle.perform is called with the id of each row.
    """
    for row in dblayer.view("article/notAnalized"):
        analyzeArticle.perform(row.id)
예제 #2
0
파일: maestro.py 프로젝트: kimbss/WhatNow
def analyzeAll():
  """Run analyzeArticle.perform for each row of "article/notAnalized".

  The view is queried once and its rows are processed sequentially; each
  row contributes only its id to the analysis call.
  """
  for entry in dblayer.view("article/notAnalized"):
    analyzeArticle.perform(entry.id)
예제 #3
0
 def findById(self):
   """Load the question whose id matches self.id.

   Queries the "question/id" view with self.id.  Returns None when the
   view has no rows and the loaded Question when it has exactly one.
   When it has more than one row, an error line is printed and
   IntegrityConstraintException is raised.
   """
   matches = dblayer.view("question/id", self.id)
   hits = len(matches)
   if hits == 0:
     return None
   if hits != 1:
     print('ERROR: more than one question for this ID')
     raise IntegrityConstraintException
   # Exactly one row: load it from the database via the first iteration.
   for row in matches:
     return Question.load(getDb(), row.id)
예제 #4
0
 def findById(self):
     """Return the question stored under self.id, or None if there is none.

     The "question/id" view is queried with self.id.  A single row is
     loaded through Question.load; more than one row prints an error
     line and raises IntegrityConstraintException.
     """
     rows = dblayer.view("question/id", self.id)
     total = len(rows)
     if not total:
         return None
     if total == 1:
         for match in rows:
             return Question.load(getDb(), match.id)
         # Iteration produced nothing despite a length of one.
         return None
     print('ERROR: more than one question for this ID')
     raise IntegrityConstraintException
예제 #5
0
 def findByQuestion(self):
     """Return the "timeLineEvent/question" view queried with self.question."""
     return dblayer.view("timeLineEvent/question", self.question)
예제 #6
0
 def findByQuestion(self):
   """Query the "timeLineEvent/question" view with self.question.

   The view result is handed back to the caller unchanged.
   """
   return dblayer.view("timeLineEvent/question", self.question)
예제 #7
0
        url = urlList[i].get('href')
        url = url[:url.find('?')]
        article._id = url
        article.link = article._id
        article.extract = summaryList[i].text
        article.keyword = keyword
        article.source = 'nyt'
        print article._id
        print article.title
        print article.extract
        print article.date
        article.create()
    
def resultNum(soup):
    """Return the number of search results reported on the page.

    ``soup`` is called with the tag name 'span' and the attribute filter
    {'class': 'sortText'}.  With no matching elements the result is 0;
    otherwise the second-to-last whitespace-separated token of the last
    element's text is converted to int and returned.
    """
    spans = soup('span', {'class': 'sortText'})
    if not len(spans):
        return 0
    tokens = spans[-1].text.split()
    return int(tokens[-2])

#################TEST####################
if __name__ == '__main__':
    # Delete the article behind each row of the "article/test" view,
    # then run the NYTimes wrapper for the keyword 'laden'.
    for row in dblayer.view("article/test"):
        stored = Article(row.id).findById()
        getDb().delete(stored)
    wrapNYTimes('laden', 1, pastDay=3)
예제 #8
0
            url = 'http://www.washingtonpost.com/newssearch/search.html?sa=as&sd=%s&ed=%s&st=%s&cp=%d' % (
                sd, sd, keyword, j + 1)
            url += '&fa_1_sourcenavigator=%22The+Washington+Post%22&fa_1_sourcenavigator=washingtonpost.com&fa_1_mediatypenavigator=^Articles%24'
            try:
                text = urlopen(url).read()
            except:
                print 'error occur during connect to url %s and read contents' % url
                continue
            try:
                wp.feed(text.decode('cp949', errors='replace'))
            except:
                print 'error occur during parsing %s' % url
                continue
            print 'wrapping WashingtonPost : ' + str(
                searchDate) + ', page ' + str(j + 1)
            print url
            wp.storeArticle(keyword, searchDate)
            wp.close()
        searchDate -= oneDay
    print 'done'


#################TEST####################
if __name__ == '__main__':
    # Delete the article behind each row of the "article/test" view,
    # then run the Washington Post wrapper for the chosen keyword.
    for row in dblayer.view("article/test"):
        stored = Article(row.id).findById()
        getDb().delete(stored)
    keyword = 'laden'
    wrapWPost(keyword, 1, 3)