Example #1
0
 def getArticles(self,query,datetuple,location=constants.Geo):
     """Run a date-restricted Google search and return the hits as Articles.

     Parameters:
         query: search string handed to GoogleSearch.
         datetuple: indexable pair (start, end) of date strings. Each
             string is sliced as: a three-character month key looked up
             in constants.months, one separator character, the day
             number, one separator character, and a four-digit year
             (for example "Apr 1 2012").
         location: accepted for interface compatibility; this method
             does not use it.

     Returns:
         A list of Article objects, one per search result, gathered from
         the initial results page plus pages 1 .. constants.pagecount - 1.
         Results that carry a date also get a datetime.date built from it.

     Raises:
         Whatever the lookups and int() conversions raise (e.g. KeyError,
         ValueError, IndexError) when a date string does not have the
         expected layout; errors from GoogleSearch propagate unchanged.
     """
     def split_bound(text):
         # Turn one bound of datetuple into (month, day, year).
         month = constants.months[text[0:3]]
         year = int(text[-4:])
         # The day sits between the month key + separator and the
         # separator + four-digit year.
         day = int(text[4:-5])
         return month, day, year

     def to_article(res):
         # Build an Article from a single search result.
         title = res.title.encode('utf8')
         url = res.url.encode('utf8')
         desc = res.desc.encode('utf8')
         if res.date is None:
             return Article(title, url, desc, res.date)
         # res.date is parsed as "<month key> <day>, <year>"; the comma is
         # turned into whitespace so a plain split() yields three fields.
         fields = res.date.replace(',', ' ').split()
         parsed = datetime.date(int(fields[2]), constants.months[fields[0]], int(fields[1]))
         return Article(title, url, desc, res.date, parsed)

     gs = GoogleSearch(query)
     gs._results_per_page = 10 #(default,dont change)

     # Restrict the search to the requested period.
     gs._set_start_date(*split_bound(datetuple[0]))
     gs._set_end_date(*split_bound(datetuple[1]))

     Articlelist = []

     # The first page is fetched without touching the page setting, exactly
     # as before; it is fetched even when constants.pagecount is < 1.
     Articlelist.extend(to_article(res) for res in gs.get_results())
     # Progress output: single-argument print(...) emits the same text under
     # the Python 2 print statement and the Python 3 print function.
     print('0')

     for i in range(1, constants.pagecount):
         gs._set_page(i)
         Articlelist.extend(to_article(res) for res in gs.get_results())
         print(i)
     return Articlelist
Example #2
0
#!/usr/bin/python
#
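# This program does a Google search for "sopa", restricted to results dated
# April 1-12, 2012, and prints the title, description and URL of every result
# returned by a single get_results() call.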
#

from xgoogle.search import GoogleSearch, SearchError
# Query Google for "sopa" within a fixed date window and dump each result.
# Single-argument print(...) and "except ... as" are used so the script
# parses on both Python 2.6+ and Python 3 while printing the same text
# under Python 2.
try:
  gs = GoogleSearch("sopa")
  gs._results_per_page = 10 #(default,dont change)
  print('setting dates')
  # Limit results to 1 April 2012 through 12 April 2012.
  gs._set_start_date(month=4,day=1,year=2012)
  gs._set_end_date(month=4,day=12,year=2012)
  # Left disabled: explicit page selection.
  #gs._set_page(page=2)
  print('getting results')
  results = gs.get_results()
  for res in results:
    print(res.title.encode('utf8'))
    print(res.desc.encode('utf8'))
    print(res.url.encode('utf8'))
    # Blank line between results.
    print('')
except SearchError as e:
  print("Search failed: %s" % e)

#span class="f nsa"
#re.findall(r'(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d\d*,\s\d{4}',i)