def profData(name):
    dm_data = ysearch.search("Professor " + name, count=3)
    dmTb = db.create(name="prof", data=dm_data)
    ## Select the proper domain name from the given DATABASE keys(): the most-hit URL
    hitdic = {}
    for k in DATABASE.keys():
        hitdic[k] = 0
    #print "\nURL's returned: "
    for r in dmTb.rows:
        url = r["prof$url"]
        #print url
        ## for every domain saved,
        for k in DATABASE.keys():
            lt = k.split(".")
            urllt = url.split(".")
            for e in lt[1:2]:
                if e in urllt:
                    hitdic[k] += 1
    ## Report the domain with the maximum hits
    domain = findMax(hitdic)
    return [name, domain]

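# findMax and DATABASE are used by profData but are not defined in this snippet. A minimal
# sketch of what they are assumed to look like (names and sample values are hypothetical):
# DATABASE maps known university domains to stored metadata, and findMax returns the key
# with the highest hit count.
def findMax(hitdic):
    # return the domain with the most URL hits, or None for an empty dict
    best = None
    for k, v in hitdic.items():
        if best is None or v > hitdic[best]:
            best = k
    return best

# Example (assumed) usage:
# DATABASE = {"www.stanford.edu": {}, "www.mit.edu": {}}
# name, domain = profData("Andrew Ng")
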
def get(self):
    query = console.strfix(self.request.get("query"))
    count = int(console.strfix(self.request.get("count")))
    offset = int(console.strfix(self.request.get("offset")))
    data = ysearch.search(query, vertical="news", count=count, start=offset)
    images = db.create(data=data)
    serialized = simplejson.dumps(images.rows)
    self.response.out.write(serialized)

def whensearch(q):
    wt = db.select(udf=date_udf, data=ysearch.search(q, count=50))
    dates = db.group(by=["year", "month"], key="count", reducer=lambda x, y: x + y, aas="total", table=wt)
    dates = db.where(lambda r: r["month"] is not None and r["year"] is not None, table=dates)
    dates = db.sort(key="total", table=dates)
    if len(dates.rows) > 0:
        top = dates.rows[0]
        return top["month"], top["year"]
    return "None", "None"

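# A minimal usage sketch for whensearch, assuming date_udf (see the month/year parsing
# helpers later in this collection) is defined in the same module and ysearch is configured
# with a valid app id:
if __name__ == "__main__":
    month, year = whensearch("when was the berlin wall built")
    print "top answer: %s %s" % (month, year)
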
def get(self):
    query = console.strfix(self.request.get("query"))
    count = int(console.strfix(self.request.get("count")))
    offset = int(console.strfix(self.request.get("offset")))
    includeDelicious = console.strfix(self.request.get("includeDelicious"))
    search_results = ysearch.search(query, count=count, start=offset)
    web = db.create(data=search_results)
    if len(includeDelicious) == 4:  # presumably checking for the literal string "true" (length 4)
        dl = db.select(udfs.unnest_value, name="dl", url=u"http://feeds.delicious.com/rss/popular/%s" % query)
        web = db.join(overlap_predicate, [web, dl])
    serialized = simplejson.dumps(web.rows)
    self.response.out.write(serialized)

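# overlap_predicate is referenced above but not defined in this snippet. A plausible sketch,
# modeled on the title-overlap join in the techmeme example later in this collection; the
# two-term threshold and the use of util.text are assumptions:
def overlap_predicate(r1, r2):
    # keep a pair of rows when their titles share at least two terms
    return text.overlap(r1["title"], r2["title"]) > 1
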
def _download(self, start, count, sort=True):
    rows = []
    length = 0
    for i in xrange(0, count, 50):
        # xrange already steps in increments of 50, so the offset is i itself;
        # the original "offset = i * 50" would skip past the requested range.
        offset = i
        pos = start + offset
        num_results = min(count - offset, 50)
        data = ysearch.search(self.term, start=pos, count=num_results)
        length = data['ysearchresponse']['totalhits']
        rows = rows + db.create(data=data).rows
    self.length = int(length)
    self._cache(start, rows, sort=sort)
    return rows

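# The same 50-results-per-call paging pattern as a free-standing sketch (the helper name is
# ours, not part of the original code; it assumes ysearch and db are imported as above):
def fetch_pages(term, start, count):
    rows = []
    for offset in xrange(0, count, 50):
        data = ysearch.search(term, start=start + offset, count=min(count - offset, 50))
        rows += db.create(data=data).rows
    return rows
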
def get(self):
    rows = list()
    try:
        data = ysearch.search('', count=50, more={'sites': 'kajian.net,ilmoe.com,radiorodja.com,radiomuslim.com,salafiyunpad.wordpress.com,problemamuslim.wordpress.com', 'style': 'raw'})
        results = db.create(data=data)
        results = db.sort(key="date", table=results)
        rows = results.rows
    except DownloadError:
        pass
    template_values = {
        'rows': rows,
    }
    path = os.path.join(os.path.dirname(__file__), 'newest.html')
    self.response.out.write(template.render(path, template_values))

def get(self, site="", terms="", subject=""):
    tn = db.create("tn", data=ysearch.search(terms + " " + subject + " site:" + site, count=4, more={"filter": "-p**n", "type": "html"}))
    results = []
    for row in tn.rows:
        url = row["tn$url"]
        match = re.match("http://([^/]*)", url)
        if match is not None:
            title = un_unicode_string(row["tn$title"])
            abstract = un_unicode_string(row["tn$abstract"])
            result = Result(title, abstract, match.group(1), url)
            results.append(result)
    encoder = ResultJsonEncoder()
    self.response.out.write(encoder.encode(results))

def PubSearch(name, domain):
    pub = ysearch.search(name + " publications site:" + domain, count=2, more={'type': 'html', 'abstract': 'long'})
    pub = db.create(name="pub", data=pub)
    ## for each publication page extracted, count mentions per year
    yrs = ['2005', '2006', '2007', '2008', '2009', '2010', '2011']
    dic = {}
    for y in yrs:
        dic[y] = 0
    for r in pub.rows:
        url = r["pub$url"]
        print
        dic = reportPubsYrs(url, dic)
    return dic

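# reportPubsYrs is referenced above but not defined in this snippet. A rough sketch of what
# it is assumed to do: fetch the publications page and bump the counter for every year string
# found in it (the urllib2 fetch and plain substring counting are assumptions):
import urllib2

def reportPubsYrs(url, dic):
    try:
        html = urllib2.urlopen(url).read()
    except Exception:
        return dic
    for y in dic:
        dic[y] += html.count(y)
    return dic
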
def wwwsearch(q):
    qm = QueryMiner(q)
    def phrases_udf(r):
        r.update({"phrases": qm.extract(r)})
        return r
    pc = defaultdict(lambda: 1)
    def pc_udf(r):
        for p in r["phrases"]:
            pc[p] += 1
    w = db.select(udf=phrases_udf, data=ysearch.search(q, count=50))
    db.select(udf=pc_udf, table=w)
    if len(pc) > 0:
        return sorted(pc.iteritems(), key=itemgetter(1), reverse=True)[0][0]
    return "None"

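# The ranking step above simply counts how often each extracted phrase occurs across the 50
# results and returns the most frequent one. The same idea in isolation, on made-up sample
# data (QueryMiner itself is not shown in this collection):
from collections import defaultdict
from operator import itemgetter

phrase_rows = [["alexander graham bell"],
               ["alexander graham bell", "antonio meucci"],
               ["antonio meucci"],
               ["alexander graham bell"]]
counts = defaultdict(lambda: 1)
for phrases in phrase_rows:
    for p in phrases:
        counts[p] += 1
top = sorted(counts.items(), key=itemgetter(1), reverse=True)[0][0]
# top == "alexander graham bell"
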
def getHomePage(name, domain):
    ## Truncate the domain to drop the leading "www."
    dm = domain.split(".")
    domainsplt = dm[1]
    for dstr in dm[2:]:
        domainsplt += "." + dstr
    domain = domainsplt
    hm = ysearch.search("professor " + name + " site:" + domain, count=1, more={'type': 'html'})
    hm = db.create(name="hm", data=hm)
    ## assume a single home page
    for r in hm.rows:
        url = r["hm$url"]
        return url
    return None

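# profData, getHomePage and PubSearch appear to form a small pipeline; a sketch of how they
# would be chained (the function name and the example professor are ours):
def profPublicationSummary(name):
    name, domain = profData(name)          # most-hit known domain for this professor
    homepage = getHomePage(name, domain)   # best-guess home page on that domain (may be None)
    years = PubSearch(name, domain)        # per-year publication mention counts
    return homepage, years

# homepage, years = profPublicationSummary("Andrew Ng")
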
def get(self):
    query = self.request.get('q')
    if query == '':
        query = "nikah"
    page = self.request.get('page')
    if page == '':
        page = 1
    rows = list()
    prev = None
    next = None
    if query != '':
        try:
            data = ysearch.search(query, count=self.count, start=(int(page) - 1) * 10, more={'sites': 'kajian.net,ilmoe.com,radiorodja.com,radiomuslim.com,salafiyunpad.wordpress.com,problemamuslim.wordpress.com'})
            results = db.create(data=data)
            rows = results.rows
            try:
                prev = data['ysearchresponse']['prevpage']
                prev = int(page) - 1
            except KeyError:
                pass
            try:
                next = data['ysearchresponse']['nextpage']
                next = int(page) + 1
            except KeyError:
                pass
        except DownloadError:
            pass
    template_values = {
        'rows': rows,
        'query': query,
        'prev': prev,
        'next': next
    }
    path = os.path.join(os.path.dirname(__file__), 'index.html')
    self.response.out.write(template.render(path, template_values))

def get(self):
    q = self.request.get("q")
    m = self.request.get("m")
    if q:
        start = self.request.get("p")
        query = q
        if m:
            query = m
        if start:
            result = search(query, count=10, start=int(start))
            images = search(query, vertical="images", count=1, start=int(start), filter="yes")
        else:
            result = search(query, count=10)
            images = search(query, vertical="images", count=1, filter="yes")
        resultset_glue = glue(q)
        ysr = result['ysearchresponse']
        if ysr.has_key('resultset_web'):
            results = ysr['resultset_web']
            template_values = {
                'query': q,
                'totalhits': int(ysr['totalhits']) + int(ysr['deephits']),
                'results': results,
                'stats': memcache.get_stats()
            }
            if images:
                image_response = images['ysearchresponse']
                if int(image_response['count']) > 0:
                    template_values['image'] = image_response['resultset_images'][0]
            if resultset_glue:
                categories = []
                if resultset_glue.has_key('glue') and resultset_glue['glue'].has_key('navbar'):
                    navbars = resultset_glue['glue']['navbar']
                    if navbars:
                        for navbar in navbars:
                            if isinstance(navbar, DictType):
                                if navbar.has_key('navEntry'):
                                    if navbar['type'] == 'disambiguation':
                                        navEntries = navbar['navEntry']
                                        if isinstance(navEntries, DictType):
                                            categories.append(navEntries)
                                        else:
                                            for navEntry in navEntries:
                                                categories.append(navEntry)
                template_values['categories'] = categories
            if m:
                template_values['category'] = m.replace(" ", "%20")
            if start and int(start) != 0:
                template_values['start'] = start
                template_values['prev'] = int(start) - 10
                template_values['next'] = int(start) + 10
            else:
                template_values['next'] = 10
            path = os.path.join(os.path.dirname(__file__), "search.html")
            self.response.out.write(template.render(path, template_values))
        else:
            template_values = {
                'query': q,
            }
            path = os.path.join(os.path.dirname(__file__), "empty.html")
            self.response.out.write(template.render(path, template_values))
    else:
        self.redirect("/")

def wwwsearch(q):
    # ans = memcache.get(q)
    # if ans:
    #     return ans
    q = q.replace("'", "")
    query = db.GqlQuery("SELECT * FROM QuestionAnswers WHERE question = '" + q + "'")
    result = query.get()
    if result:
        return result.answer
    qm = QueryMiner(q)
    def phrases_udf(r):
        r.update({"phrases": qm.extract(r)})
        return r
    pc = defaultdict(lambda: 1)
    def pc_udf(r):
        for p in r["phrases"]:
            pc[p] += 1
    w = yql.db.select(udf=phrases_udf, data=ysearch.search(q, count=50))
    yql.db.select(udf=pc_udf, table=w)
    if len(pc) <= 0:
        return "Not Found"
    items = sorted(pc.iteritems(), key=itemgetter(1), reverse=True)
    if len(pc) > 0:
        ans = str((sorted(pc.iteritems(), key=itemgetter(1), reverse=True)[0][0]).encode('latin-1', 'ignore'))
    else:
        ans = "Not found"
    # memcache.add(key=q, value=ans)
    answer = QuestionAnswers(question=q, answer=ans)
    answer.put()
    return ans
    ##################### DONE ################
    # NOTE: everything below the return above is unreachable (an older ranking experiment).
    index = -1
    topresults = []
    for item in items:
        index = index + 1
        inner = -1
        if index > 1:
            break
        count = item[1]
        base = str((item[0]).encode('latin-1', 'ignore'))
        for phr in items:
            inner = inner + 1
            if inner != index:
                text = str((phr[0]).encode('latin-1', 'ignore'))
                # print "text:" + text + " base: " + base
                if text.rfind(base) > -1:
                    # print "Found" + base
                    count = count + phr[1]
        topresults.append(count)
    # print topresults
    indexc = 0
    max = 0
    maxi = 0
    for result in topresults:
        if result > max:
            max = result
            maxi = indexc
            # print str(result) + " " + str(index)
        indexc = indexc + 1
    # print "yes"
    # print str(items[maxi][0]) + " oldcount: " + str(items[maxi][1]) + "newcount: " + str(max) + "index" + str(maxi)
    indexv = 0
    max1 = 0
    maxi1 = 0
    for result in topresults:
        if indexv != maxi:
            if result > max1:
                max1 = result
                maxi1 = indexv
        indexv = indexv + 1
    # print "yes"
    # print str(items[maxi1][0]) + " oldcount: " + str(items[maxi1][1]) + "newcount: " + str(max1) + "index" + str(maxi1)
    ans = str((items[maxi][0]).encode('latin-1', 'ignore'))  # + ", " + str(repr(items[maxi1][0]))
    # if len(pc) > 0:
    #     ans = str(sorted(pc.iteritems(), key=itemgetter(1), reverse=True)[0][0])
    # else:
    #     ans = "Not found"
    # memcache.add(key=q, value=ans)
    answer = Answers(question=q, answer=ans)
    answer.put()
    return ans

def imgsearch(q, start=0, ques=""):
    image_results = []
    image_results1 = []
    image_results2 = []
    count = 0
    count1 = 0
    count2 = 0
    if ques != "":
        q2 = text.mynorm(ques)
        if q2:
            ques = q2
        images2 = ysearch.search(ques, vertical="images", count=18, start=(int(start) / 3))
        if images2:
            image_response2 = images2['bossresponse']
            count2 = int(image_response2['images']['totalresults'])  # + int(image_response['deephits'])
            if count2 > 0:
                image_results2 = image_response2['images']['results']
        images1 = ysearch.search(q, vertical="images", count=18, start=(int(start) * 2 / 3))
        if images1:
            image_response1 = images1['bossresponse']
            count1 = int(image_response1['images']['totalresults'])  # + int(image_response['deephits'])
            if count1 > 0:
                image_results1 = image_response1['images']['results']
        count = count1 + count2
        c1 = len(image_results1)
        c2 = len(image_results2)
        if c1 >= 12 and c2 >= 6:
            # both have many results
            image_results = image_results2[:6] + image_results1[:12]
        else:
            if c1 >= 12 and c2 > 0:
                # more from the first query
                newc1 = 18 - c2
                image_results = image_results1[:newc1] + image_results2[:c2]
            elif c2 >= 6 and c1 > 0:
                # more from the second query
                newc2 = 18 - c1
                image_results = image_results1[:c1] + image_results2[:newc2]
            elif c1 <= 12 and c2 <= 6 and c1 > 0 and c2 > 0:
                # both have few results
                image_results = image_results2[:6] + image_results1[:12]
            elif c1 > 0:
                newstart = int(start) - count2
                if int(newstart) < 0:
                    newstart = 0
                images1 = ysearch.search(q, vertical="images", count=18, start=int(newstart))
                if images1:
                    image_response1 = images1['bossresponse']
                    count = int(image_response1['images']['totalresults'])  # + int(image_response['deephits'])
                    if count > 0:
                        image_results = image_response1['images']['results']
            elif c2 > 0:
                newstart = int(start) - count1
                if int(newstart) < 0:
                    newstart = 0
                images1 = ysearch.search(ques, vertical="images", count=18, start=int(newstart))
                if images1:
                    image_response1 = images1['bossresponse']
                    count = int(image_response1['images']['totalresults'])  # + int(image_response['deephits'])
                    if count > 0:
                        image_results = image_response1['images']['results']
    else:
        if q != "":
            q2 = text.mynorm(q)
            if q2:
                q = q2
            images1 = ysearch.search(q, vertical="images", count=18, start=int(start))
            if images1:
                image_response1 = images1['bossresponse']
                count = int(image_response1['images']['totalresults'])  # + int(image_response['deephits'])
                if count > 0:
                    image_results = image_response1['images']['results']
    random.seed(2)
    random.shuffle(image_results)
    if image_results:
        # image_results = image_response1['resultset_images'] + image_response2['resultset_images']
        image = "<table width=\"1000\" border=0 ><tr><font size='2'>"
        if int(count) > 0:
            i = 0
            size = 0
            for images in image_results:
                if i > 0:
                    if i % 6 == 0:
                        image = image + "</tr><tr>"
                i += 1
                intSize = 0
                try:
                    intSize = float(images['size'])
                except ValueError:
                    size = images['size']
                if intSize > 1024:
                    intSize = intSize / 1024
                    size = str(int(intSize)) + "K"
                elif intSize > 0:
                    size = str(int(intSize)) + "B"
                name = images['title']
                if len(name) > 17:
                    name = name[:15] + "..."
                domain = images['refererurl']
                if len(domain) > 20:
                    domain = domain[:18] + "..."
                image = image + "<td><table><tr><td height='160px' style='vertical-align:bottom;'><a href='" + images['refererclickurl'] + "'><img title='" + images['refererurl'] + "' src='" + images['thumbnailurl'] + "' style='max-height:150px;max-width:150px;'></a></td></tr><tr><td><center><small>" + name + "</small></center></td></tr><tr><td><center><font color='#444444'><small>" + images['width'] + " X " + images['height'] + " | " + size + "</font></small></center></td></tr><tr><td><center><small><font color='#003399'>" + domain + "</font></small></center></td></tr></table></td> "
            image = image + "</font></tr></table>"
            return image, count
        else:
            return "<br>No results found", 0
    return "<br>No results found", 0

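# The thumbnail size formatting above (numeric sizes get a "K"/"B" suffix, non-numeric values
# are passed through) in isolation, as a small helper sketch (the helper name is ours):
def format_size(raw):
    try:
        n = float(raw)
    except ValueError:
        return raw  # fall back to the raw value when it is not numeric
    if n > 1024:
        return str(int(n / 1024)) + "K"
    return str(int(n)) + "B"
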
# See accompanying LICENSE file or http://www.opensource.org/licenses/BSD-3-Clause for the specific language governing permissions and limitations under the License.
"""
Search Yahoo News and Twitter for "facebook".
Combine the results with Techmeme feeds based on titles having at least a two-term overlap.
Print the results to stdout.
"""
__author__ = "BOSS Team"

from util import console, text
from yos.yql import db, udfs
from yos.boss import ysearch

gn = db.create(name="gn", data=ysearch.search("facebook", bucket="news", count=40))
gn.rename("headline", "title")

sm = db.create(name="sm", url="http://search.twitter.com/search.json?q=facebook&rpp=40")
sm.rename("text", "title")

tm = db.select(name="tm", udf=udfs.unnest_value, url="http://techmeme.com/firehose.xml")

def overlap(r1, r2):
    return text.overlap(r1["title"], r2["title"]) > 1

j = db.join(overlap, [gn, sm, tm])
j = db.sort(key="sm$id", table=j)

for r in j.rows:
    console.write("\n%s\n[yahoo] %s\n[twitter] %s\n[techmeme] %s\n" % (r["sm$created_at"], r["gn$title"], r["sm$title"], r["tm$title"]))

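# text.overlap is assumed to return the number of terms two strings have in common; an
# equivalent plain-Python version of the join predicate above, for reference:
def share_two_terms(a, b):
    return len(set(a.lower().split()) & set(b.lower().split())) >= 2
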
""" Search 'iphone' on yahoo news and sort by date Get the wikipedia edits for the iphone page Rank the news results based on their title/text overlap with the wikipedia entries Sort by the overlap sizes This could potentially be a new freshness model, based on the idea that wikipedia is updated for recent significance """ __author__ = "BOSS Team" from util import console, text from yos.boss import ysearch from yos.yql import db yn = db.create(name="yn", data=ysearch.search("iphone sdk", bucket="news", count=50)) wiki = db.create(name="wiki", url="http://en.wikipedia.org/w/index.php?title=IPhone_OS&feed=atom&action=history") tb = db.cross([yn, wiki]) def rankf(row): row.update( {"rank": text.overlap(row["yn$abstract"], row["wiki$summary"]["value"])} ) ; return row tb = db.select(udf=rankf, table=tb) tb = db.group(by=["yn$title"], key="rank", reducer=lambda d1,d2: d1+d2, as="total", table=tb, norm=text.norm) tb = db.sort(key="total", table=tb) print "Before\n" for r in yn.rows: console.write( "[news] %s\n" % r["yn$title"] )
def get(self, topic="", subject=""):
    # Use the query string parameters if present.
    topic = self.request.get("topic", topic)
    formatter = Url.UrlFormat()
    topic = formatter.removeXSS(topic)
    if topic == "" or topic is None:
        topic = "Paul Kinlan"
    topic = urllib.unquote_plus(topic)
    subject = self.request.get("subject", subject)
    if subject == "" or subject is None:
        subject = topic
    subject = urllib.unquote_plus(subject)
    topicSearch = Model.TopicSearch.gql("WHERE topic = :1", topic.lower()).get()
    if topicSearch is None:
        topicSearch = Model.TopicSearch(topic=topic.lower())
    topicSearch.topicCount = topicSearch.topicCount + 1
    topicSearch.put()
    subjectSearch = Model.SubjectSearch.gql("WHERE subject = :1", subject.lower()).get()
    if subjectSearch is None:
        subjectSearch = Model.SubjectSearch(subject=subject.lower())
    if subjectSearch.sitesData is None:
        tn = db.create(name="tn", data=ysearch.search(subject, vertical="web", count=10, more={"filter": "-p**n", "type": "html"}))
        subjectSearch.sitesData = tn.dumps()
    else:
        tn = db.create(name="tn", data=db.simplejson.loads(subjectSearch.sitesData))
    subjectSearch.subjectCount = subjectSearch.subjectCount + 1
    subjectSearch.put()
    subjectTopicSearch = Model.TopicSubjectSearch.gql("WHERE topic = :1 AND subject = :2", topicSearch, subjectSearch).get()
    if subjectTopicSearch is None:
        subjectTopicSearch = Model.TopicSubjectSearch(topic=topicSearch, subject=subjectSearch)
    subjectTopicSearch.count = subjectTopicSearch.count + 1
    subjectTopicSearch.put()
    results = {0: [], 1: []}
    urls = {}
    column = 0
    for row in tn.rows:
        url = row["tn$url"]
        match = re.match("http://([^/]*)", url)
        if match is not None:
            domain = match.group(1)
            title = un_unicode_string(row["tn$title"])
            abstract = un_unicode_string(row["tn$abstract"])
            result = Result(title, abstract, domain, url)
            urls[domain] = result
    for result in urls:
        results[column % 2].append(urls[result])
        column = column + 1
    path = os.path.join(os.path.dirname(__file__), 'results.tmpl')
    self.response.out.write(template.render(path, {'decoded_query': topic, "decoded_subject": subject, 'type': topicSearch.type, 'query': topic, 'subject': subject, 'urls1': results[0], 'urls2': results[1]}))

for m in ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sept", "oct", "nov", "dec"]: if s.startswith(m): return m def parse_month(s): months = filter(lambda m: m is not None, map(month_lookup, text.uniques(s))) if len(months) > 0: return text.norm(months[0]) def parse_year(s): years = filter(lambda t: len(t) == 4 and t.startswith("19") or t.startswith("200"), text.uniques(s)) if len(years) > 0: return text.norm(years[0]) def date_udf(r): return {"year": parse_year(r["abstract"]), "month": parse_month(r["abstract"]), "count": 1} # since max fetch size in v1 is 50, let's do two calls and increment start to get the first 100 results i1 = db.select(name="i1", udf=date_udf, data=ysearch.search("when was jfk assasinated", count=50)) i2 = db.select(name="i2", udf=date_udf, data=ysearch.search("when was jfk assasinated", start=50, count=50)) iraq = db.union(name="iraq", tables=[i1, i2]) dates = db.group(by=["iraq$year", "iraq$month"], key="iraq$count", reducer=lambda d1,d2: d1+d2, as="total", table=iraq) dates = db.sort(key="total", table=dates) for row in dates.rows: month = row["iraq$month"] year = row["iraq$year"] if month is not None and year is not None: console.write( "Month: %s\tYear: %s\tTotal: %d\n" % (month, year, row["total"]) )
def yahoo_search(vars):
    from yos.boss import ysearch
    from yos.yql import db
    data = ysearch.search(vars['query'], count=10)
    table = db.create(data=data)
    return table.rows

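# Usage sketch for yahoo_search (the query value and the 'title'/'url' field names are
# assumptions; other snippets in this collection read named tables via prefixed keys
# such as "tn$url"):
for row in yahoo_search({'query': 'yahoo boss api'}):
    print row.get('title'), row.get('url')
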