Example #1
def profData(name):
    dm_data = ysearch.search("Professor " + name, count=3)
    dmTb = db.create(name="prof", data=dm_data)

    ## pick the domain from the known DATABASE keys that the returned URLs hit most often
    hitdic = {}
    for k in DATABASE.keys():
        hitdic[k] = 0

    #print "\nURLs returned: "
    for r in dmTb.rows:
        url = r["prof$url"]
        #print url

        ## for every domain saved,
        for k in DATABASE.keys():
            lt = k.split(".")
            urllt = url.split(".")

            ## compare only the second dot-separated component of the stored domain
            for e in lt[1:2]:
                if e in urllt:
                    hitdic[k] += 1

    ## report the domain with the maximum hits
    domain = findMax(hitdic)

    return [name, domain]
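findMax and the DATABASE global of known domains are defined elsewhere in this project; a minimal sketch of findMax, assuming it simply returns the key with the largest hit count:

def findMax(hitdic):
    ## return the key whose value is largest (None for an empty dict)
    best = None
    for k in hitdic:
        if best is None or hitdic[k] > hitdic[best]:
            best = k
    return best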
Example #2
def get(self):
  query = console.strfix(self.request.get("query"))
  count = int(console.strfix(self.request.get("count")))
  offset = int(console.strfix(self.request.get("offset")))
  data = ysearch.search(query, vertical="news", count=count, start=offset)
  images = db.create(data=data)
  serialized = simplejson.dumps(images.rows)
  self.response.out.write(serialized)
Example #3
  def get(self):
    query = console.strfix(self.request.get("query"))
    count = int(console.strfix(self.request.get("count")))
    offset = int(console.strfix(self.request.get("offset")))
    includeDelicious = console.strfix(self.request.get("includeDelicious"))      

    search_results = ysearch.search(query, count=count, start=offset)
    web = db.create(data=search_results)
    ## a four-character value (presumably "true") enables the delicious join
    if len(includeDelicious) == 4:
      dl = db.select(udfs.unnest_value, name="dl", url=u"http://feeds.delicious.com/rss/popular/%s" % query)
      web = db.join(overlap_predicate,[web,dl])

    serialized = simplejson.dumps(web.rows)
    self.response.out.write(serialized)
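overlap_predicate is not defined in this snippet; Examples #12, #13 and #16 below define it as a title-term-overlap test along these lines:

def overlap_predicate(r1, r2):
    ## join rows whose titles share at least two terms
    return text.overlap(r1["title"], r2["title"]) > 1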
Example #4
    def _download(self, start, count, sort=True):
        rows = []
        length = 0
        ## BOSS returns at most 50 results per request, so page through in blocks of 50
        for i in xrange(0, count, 50):
            offset = i  # xrange already steps in increments of 50
            pos = start + offset
            num_results = min(count - offset, 50)
            data = ysearch.search(self.term, start=pos, count=num_results)
            length = data['ysearchresponse']['totalhits']
            rows = rows + db.create(data=data).rows

        self.length = int(length)
        self._cache(start,rows,sort=sort)
        return rows
Example #5
    def get(self, site = "", terms = "", subject = ""):
        tn = db.create("tn", data = ysearch.search(terms + " " + subject + " site:" + site, count = 4, more={"filter": "-p**n", "type": "html"}))
        results = []
        for row in tn.rows:
            url = row["tn$url"]
            match = re.match("http://([^/]*)", url)
            if match is not None:
                title = un_unicode_string(row["tn$title"])
                abstract = un_unicode_string(row["tn$abstract"])
                result = Result(title, abstract, match.group(1), url)
                results.append(result)
        
        encoder = ResultJsonEncoder()

        self.response.out.write(encoder.encode(results))
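un_unicode_string (like Result and ResultJsonEncoder) is an app-level helper not shown here; a plausible sketch, assuming it just coerces unicode values down to a plain ASCII str:

def un_unicode_string(s):
    ## drop any characters that cannot be represented in ASCII
    if isinstance(s, unicode):
        return s.encode("ascii", "ignore")
    return s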
Example #6
def get(self):
  rows = list()
  try:
    data = ysearch.search('', count=50, more={'sites': 'kajian.net,ilmoe.com,radiorodja.com,radiomuslim.com,salafiyunpad.wordpress.com,problemamuslim.wordpress.com', 'style': 'raw'})
    results = db.create(data=data)
    results = db.sort(key="date", table=results)
    rows = results.rows
  except DownloadError:
    pass

  template_values = {
    'rows': rows,
  }
  path = os.path.join(os.path.dirname(__file__), 'newest.html')
  self.response.out.write(template.render(path, template_values))
Example #7
def PubSearch(name, domain):
    pub = ysearch.search(name + " publications site:" + domain, count=2, more={'type': 'html', 'abstract': 'long'})
    pub = db.create(name="pub", data=pub)

    ## count publications per year across the extracted publication pages
    yrs = ['2005', '2006', '2007', '2008', '2009', '2010', '2011']
    dic = {}
    for y in yrs:
        dic[y] = 0

    for r in pub.rows:
        url = r["pub$url"]
        dic = reportPubsYrs(url, dic)

    return dic
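reportPubsYrs is defined elsewhere; a rough sketch, under the assumption that it fetches the publication page and bumps each year's counter by the number of times that year appears in the page text:

import urllib2

def reportPubsYrs(url, dic):
    try:
        page = urllib2.urlopen(url).read()
    except urllib2.URLError:
        return dic
    for y in dic:
        dic[y] += page.count(y)
    return dic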
Example #8
def getHomePage(name, domain):
    ## truncate the domain to drop the leading "www." component
    dm = domain.split(".")
    domainsplt = dm[1]
    for dstr in dm[2:]:
        domainsplt += "." + dstr
    domain = domainsplt

    hm = ysearch.search("professor " + name + " site:" + domain, count=1, more={'type': 'html'})
    hm = db.create(name="hm", data=hm)

    ## assume a single home page
    for r in hm.rows:
        url = r["hm$url"]
        return url

    return None
Example #9
def get(self):
  query = self.request.get('q')
  if query == '':
    query = "nikah"

  page = self.request.get('page')
  if page == '':
    page = 1

  rows = list()
  prev = None
  next = None
  if query != '':
    try:
      data = ysearch.search(query, count=self.count, start=(int(page) - 1) * 10, more={'sites': 'kajian.net,ilmoe.com,radiorodja.com,radiomuslim.com,salafiyunpad.wordpress.com,problemamuslim.wordpress.com'})
      results = db.create(data=data)
      rows = results.rows
      try:
        prev = data['ysearchresponse']['prevpage']
        prev = int(page) - 1
      except KeyError:
        pass
      try:
        next = data['ysearchresponse']['nextpage']
        next = int(page) + 1
      except KeyError:
        pass
    except DownloadError:
      pass

  template_values = {
    'rows': rows,
    'query': query,
    'prev': prev,
    'next': next
  }
  path = os.path.join(os.path.dirname(__file__), 'index.html')
  self.response.out.write(template.render(path, template_values))
Example #10
    def get(self, site="", terms="", subject=""):
        tn = db.create("tn",
                       data=ysearch.search(terms + " " + subject + " site:" +
                                           site,
                                           count=4,
                                           more={
                                               "filter": "-p**n",
                                               "type": "html"
                                           }))
        results = []
        for row in tn.rows:
            url = row["tn$url"]
            match = re.match("http://([^/]*)", url)
            if match is not None:
                title = un_unicode_string(row["tn$title"])
                abstract = un_unicode_string(row["tn$abstract"])
                result = Result(title, abstract, match.group(1), url)
                results.append(result)

        encoder = ResultJsonEncoder()

        self.response.out.write(encoder.encode(results))
Example #11
def yahoo_search(vars):
  from yos.boss import ysearch
  from yos.yql import db
  data = ysearch.search(vars['query'], count=10)
  table = db.create(data=data)
  return table.rows
Example #12
"""
Four-way join of 'google android' on yahoo news, summize, youtube, and digg
Combine results based on titles having an overlap of 3 terms or more
Group results based on yahoo news title (remove duplicates)
Redefine the group-by equality operator to use text.norm for near-duplicate text removal
Within each group, sum the digg and youtube favorite counts as the rank for each joined result
Sort by rank, print to stdout
"""

__author__ = "BOSS Team"

from util import console, text
from yos.yql import db
from yos.boss import ysearch

ynews_data = ysearch.search_v1("google android", vertical="news", count=100, more={"news.ranking": "date"})
ynews = db.create(name="ynews", data=ynews_data)
ynews.rename(before="headline", after="title")

sm = db.create(name="sm", url="http://summize.com/search.json?q=google+android&rpp=60&lang=en")
sm.rename(before="text", after="title")

ytf = lambda r: {"title": r["title"]["value"], "favorites": int(r["statistics"]["favoriteCount"])}
yt = db.select(name="yt", udf=ytf, url="http://gdata.youtube.com/feeds/api/videos?vq=google+android&lr=en&orderby=published")

diggf = lambda r: {"title": r["title"]["value"], "diggs": int(r["diggCount"]["value"])}
digg = db.select(name="dg", udf=diggf, url="http://digg.com/rss_search?search=google+android&area=dig&type=both&section=news")

def overlap_predicate(r1, r2):
  return text.overlap(r1["title"], r2["title"]) > 2

tb = db.join(overlap_predicate, [ynews, sm, digg, yt])
Example #13
"""
Search yahoo news and twitter for facebook
Combine results with techmeme feeds based on titles having at least 2 term overlap
Print results to stdout
"""

__author__ = "BOSS Team"

from util import console, text
from yos.yql import db, udfs
from yos.boss import ysearch

gn = db.create(name="gn", data=ysearch.search_v1("facebook", vertical="news", count=40))
gn.rename("headline", "title")

sm = db.create(name="sm", url="http://search.twitter.com/search.json?q=facebook&rpp=40")
sm.rename("text", "title")

tm = db.select(name="tm", udf=udfs.unnest_value, url="http://techmeme.com/firehose.xml")

def overlap(r1, r2):
  return text.overlap(r1["title"], r2["title"]) > 1

j = db.join(overlap, [gn, sm, tm])
j = db.sort(key="sm$id", table=j)

for r in j.rows:
  console.write( "\n%s\n[yahoo] %s\n[twitter] %s\n[techmeme] %s\n" % (r["sm$created_at"], r["gn$title"], r["sm$title"], r["tm$title"]) )
Example #14
"""
Search 'iphone' on yahoo news and sort by date
Get the wikipedia edits for the iphone page
Rank the news results based on their title/text overlap with the wikipedia entries
Sort by the overlap sizes
This could potentially be a new freshness model, based on the idea that wikipedia is updated for recent significance
"""

__author__ = "BOSS Team"

from util import console, text
from yos.boss import ysearch
from yos.yql import db

yn = db.create(name="yn", data=ysearch.search("iphone sdk", bucket="news", count=50))
wiki = db.create(name="wiki", url="http://en.wikipedia.org/w/index.php?title=IPhone_OS&feed=atom&action=history")

tb = db.cross([yn, wiki])

def rankf(row):
  row.update( {"rank": text.overlap(row["yn$abstract"], row["wiki$summary"]["value"])} ) ; return row

tb = db.select(udf=rankf, table=tb)
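# note: 'as' as a keyword argument only parses on Python <= 2.5; it became a reserved word in Python 2.6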
tb = db.group(by=["yn$title"], key="rank", reducer=lambda d1,d2: d1+d2, as="total", table=tb, norm=text.norm)
tb = db.sort(key="total", table=tb)

print "Before\n"
for r in yn.rows:
  console.write( "[news] %s\n" % r["yn$title"] )
Example #15
"""
Search 'iphone' on yahoo news and sort by date
Get the wikipedia edits for the iphone page
Rank the news results based on their title/text overlap with the wikipedia entries
Sort by the overlap sizes
This could potentially be a new freshness model, based on the idea that wikipedia is updated for recent significance
"""

__author__ = "BOSS Team"

from util import console, text
from yos.boss import ysearch
from yos.yql import db

yn = db.create(name="yn", data=ysearch.search_v1("iphone sdk", vertical="news", count=50, more={"news.ranking": "date"}))
wiki = db.create(name="wiki", url="http://en.wikipedia.org/w/index.php?title=IPhone_OS&feed=atom&action=history")

tb = db.cross([yn, wiki])

def rankf(row):
  row.update( {"rank": text.overlap(row["yn$abstract"], row["wiki$summary"]["value"])} ) ; return row

tb = db.select(udf=rankf, table=tb)
tb = db.group(by=["yn$title"], key="rank", reducer=lambda d1,d2: d1+d2, as="total", table=tb, norm=text.norm)
tb = db.sort(key="total", table=tb)

print "Before\n"
for r in yn.rows:
  console.write( "[news] %s\n" % r["yn$title"] )
Example #16
"""
Inner join popular delicious results and yahoo news results for the query 'iphone'
Combine results which have at least 2 terms in common in their titles
Then publish as a search results html page using the provided california template
"""

__author__ = "BOSS Team"

from templates import publisher
from util import text, console
from yos.boss.ysearch import search_v2
from yos.yql import db, udfs

dl = db.select(name="dl", udf=udfs.unnest_value, url="http://feeds.delicious.com/rss/popular/iphone")
dl.describe()
yn = db.create(name="yn", data=search_v2("iphone", bucket="news", count=50))

def overlap_predicate(r1, r2):
  return text.overlap(r1["title"], r2["title"]) > 1

serp = publisher.Serp(template_dir="templates/california", title="boss 'iphone'", endpoint="http://yahoo/search")

tb = db.join(overlap_predicate, [dl, yn])
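# note: the 'as' keyword argument below again requires Python <= 2.5 (see Example #14)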
tb = db.group(by=["yn$title"], key=None, reducer=lambda x,y: None, as=None, table=tb, norm=text.norm)

for row in tb.rows:
  serp.add(url=row["dl$link"], title=row["yn$title"], abstract=row["yn$abstract"], dispurl=row["yn$sourceurl"], source=row["dl$creator"])

serp.dump("iphone.html")
Example #17
def yahoo_search(vars):
  from yos.boss import ysearch
  from yos.yql import db
  data = ysearch.search(vars['query'], count=10)
  table = db.create(data=data)
  return table.rows
Example #18
    def get(self, topic = "", subject=""):        
        #Use the query string parameters if present.
        topic = self.request.get("topic", topic)
        
        formatter = Url.UrlFormat()
        topic = formatter.removeXSS(topic)
        
        if topic == "" or topic is None:
            topic = "Paul Kinlan"
            
        topic = urllib.unquote_plus(topic)
               
        subject = self.request.get("subject", subject)
        
        if subject == "" or subject is None:
            subject = topic

        subject = urllib.unquote_plus(subject)

        topicSearch = Model.TopicSearch.gql("WHERE topic = :1", topic.lower()).get()
        
        if topicSearch is None:
            topicSearch = Model.TopicSearch(topic = topic.lower())
            
        topicSearch.topicCount = topicSearch.topicCount + 1
        topicSearch.put()
        
        subjectSearch = Model.SubjectSearch.gql("WHERE subject = :1", subject.lower()).get()
        if subjectSearch is None:
            subjectSearch = Model.SubjectSearch(subject = subject.lower())
            
        if subjectSearch.sitesData is None:
            tn = db.create(name="tn", data=ysearch.search(subject, vertical="web", count=10, more={"filter": "-p**n", "type": "html"}))
            subjectSearch.sitesData = tn.dumps()
        else:
            tn = db.create(name = "tn", data = db.simplejson.loads(subjectSearch.sitesData))
        
        subjectSearch.subjectCount = subjectSearch.subjectCount + 1
        
        subjectSearch.put()
        
        subjectTopicSearch = Model.TopicSubjectSearch.gql("WHERE topic = :1 AND subject = :2", topicSearch, subjectSearch).get()
        if subjectTopicSearch is None:
            subjectTopicSearch = Model.TopicSubjectSearch(topic = topicSearch, subject = subjectSearch)
            
        subjectTopicSearch.count = subjectTopicSearch.count + 1
        subjectTopicSearch.put()        
                
        results = { 0 : [], 1: []}
        urls = {}
        column = 0
        for row in tn.rows:            
            url = row["tn$url"]
            match = re.match("http://([^/]*)", url)
            if match is not None:
                domain = match.group(1)
                title = un_unicode_string(row["tn$title"])
                abstract = un_unicode_string(row["tn$abstract"])
                result = Result(title, abstract, domain, url)
                urls[domain] = result
                
        for result in urls:
            results[column % 2].append(urls[result])
            column = column + 1
        
        path = os.path.join(os.path.dirname(__file__), 'results.tmpl')
        
        self.response.out.write(template.render(path, {'decoded_query': topic, "decoded_subject": subject, 'type': topicSearch.type, 'query': topic, 'subject': subject , 'urls1': results[0], 'urls2': results[1]}))
Example #19
    def get(self, topic="", subject=""):
        #Use the query string parameters if present.
        topic = self.request.get("topic", topic)

        formatter = Url.UrlFormat()
        topic = formatter.removeXSS(topic)

        if topic == "" or topic is None:
            topic = "Paul Kinlan"

        topic = urllib.unquote_plus(topic)

        subject = self.request.get("subject", subject)

        if subject == "" or subject is None:
            subject = topic

        subject = urllib.unquote_plus(subject)

        topicSearch = Model.TopicSearch.gql("WHERE topic = :1",
                                            topic.lower()).get()

        if topicSearch is None:
            topicSearch = Model.TopicSearch(topic=topic.lower())

        topicSearch.topicCount = topicSearch.topicCount + 1
        topicSearch.put()

        subjectSearch = Model.SubjectSearch.gql("WHERE subject = :1",
                                                subject.lower()).get()
        if subjectSearch is None:
            subjectSearch = Model.SubjectSearch(subject=subject.lower())

        if subjectSearch.sitesData is None:
            tn = db.create(name="tn",
                           data=ysearch.search(subject,
                                               vertical="web",
                                               count=10,
                                               more={
                                                   "filter": "-p**n",
                                                   "type": "html"
                                               }))
            subjectSearch.sitesData = tn.dumps()
        else:
            tn = db.create(name="tn",
                           data=db.simplejson.loads(subjectSearch.sitesData))

        subjectSearch.subjectCount = subjectSearch.subjectCount + 1

        subjectSearch.put()

        subjectTopicSearch = Model.TopicSubjectSearch.gql(
            "WHERE topic = :1 AND subject = :2", topicSearch,
            subjectSearch).get()
        if subjectTopicSearch is None:
            subjectTopicSearch = Model.TopicSubjectSearch(
                topic=topicSearch, subject=subjectSearch)

        subjectTopicSearch.count = subjectTopicSearch.count + 1
        subjectTopicSearch.put()

        results = {0: [], 1: []}
        urls = {}
        column = 0
        for row in tn.rows:
            url = row["tn$url"]
            match = re.match("http://([^/]*)", url)
            if match is not None:
                domain = match.group(1)
                title = un_unicode_string(row["tn$title"])
                abstract = un_unicode_string(row["tn$abstract"])
                result = Result(title, abstract, domain, url)
                urls[domain] = result

        for result in urls:
            results[column % 2].append(urls[result])
            column = column + 1

        path = os.path.join(os.path.dirname(__file__), 'results.tmpl')

        self.response.out.write(
            template.render(
                path, {
                    'decoded_query': topic,
                    "decoded_subject": subject,
                    'type': topicSearch.type,
                    'query': topic,
                    'subject': subject,
                    'urls1': results[0],
                    'urls2': results[1]
                }))
Example #20
"""
Inner join popular delicious results and yahoo news results for the query 'iphone'
Combine results which have at least 2 terms in common in their titles
Then publish as a search results html page using the provided california template
"""

__author__ = "BOSS Team"

from templates import publisher
from util import text, console
from yos.boss.ysearch import search_v1
from yos.yql import db, udfs

dl = db.select(name="dl", udf=udfs.unnest_value, url="http://feeds.delicious.com/rss/popular/iphone")
dl.describe()
yn = db.create(name="yn", data=search_v1("iphone", vertical="news", count=50))

def overlap_predicate(r1, r2):
  return text.overlap(r1["title"], r2["title"]) > 1

serp = publisher.Serp(template_dir="templates/california", title="boss 'iphone'", endpoint="http://yahoo/search")

tb = db.join(overlap_predicate, [dl, yn])
tb = db.group(by=["yn$title"], key=None, reducer=lambda x,y: None, as=None, table=tb, norm=text.norm)

for row in tb.rows:
  serp.add(url=row["dl$link"], title=row["yn$title"], abstract=row["yn$abstract"], dispurl=row["yn$sourceurl"], source=row["dl$creator"])

serp.dump("iphone.html")