Example #1
def wwwsearch(q):
  # Mine candidate phrases from the top 50 web results and return the most frequently occurring one
  qm = QueryMiner(q)

  def phrases_udf(r):
    r.update({"phrases": qm.extract(r)}) ; return r

  pc = defaultdict(lambda: 1)
  
  def pc_udf(r):
    for p in r["phrases"]: pc[p] += 1
    
  w = db.select(udf=phrases_udf, data=ysearch.search(q, count=50))
  db.select(udf=pc_udf, table=w)

  if len(pc) > 0:
    return sorted(pc.iteritems(), key=itemgetter(1), reverse=True)[0][0]

  return "None"
Example #2
def whensearch(q):
  # Extract month/year mentions from the top 50 results (via date_udf, defined in Example #8) and return the most common pair
  wt = db.select(udf=date_udf, data=ysearch.search(q, count=50))
  dates = db.group(by=["year", "month"], key="count", reducer=lambda x,y: x+y, as="total", table=wt)
  dates = db.where(lambda r: r["month"] is not None and r["year"] is not None, table=dates)
  dates = db.sort(key="total", table=dates)

  if len(dates.rows) > 0:
    top = dates.rows[0]
    return top["month"], top["year"]

  return "None", "None"
Example #3
  def get(self):
    # Handle a search request: run a BOSS web search and optionally join the results with popular Delicious links
    query = console.strfix(self.request.get("query"))
    count = int(console.strfix(self.request.get("count")))
    offset = int(console.strfix(self.request.get("offset")))
    includeDelicious = console.strfix(self.request.get("includeDelicious"))      

    search_results = ysearch.search(query, count=count, start=offset)
    web = db.create(data=search_results)
    if len(includeDelicious) == 4:  # a 4-character value (e.g. "true") signals that Delicious results should be joined in
      dl = db.select(udfs.unnest_value, name="dl", url=u"http://feeds.delicious.com/rss/popular/%s" % query)
      web = db.join(overlap_predicate, [web, dl])

    serialized = simplejson.dumps(web.rows)
    self.response.out.write(serialized)
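
The handler above calls overlap_predicate without defining it; the other examples define it as a two-argument test on the number of shared title terms, so the handler presumably relies on a module-level definition along these lines (a sketch; the threshold of 2 is illustrative):

def overlap_predicate(r1, r2):
  # join rows whose titles share more than two terms, as in Examples #4 and #6
  return text.overlap(r1["title"], r2["title"]) > 2
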
Example #4
__author__ = "BOSS Team"

from util import console, text
from yos.yql import db
from yos.boss import ysearch

ynews_data = ysearch.search_v1("google android", vertical="news", count=100, more={"news.ranking": "date"})
ynews = db.create(name="ynews", data=ynews_data)
ynews.rename(before="headline", after="title")

sm = db.create(name="sm", url="http://summize.com/search.json?q=google+android&rpp=60&lang=en")
sm.rename(before="text", after="title")

ytf = lambda r: {"title": r["title"]["value"], "favorites": int(r["statistics"]["favoriteCount"])}
yt = db.select(name="yt", udf=ytf, url="http://gdata.youtube.com/feeds/api/videos?vq=google+android&lr=en&orderby=published")

diggf = lambda r: {"title": r["title"]["value"], "diggs": int(r["diggCount"]["value"])}
digg = db.select(name="dg", udf=diggf, url="http://digg.com/rss_search?search=google+android&area=dig&type=both&section=news")

def overlap_predicate(r1, r2):
  return text.overlap(r1["title"], r2["title"]) > 2

tb = db.join(overlap_predicate, [ynews, sm, digg, yt])

def socialf(row):
  row.update({"social": row["dg$diggs"] + row["yt$favorites"]}) ; return row

tb = db.select(udf=socialf, table=tb)
tb = db.group(by=["ynews$title"], key="social", reducer=lambda d1,d2: d1+d2, as="rank", table=tb, norm=text.norm)
tb = db.sort(key="rank", table=tb)
Example #5

"""
Search yahoo news and twitter for facebook
Combine results with techmeme feeds based on titles having at least 2 term overlap
Print results to stdout
"""

__author__ = "BOSS Team"

from util import console, text
from yos.yql import db, udfs
from yos.boss import ysearch

gn = db.create(name="gn", data=ysearch.search_v1("facebook", vertical="news", count=40))
gn.rename("headline", "title")

sm = db.create(name="sm", url="http://search.twitter.com/search.json?q=facebook&rpp=40")
sm.rename("text", "title")

tm = db.select(name="tm", udf=udfs.unnest_value, url="http://techmeme.com/firehose.xml")

def overlap(r1, r2):
  return text.overlap(r1["title"], r2["title"]) > 1

j = db.join(overlap, [gn, sm, tm])
j = db.sort(key="sm$id", table=j)

for r in j.rows:
  console.write( "\n%s\n[yahoo] %s\n[twitter] %s\n[techmeme] %s\n" % (r["sm$created_at"], r["gn$title"], r["sm$title"], r["tm$title"]) )
Example #6
In the group-by step, sum the diggs and save the result as the field "rank"
Then sort by rank and print to stdout
"""

__author__ = "BOSS Team"

from util import console, text
from yos.yql import db
from yos.boss import ysearch

ynews_data = ysearch.search_v1("google android", vertical="news", count=60)
ynews = db.create(name="ynews", data=ynews_data)
ynews.rename(before="headline", after="title")

sm = db.create(name="sm", url="http://summize.com/search.json?q=google+android&rpp=60&lang=en")
sm.rename(before="text", after="title")

titlef = lambda r: {"title": r["title"]["value"], "diggs": int(r["diggCount"]["value"])}
digg = db.select(name="dg", udf=titlef, url="http://digg.com/rss_search?search=google+android&area=dig&type=both&section=news")

def overlap_predicate(r1, r2):
  return text.overlap(r1["title"], r2["title"]) > 2

tb = db.join(overlap_predicate, [ynews, sm, digg])
tb = db.group(by=["ynews$title"], key="dg$diggs", reducer=lambda d1, d2: d1 + d2, as="rank", table=tb, norm=text.norm)
tb = db.sort(key="rank", table=tb)

for r in tb.rows:
  console.write( "\n%s\n[y] %s\n[t] %s\n[s] %d\n" % (r["sm$created_at"], r["ynews$title"], r["sm$title"], r["rank"]) )

Example #7
Sort by the overlap sizes
This could potentially serve as a freshness model, based on the idea that Wikipedia articles are updated when their topics gain recent significance
"""

__author__ = "BOSS Team"

from util import console, text
from yos.boss import ysearch
from yos.yql import db

yn = db.create(name="yn", data=ysearch.search("iphone sdk", bucket="news", count=50))
wiki = db.create(name="wiki", url="http://en.wikipedia.org/w/index.php?title=IPhone_OS&feed=atom&action=history")

tb = db.cross([yn, wiki])

def rankf(row):
  row.update( {"rank": text.overlap(row["yn$abstract"], row["wiki$summary"]["value"])} ) ; return row

tb = db.select(udf=rankf, table=tb)
tb = db.group(by=["yn$title"], key="rank", reducer=lambda d1,d2: d1+d2, as="total", table=tb, norm=text.norm)
tb = db.sort(key="total", table=tb)

print "Before\n"
for r in yn.rows:
  console.write( "[news] %s\n" % r["yn$title"] )

print "After\n"
for r in tb.rows:
  console.write( "[news] %s\n[source] %s\t[rank] %d\n" % (r["yn$title"], r["yn$source"], r["total"]) )

Example #8
  for m in ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sept", "oct", "nov", "dec"]:
    if s.startswith(m):
      return m

def parse_month(s):
  months = filter(lambda m: m is not None, map(month_lookup, text.uniques(s)))
  if len(months) > 0:
    return text.norm(months[0])

def parse_year(s):
  years = filter(lambda t: len(t) == 4 and (t.startswith("19") or t.startswith("200")), text.uniques(s))
  if len(years) > 0:
    return text.norm(years[0])

def date_udf(r):
  return {"year": parse_year(r["abstract"]), "month": parse_month(r["abstract"]), "count": 1}

# since the max fetch size in v1 is 50, make two calls and increment start to get the first 100 results
# (a generalized paging loop is sketched after this example)
i1 = db.select(name="i1", udf=date_udf, data=ysearch.search("when was jfk assassinated", count=50))
i2 = db.select(name="i2", udf=date_udf, data=ysearch.search("when was jfk assassinated", start=50, count=50))

iraq = db.union(name="iraq", tables=[i1, i2])
dates = db.group(by=["iraq$year", "iraq$month"], key="iraq$count", reducer=lambda d1,d2: d1+d2, as="total", table=iraq)
dates = db.sort(key="total", table=dates)

for row in dates.rows:
  month = row["iraq$month"]
  year = row["iraq$year"]
  if month is not None and year is not None:
    console.write( "Month: %s\tYear: %s\tTotal: %d\n" % (month, year, row["total"]) )
Example #9
# See accompanying LICENSE file or http://www.opensource.org/licenses/BSD-3-Clause for the specific language governing permissions and limitations under the License.

"""
Inner join popular delicious results and yahoo news results for the query 'iphone'
Combine results which have at least 2 terms in common in their titles
Then publish as a search results html page using the provided california template
"""

__author__ = "BOSS Team"

from templates import publisher
from util import text, console
from yos.boss.ysearch import search_v2
from yos.yql import db, udfs

dl = db.select(name="dl", udf=udfs.unnest_value, url="http://feeds.delicious.com/rss/popular/iphone")
dl.describe()
yn = db.create(name="yn", data=search_v2("iphone", bucket="news", count=50))

def overlap_predicate(r1, r2):
  return text.overlap(r1["title"], r2["title"]) > 1

serp = publisher.Serp(template_dir="templates/california", title="boss 'iphone'", endpoint="http://yahoo/search")

tb = db.join(overlap_predicate, [dl, yn])
tb = db.group(by=["yn$title"], key=None, reducer=lambda x,y: None, as=None, table=tb, norm=text.norm)

for row in tb.rows:
  serp.add(url=row["dl$link"], title=row["yn$title"], abstract=row["yn$abstract"], dispurl=row["yn$sourceurl"], source=row["dl$creator"])

serp.dump("iphone.html")