def whensearch(q):
    """Return the (month, year) most frequently mentioned in results for *q*.

    Up to 50 search results for the query are mapped through ``date_udf``,
    the per-row ``count`` values are summed for every (year, month)
    combination into a ``total`` column, groups missing either field are
    dropped, and the remainder is sorted on ``total``.

    Returns:
        The ``month`` and ``year`` of the first row after sorting, or the
        pair of strings ``("None", "None")`` when no row is left.
    """
    def add(x, y):
        return x + y

    def has_month_and_year(r):
        return r["month"] is not None and r["year"] is not None

    results = ysearch.search(q, count=50)
    tagged = db.select(udf=date_udf, data=results)
    tallies = db.group(
        by=["year", "month"],
        key="count",
        reducer=add,
        aas="total",
        table=tagged,
    )
    dated = db.where(has_month_and_year, table=tallies)
    ranked = db.sort(key="total", table=dated)

    # Guard clause: nothing usable was extracted from the results.
    if len(ranked.rows) == 0:
        return "None", "None"

    best = ranked.rows[0]
    return best["month"], best["year"]
from yos.yql import db
from yos.boss import ysearch
# `text` and `console` are used below but were not imported before this point.
from util import console, text

# Yahoo news results for the query, ranked by date; "headline" is renamed to
# "title" so every table exposes the same column to overlap_predicate.
ynews_data = ysearch.search_v1(
    "google android",
    vertical="news",
    count=100,
    more={"news.ranking": "date"},
)
ynews = db.create(name="ynews", data=ynews_data)
ynews.rename(before="headline", after="title")

# Summize search results; "text" is renamed to "title" for the same reason.
sm = db.create(name="sm", url="http://summize.com/search.json?q=google+android&rpp=60&lang=en")
sm.rename(before="text", after="title")


def ytf(r):
    """Reduce a YouTube feed row to its title and integer favorite count."""
    return {"title": r["title"]["value"], "favorites": int(r["statistics"]["favoriteCount"])}


yt = db.select(name="yt", udf=ytf, url="http://gdata.youtube.com/feeds/api/videos?vq=google+android&lr=en&orderby=published")


def diggf(r):
    """Reduce a Digg feed row to its title and integer digg count."""
    return {"title": r["title"]["value"], "diggs": int(r["diggCount"]["value"])}


# The query string previously contained "§ion=news": the "&sect" of
# "&section" had been decoded as an HTML entity, corrupting the parameter.
digg = db.select(name="dg", udf=diggf, url="http://digg.com/rss_search?search=google+android&area=dig&type=both&section=news")


def overlap_predicate(r1, r2):
    """Return True when text.overlap of the two rows' titles exceeds 2."""
    return text.overlap(r1["title"], r2["title"]) > 2


tb = db.join(overlap_predicate, [ynews, sm, digg, yt])


def socialf(row):
    """Add a "social" column (digg count + YouTube favorites) to *row* in place."""
    row.update({"social": row["dg$diggs"] + row["yt$favorites"]})
    return row


tb = db.select(udf=socialf, table=tb)
# `as` is a reserved word (a SyntaxError as a keyword argument on Python 2.6+);
# `aas` is the spelling whensearch() in this file uses for the same parameter.
# NOTE(review): confirm the installed yos.yql.db.group accepts `aas`.
tb = db.group(
    by=["ynews$title"],
    key="social",
    reducer=lambda d1, d2: d1 + d2,
    aas="rank",
    table=tb,
    norm=text.norm,
)
tb = db.sort(key="rank", table=tb)

for r in tb.rows:
    console.write(
        "\n%s\n[y] %s\n[t] %s\n[sr] %d\n"
        % (r["sm$created_at"], r["ynews$title"], r["sm$title"], r["rank"])
    )
# `text` and `console` are used below but were not imported before this point;
# `db` and `ysearch` are imported earlier in the file.
from util import console, text


# NOTE(review): the `def` header for this loop is not present in the file.
# The name and parameter are reconstructed from the call in parse_month()
# (`map(month_lookup, ...)`) and from the loop's use of `s`; the loop itself
# is unchanged. Confirm against the original script.
def month_lookup(s):
    """Return the first listed month abbreviation that *s* starts with.

    Falls through (implicitly returning None) when no abbreviation matches.
    """
    for m in ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sept", "oct", "nov", "dec"]:
        if s.startswith(m):
            return m


def parse_month(s):
    """Return the normalized first month found among the tokens of *s*.

    Each token from ``text.uniques(s)`` is passed through ``month_lookup``;
    the first non-None hit is returned via ``text.norm``. Returns None when
    no token matches.
    """
    # A list comprehension (rather than filter()) keeps the result sized and
    # indexable on every Python version.
    months = [m for m in map(month_lookup, text.uniques(s)) if m is not None]
    if months:
        return text.norm(months[0])
    return None


def parse_year(s):
    """Return the normalized first year-like token of *s*, or None.

    A token counts as a year when it is exactly four characters long and
    starts with "19" or "200".
    """
    # The original condition was
    #   len(t) == 4 and t.startswith("19") or t.startswith("200")
    # which, because `and` binds tighter than `or`, accepted any token
    # starting with "200" regardless of length (e.g. "2000s", "200").
    years = [
        t for t in text.uniques(s)
        if len(t) == 4 and t.startswith(("19", "200"))
    ]
    if years:
        return text.norm(years[0])
    return None


def date_udf(r):
    """Map a result row to the year/month parsed from its "abstract", with count 1."""
    return {"year": parse_year(r["abstract"]), "month": parse_month(r["abstract"]), "count": 1}


# Since the max fetch size in v1 is 50, make two calls and increment `start`
# to get the first 100 results.
i1 = db.select(name="i1", udf=date_udf, data=ysearch.search("when was jfk assasinated", count=50))
i2 = db.select(name="i2", udf=date_udf, data=ysearch.search("when was jfk assasinated", start=50, count=50))

iraq = db.union(name="iraq", tables=[i1, i2])
# `as` is a reserved word (a SyntaxError as a keyword argument on Python 2.6+);
# `aas` is the spelling whensearch() in this file uses for the same parameter.
# NOTE(review): confirm the installed yos.yql.db.group accepts `aas`.
dates = db.group(
    by=["iraq$year", "iraq$month"],
    key="iraq$count",
    reducer=lambda d1, d2: d1 + d2,
    aas="total",
    table=iraq,
)
dates = db.sort(key="total", table=dates)

for row in dates.rows:
    month = row["iraq$month"]
    year = row["iraq$year"]
    # Only report groups where both a month and a year were extracted.
    if month is not None and year is not None:
        console.write("Month: %s\tYear: %s\tTotal: %d\n" % (month, year, row["total"]))
""" Inner join popular delicious results and yahoo news results for the query 'iphone' Combine results which have at least 2 terms in common in their titles Then publish as a search results html page using the provided california template """ __author__ = "BOSS Team" from templates import publisher from util import text, console from yos.boss.ysearch import search_v2 from yos.yql import db, udfs dl = db.select(name="dl", udf=udfs.unnest_value, url="http://feeds.delicious.com/rss/popular/iphone") dl.describe() yn = db.create(name="yn", data=search_v2("iphone", bucket="news", count=50)) def overlap_predicate(r1, r2): return text.overlap(r1["title"], r2["title"]) > 1 serp = publisher.Serp(template_dir="templates/california", title="boss 'iphone'", endpoint="http://yahoo/search") tb = db.join(overlap_predicate, [dl, yn]) tb = db.group(by=["yn$title"], key=None, reducer=lambda x,y: None, as=None, table=tb, norm=text.norm) for row in tb.rows: serp.add(url=row["dl$link"], title=row["yn$title"], abstract=row["yn$abstract"], dispurl=row["yn$sourceurl"], source=row["dl$creator"]) serp.dump("iphone.html")