def test_trans_blurb(self):
    """Exercise ``freebase.blurb`` against the Kurt Vonnegut topic.

    Checks that the default blurb is non-empty and starts with the topic
    name, and that two different ``maxlength`` values produce blurbs of
    different lengths.
    """
    kurt = "/en/kurt_vonnegut"

    blurb = freebase.blurb(kurt)
    # assertTrue replaces the deprecated alias assert_, which newer unittest
    # releases no longer provide.
    self.assertTrue(blurb.startswith("Kurt Vonnegut"))
    self.assertNotEqual(len(blurb), 0)

    # A tighter maxlength is expected to yield a blurb of a different length.
    blurb14 = freebase.blurb(kurt, maxlength=14)
    blurb57 = freebase.blurb(kurt, maxlength=57)
    self.assertNotEqual(len(blurb14), len(blurb57))

    # NOTE(review): the two results below are never asserted on in the
    # visible code; the calls only verify that break_paragraphs is accepted
    # without raising. Confirm whether comparisons were intended here.
    blurbpar = freebase.blurb(kurt, break_paragraphs=True, maxlength=20000)
    blurbnopar = freebase.blurb(kurt, break_paragraphs=False, maxlength=20000)
def freebase_blurb(vars):
    """Fill ``vars["blurb"]`` with the Freebase blurb for ``vars["mid"]``.

    The blurb returned by ``freebase.blurb`` is decoded before being stored.
    If the lookup raises ``freebase.api.MetawebError`` the placeholder
    ``u"no blurb"`` is stored instead. ``vars`` is modified in place and
    nothing is returned.
    """
    import freebase

    try:
        raw_blurb = freebase.blurb(vars["mid"])
        # NOTE(review): the codec name is spelled "<utf-8>"; presumably it
        # resolves to UTF-8 through codec-name normalization -- confirm.
        vars["blurb"] = raw_blurb.decode("<utf-8>")
    except freebase.api.MetawebError:
        vars["blurb"] = u"no blurb"
#!/usr/bin/python import freebase import pickle query = [{ "/music/instrument/family": [{"name": None}], "id": None, "name": None, "a:type": "/music/instrument", "b:type": "/common/topic", "/common/topic/article": {"id": None}, "/common/topic/image": [{"id": None}], "/music/instrument/instrumentalists": {"return": "count", "optional": True}}] data={} counter = 0 for row in freebase.mqlreaditer(query): datum = {} datum['name'] = row['name'] datum['url'] = 'http://freebase.com/view%s' % (row['id']) datum['image'] = 'http://img.freebase.com/api/trans/raw%s' % (row['/common/topic/image'][0]['id']) datum['thumbnail'] = 'http://indextank.com/_static/common/demo/%s.jpg' % (row['/common/topic/image'][0]['id'].split('/')[2]) datum['text'] = freebase.blurb(row['/common/topic/article']['id'], maxlength=500) datum['variables'] = {} datum['variables'][0] = row['/music/instrument/instrumentalists'] families = [] for f in row['/music/instrument/family']: families.append(f['name']) datum['families'] = families data[datum['name']] = datum print('done %i' % counter) counter+=1 family = {} for datum in data.values(): for f in datum['families']: if family.has_key(f):
"limit":lim }] results_location = freebase.mqlread(query_location) ###### removing previous contents to eliminate repetition db.film_data.remove() db.book_data.remove() db.people_data.remove() db.location_data.remove() for r in results_film: if r["/common/topic/article"]: article_id = r["/common/topic/article"]["id"] article = freebase.blurb(article_id,5000) #article = freebase.raw(article_id) else: article = "" j = {"name":r["name"],"article":article} #print j db.film_data.save(j) print "\n\nfilm database contents:" for cursor1 in db.film_data.find(): print cursor1.get("name","not existing") print cursor1.get("article","not existing") for r in results_book: if r["/common/topic/article"]:
"return": "count", "optional": True } }] data = {} counter = 0 for row in freebase.mqlreaditer(query): datum = {} datum['name'] = row['name'] datum['url'] = 'http://freebase.com/view%s' % (row['id']) datum['image'] = 'http://img.freebase.com/api/trans/raw%s' % ( row['/common/topic/image'][0]['id']) datum['thumbnail'] = 'http://indextank.com/_static/common/demo/%s.jpg' % ( row['/common/topic/image'][0]['id'].split('/')[2]) datum['text'] = freebase.blurb(row['/common/topic/article']['id'], maxlength=500) datum['variables'] = {} datum['variables'][0] = row['/music/instrument/instrumentalists'] families = [] for f in row['/music/instrument/family']: families.append(f['name']) datum['families'] = families data[datum['name']] = datum print('done %i' % counter) counter += 1 family = {} for datum in data.values(): for f in datum['families']: if family.has_key(f):
def get_together_DF(self, ngram, word2):
    """Count occurrences of ``word2`` in blurbs of topics matching ``ngram``.

    The words of ``ngram`` are joined with spaces, lower-cased and used as a
    wildcard name match (``*phrase*``) in a Freebase MQL query. Every id
    listed for the first ten matching topics is then looked up; when the
    topic has an article, its blurb (up to 20000 characters, lower-cased) is
    searched for the lower-cased ``word2``.

    Args:
        ngram: iterable of strings forming the phrase to look up.
        word2: string to count inside each blurb; matched literally.

    Returns:
        Total number of non-overlapping occurrences found. Lookups are
        best-effort: any failure while querying is swallowed, so the result
        may be partial, or 0 when nothing could be fetched.
    """
    print("in doc_freq")
    max_topics = 10
    phrase = ' '.join(ngram).lower()
    no_of_occ = 0
    query = {
        "name~=": "*" + phrase + "*",
        "id": [],
        "limit": 7
    }
    try:
        # Escape the word so regex metacharacters (e.g. "c++") are counted
        # literally instead of raising or matching something else; compile
        # once because the pattern does not change between blurbs.
        pattern = re.compile(re.escape(word2.lower()))

        topic_ids = []
        for seen, topic in enumerate(freebase.mqlreaditer(query)):
            # Stop after ten topics (the previous "> 10" check let in eleven).
            if seen >= max_topics:
                break
            topic_ids.extend(topic["id"])

        for id_name in topic_ids:
            article_query = {
                "id": id_name,
                "/common/topic/article": {"id": None, "optional": True, "limit": 1}
            }
            try:
                r = freebase.mqlread(article_query)
                article = r["/common/topic/article"]
                if not article:
                    # The article is optional in the query; nothing to scan.
                    continue
                text = freebase.blurb(article["id"], maxlength=20000).lower()
                no_of_occ += len(pattern.findall(text))
            except Exception:
                # Deliberately best-effort: a failed lookup for one id must
                # not abort counting for the remaining ids.
                continue
    except Exception:
        # Best-effort as well: keep whatever has been counted so far.
        pass
    print(no_of_occ)
    return no_of_occ
}, "name": [], "limit": lim }] results_location = freebase.mqlread(query_location) ###### removing previous contents to eliminate repetition db.film_data.remove() db.book_data.remove() db.people_data.remove() db.location_data.remove() for r in results_film: if r["/common/topic/article"]: article_id = r["/common/topic/article"]["id"] article = freebase.blurb(article_id, 5000) #article = freebase.raw(article_id) else: article = "" j = {"name": r["name"], "article": article} #print j db.film_data.save(j) print "\n\nfilm database contents:" for cursor1 in db.film_data.find(): print cursor1.get("name", "not existing") print cursor1.get("article", "not existing") for r in results_book: if r["/common/topic/article"]:
def freebase_blurb(vars):
    """Look up the blurb for ``vars['mid']`` and store it in ``vars['blurb']``.

    The value returned by ``freebase.blurb`` is decoded before it is stored.
    When the call raises ``freebase.api.MetawebError`` the fallback text
    ``u'no blurb'`` is stored instead. The mapping is updated in place; the
    function returns nothing.
    """
    import freebase

    try:
        topic_mid = vars['mid']
        encoded_blurb = freebase.blurb(topic_mid)
        # NOTE(review): '<utf-8>' is an unusual spelling of the codec name;
        # presumably it is normalized to UTF-8 by the codec lookup -- confirm.
        vars['blurb'] = encoded_blurb.decode('<utf-8>')
    except freebase.api.MetawebError:
        vars['blurb'] = u'no blurb'