def test_mqlreaditer(self):
    """mqlreaditer should accept both a bare query dict and a one-element
    query list; mqlread, by contrast, requires the list form."""
    film_query = [{"id": None, "initial_release_date>=": "2009", "name": None, "type": "/film/film"}]
    from_list = freebase.mqlreaditer(film_query)
    from_dict = freebase.mqlreaditer(film_query[0])
    # [{}] vs {} -- mqlreaditer must cope with either spelling, and both
    # forms must yield the same result stream.
    self.assertNotEqual(from_list, None)
    self.assertEqual(list(from_list), list(from_dict))
    # mqlread is stricter: the bare-dict form must raise, since this query
    # matches many elements and therefore needs the list wrapper.
    listed_result = freebase.mqlread(film_query)
    bare_call = lambda: freebase.mqlread(film_query[0])
    self.assertRaises(MetawebError, bare_call)
    self.assertNotEqual(listed_result, None)
def queryCityTown(taxonomy, tourist_locations):
    """Fetch up to 10 /location/citytown topics from Freebase and feed each
    result to buildTBranch() to grow the taxonomy / tourist_locations data.

    In DEBUG mode a single mqlread is issued; in production the cursored
    mqlreaditer call is retried up to three times before giving up.

    Raises:
        The last Freebase error, if all three production attempts fail.
    """
    logger.debug('queryCityTown')
    logger.debug(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))
    query = [{
        "type": "/location/citytown",
        "name": None,
        "id": None,
        "limit": 10,
        "/common/topic/alias": [],
        "/common/topic/image": [{
            "id": None,
            "optional": True,
            "/common/image/size": {
                "/measurement_unit/rect_size/x": None,
                "/measurement_unit/rect_size/y": None
            }
        }]
    }]
    if fwiki.settings.DEBUG:
        print('in test mode')
        r = freebase.mqlread(query)
    else:
        print('in production mode')
        r = None
        last_error = None
        # The iterator call occasionally fails transiently; retry up to 3x.
        for attempt in range(3):
            try:
                r = freebase.mqlreaditer(query)
                break
            except Exception as e:  # was a bare except that also ate ^C
                last_error = e
        if r is None:
            # Previously `r` stayed unbound after three failures and the
            # loop below crashed with a NameError; surface the real error.
            raise last_error
    for i in r:
        buildTBranch(i, taxonomy, tourist_locations)
def seedGames():
    """Fetch every /games/game topic from Freebase and persist each one's
    identifiers as a models.GameSeed entity (keyed by mid).

    Returns:
        True once the full result iterator has been drained.
    """
    logging.info("################## main.py:: seedGames() ################")
    query = {
        "type": "/games/game",
        "mid": None,
        "name": None,
        "key": {"namespace": BGG_NAMESPACE, "value": None, "optional": True},
    }
    rows = freebase.mqlreaditer(query, extended=True)
    count = 0
    for row in rows:
        logging.info("################ result:: " + str(row) + " #################")
        mid = row.mid
        # The BGG key is optional in the query, so it may come back as None.
        bgg_id = None if row.key is None else row.key.value
        seed = models.GameSeed.get_by_key_name(mid)
        if seed is None:
            seed = models.GameSeed(key_name=mid, mid=mid, bgg_id=bgg_id, name=row.name)
        logging.info("############# game_seed.put() ####################")
        seed.put()
        count += 1
        logging.info("############### Count:: " + str(count) + " ###############")
    logging.info("################ Total count:: " + str(count) + " ############")
    return True
def test_mqlreaditer(self):
    """Both list-wrapped and bare-dict queries must work with mqlreaditer,
    while mqlread accepts only the list-wrapped form."""
    query_list = [{'id': None,
                   'initial_release_date>=': '2009',
                   'name': None,
                   'type': '/film/film'}]
    # mqlreaditer tolerates [{}] as well as {} -- results must agree.
    wrapped_iter = freebase.mqlreaditer(query_list)
    self.assertNotEqual(wrapped_iter, None)
    unwrapped_iter = freebase.mqlreaditer(query_list[0])
    self.assertEqual([x for x in wrapped_iter], [y for y in unwrapped_iter])
    # mqlread, however, needs [{}] here: the query matches many elements,
    # so the bare-dict form must raise MetawebError.
    self.assertRaises(MetawebError, lambda: freebase.mqlread(query_list[0]))
    self.assertNotEqual(freebase.mqlread(query_list), None)
def queryDivisions(taxonomy, tourist_locations):
    """Fetch /location/administrative_division topics (with their containing
    country and, optionally, continent) from Freebase and build taxonomy
    branches for the continent, the country, and the division itself,
    threading the accumulated ancestor names through buildTBranch().

    In DEBUG mode a single mqlread is issued; in production the cursored
    mqlreaditer call is retried up to three times before giving up.

    Raises:
        The last Freebase error, if all three production attempts fail.
    """
    logger.debug('queryDivisions')
    logger.debug(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))
    query = [{
        "type": "/location/administrative_division",
        "name": None,
        "/common/topic/image": [{
            "id": None,
            "optional": True,
            "/common/image/size": {
                "/measurement_unit/rect_size/x": None,
                "/measurement_unit/rect_size/y": None
            }
        }],
        "id": None,
        "/common/topic/alias": [],
        "/location/location/containedby": [{
            "type": "/location/country",
            "name": None,
            "id": None,
            "/common/topic/alias": [],
            "/location/location/containedby": [{
                "type": "/location/continent",
                "optional": True,
                "name": None,
                "id": None,
                "/common/topic/alias": []
            }]
        }]
    }]
    if fwiki.settings.DEBUG:
        print('in test mode')
        results = freebase.mqlread(query)
    else:
        print('in production mode')
        results = None
        last_error = None
        # Transient service failures happen; retry the iterator up to 3x.
        for attempt in range(3):
            try:
                results = freebase.mqlreaditer(query)
                break
            except Exception as e:  # was a bare except (also trapped ^C)
                last_error = e
        if results is None:
            # Previously the variable stayed unbound after three failures
            # and the loop below crashed with a NameError; raise instead.
            raise last_error
    # `results` was previously named `iter`, shadowing the builtin.
    for division in results:
        contained_by = []
        for country in division[C_BY]:
            for continent in country[C_BY]:
                buildTBranch(continent, taxonomy, tourist_locations)
                contained_by.append(continent[NAME].lower())
            buildTBranch(country, taxonomy, tourist_locations, contained_by)
            contained_by.append(country[NAME].lower())
        buildTBranch(division, taxonomy, tourist_locations, contained_by)
def parse_fb(self):
    """For every Artiste, look the person up on Freebase and, when exactly
    one match exists, backfill birth date, birth place and official website.

    Failures for a single artist are reported and skipped so one bad record
    cannot abort the whole scan.
    """
    createurs = Artiste.objects.all()
    for createur in createurs:
        try:
            # BUG FIX: the codec names were '<utf-8>' / '<utf8>', which are
            # not registered encodings, so every artist raised LookupError
            # and fell straight into the except clause below.
            print("Scanne l'artiste " + str(createur.name_id).encode('utf-8'))
            requete = {
                "type": "/people/person",
                "name": createur.name.encode('utf-8'),
                "date_of_birth": None,
                "place_of_birth": None,
                '/common/topic/description': None,
                '/common/topic/official_website': [{}],
                '/common/topic/topic_equivalent_webpage': [{}],
                "profession": []
            }
            essai = freebase.mqlreaditer(requete, asof=None)
            # Count the matches; only an unambiguous (single) hit is used.
            reponses = 0
            if essai is not None:
                for i in essai:
                    reponses = reponses + 1
            print("Freebase trouve " + str(reponses).encode('utf-8') + " réponses pour ce créateur")
            if reponses == 1:
                print("debut de collecte des informations")
                i = freebase.mqlread(requete, asof=None)
                if i["date_of_birth"] is not None and createur.date_naissance is None:
                    # Freebase dates are "YYYY", "YYYY-MM" or "YYYY-MM-DD";
                    # missing parts default to 1 (the 2-part form used to
                    # crash with an IndexError).
                    parts = i['date_of_birth'].split('-')
                    if len(parts) == 1:
                        createur.date_naissance = datetime.datetime(int(parts[0]), 1, 1, 0, 0)
                    elif len(parts) == 2:
                        createur.date_naissance = datetime.datetime(int(parts[0]), int(parts[1]), 1, 0, 0)
                    else:
                        createur.date_naissance = datetime.datetime(int(parts[0]), int(parts[1]), int(parts[2]), 0, 0)
                if i["place_of_birth"] is not None and createur.lieu_naissance is None:
                    createur.lieu_naissance = i['place_of_birth']
                # Grab the personal website, if one is listed and not
                # already recorded.
                if len(i['/common/topic/official_website']) > 0:
                    url1 = i['/common/topic/official_website'][0]['value']
                    test = LienWeb.objects.filter(URL=url1)
                    if len(test) == 0:
                        siteweb = LienWeb(URL=url1)
                        siteweb.name = "site personnel de " + str(createur.name)
                        siteweb.save()
                        createur.productions_web.add(siteweb)
                # Persist whatever was collected, website or not.
                createur.save()
            print("fini de chercher dans freebase pour " + str(createur.name_id).encode('utf-8'))
        except Exception:
            # Was a bare except; keep scanning the remaining artists.
            print("probleme avec l'artiste " + str(createur.name_id).encode('utf-8'))
def retrieve_freebase_desc(name, stype):
    # Best-effort lookup of a topic's description-article ids on Freebase.
    # NOTE(review): `value` and `data` are assigned but never returned in
    # this visible span -- presumably the function continues below; confirm.
    if not freebase:
        # Freebase support is optional; degrade to an empty description.
        return ""
    print "Retrieving the description for %s" % name
    # Map the site's type name onto a Freebase type, if a mapping exists;
    # `fb_type or []` turns a missing mapping into "any type".
    fb_type = settings.FREEBASE_TYPE_MAPPINGS.get(stype, None)
    value, data = None, ""
    try:
        value = freebase.mqlread({"name": name, "type": fb_type or [], FREEBASE_DESC_KEY: [{"id": None}]})
    except:
        try:
            # Fall back to the iterator API and take its first result.
            values = freebase.mqlreaditer({"name": name, "type": fb_type or [], FREEBASE_DESC_KEY: [{"id": None}]})
            value = values.next()
        except Exception, e:
            # Only print error as freebase is only optional
            if settings.ST_DEBUG:
                print "Error using `freebase`: %s" % e
def retrieve_freebase_name(name, stype):
    # Best-effort canonical-name lookup on Freebase; falls back to the
    # caller-supplied name when freebase support is unavailable.
    # NOTE(review): `value` is assigned but never returned in this visible
    # span -- presumably the function continues below; confirm.
    if not freebase:
        return name
    # Normalise the display name into a Freebase key before querying.
    search_key = fix_name_for_freebase(name)
    # Map the site's type name onto a Freebase type, if a mapping exists;
    # `fb_type or []` turns a missing mapping into "any type".
    fb_type = settings.FREEBASE_TYPE_MAPPINGS.get(stype, None)
    value = None
    try:
        # Try to get the exact match
        value = freebase.mqlread({"name": None, "type": fb_type or [], "key": {"value": search_key}})
    except:
        try:
            # Try to get a results has a generator and return its top result
            values = freebase.mqlreaditer({"name": None, "type": fb_type or [], "key": {"value": search_key}})
            value = values.next()
        except Exception, e:
            # Only print error as freebase is only optional
            if settings.ST_DEBUG:
                print "Error using `freebase`: %s" % e
# Dump the first N results of an MQL query as a JSON-style array.
#
# Usage: script.py QUERY_FILE N
#   QUERY_FILE  path to a JSON file holding the MQL query
#   N           number of results to print
import freebase
import json
import sys
from pprint import pprint as pp

# `with` guarantees the file is closed even if json.load() raises
# (was a bare open/close pair).
with open(sys.argv[1], "r") as f:
    q = json.load(f)

it = freebase.mqlreaditer(q)
n = int(sys.argv[2])  # hoisted: was re-parsed on every loop iteration
print("[")
for i in range(n):
    try:
        out = it.next()
    except StopIteration:
        # Fewer than n results are available; stop cleanly instead of
        # crashing with an unhandled StopIteration.
        break
    print(json.dumps(out, indent=2))
    if i != n - 1:
        print(",")
print("]")
#!/usr/bin/python import freebase import pickle query = [{ "/music/instrument/family": [{"name": None}], "id": None, "name": None, "a:type": "/music/instrument", "b:type": "/common/topic", "/common/topic/article": {"id": None}, "/common/topic/image": [{"id": None}], "/music/instrument/instrumentalists": {"return": "count", "optional": True}}] data={} counter = 0 for row in freebase.mqlreaditer(query): datum = {} datum['name'] = row['name'] datum['url'] = 'http://freebase.com/view%s' % (row['id']) datum['image'] = 'http://img.freebase.com/api/trans/raw%s' % (row['/common/topic/image'][0]['id']) datum['thumbnail'] = 'http://indextank.com/_static/common/demo/%s.jpg' % (row['/common/topic/image'][0]['id'].split('/')[2]) datum['text'] = freebase.blurb(row['/common/topic/article']['id'], maxlength=500) datum['variables'] = {} datum['variables'][0] = row['/music/instrument/instrumentalists'] families = [] for f in row['/music/instrument/family']: families.append(f['name']) datum['families'] = families data[datum['name']] = datum print('done %i' % counter) counter+=1 family = {} for datum in data.values(): for f in datum['families']: if family.has_key(f):
"b:type": "/common/topic", "/common/topic/article": { "id": None }, "/common/topic/image": [{ "id": None }], "/music/instrument/instrumentalists": { "return": "count", "optional": True } }] data = {} counter = 0 for row in freebase.mqlreaditer(query): datum = {} datum['name'] = row['name'] datum['url'] = 'http://freebase.com/view%s' % (row['id']) datum['image'] = 'http://img.freebase.com/api/trans/raw%s' % ( row['/common/topic/image'][0]['id']) datum['thumbnail'] = 'http://indextank.com/_static/common/demo/%s.jpg' % ( row['/common/topic/image'][0]['id'].split('/')[2]) datum['text'] = freebase.blurb(row['/common/topic/article']['id'], maxlength=500) datum['variables'] = {} datum['variables'][0] = row['/music/instrument/instrumentalists'] families = [] for f in row['/music/instrument/family']: families.append(f['name']) datum['families'] = families
def get_together_DF(self, ngram, word2):
    """Estimate how often `word2` co-occurs with the phrase `ngram`.

    Searches Freebase for topics whose name contains the (lower-cased,
    space-joined) ngram, fetches each candidate topic's article blurb, and
    returns the total number of occurrences of `word2` across those blurbs.

    Lookup failures are best-effort: a topic without an article (or a
    transient API error) simply contributes 0 to the count.
    """
    print("in doc_freq")
    phrase = ' '.join(ngram).lower()
    no_of_occ = 0
    # Hoisted out of the per-topic loop (it is loop-invariant), and
    # re.escape()d so regex metacharacters in word2 are counted as
    # literal text rather than silently changing the match.
    pattern = re.compile(re.escape(word2.lower()))
    search_query = {
        "name~=": "*" + phrase + "*",
        "id": [],
        "limit": 7
    }
    try:
        result = freebase.mqlreaditer(search_query)
        candidate_ids = []
        count = 0
        for topic in result:
            if count > 10:
                break
            for topic_id in topic["id"]:
                candidate_ids.append(topic_id)
            count += 1
        for id_name in candidate_ids:
            article_query = {
                "id": id_name,
                "/common/topic/article": {"id": None, "optional": True, "limit": 1}
            }
            try:
                r = freebase.mqlread(article_query)
                article_id = r["/common/topic/article"]["id"]
                text = freebase.blurb(article_id, maxlength=20000).lower()
                no_of_occ += len(pattern.findall(text))
            except Exception:
                # No article for this topic (or a transient error):
                # skip it rather than abort the whole count.
                pass
    except Exception:
        # The search itself failed; report whatever was counted (0).
        pass
    print(no_of_occ)
    return no_of_occ
return None f_defs = codecs.open('gazetteer_lists/definitions.def', 'w', encoding="utf-8") #################################################################################################### # sports #################################################################################################### sports_query = [{ "id": None, "name": None, "type": "/sports/sport" }] sports = freebase.mqlreaditer(sports_query, asof=None) f_sports = codecs.open('gazetteer_lists/sports.lst', 'w', encoding="utf-8") for sport in sports: write_perms(sport.name, f_sports) f_sports.close() f_defs.write("sports.lst:sport\n") #################################################################################################### # teams #################################################################################################### teams_query = [{
import freebase import freebase_fetch import sys, traceback import MySQLdb import httplib try: db = freebase_fetch.get_db() # Get all instances of all types in the "/music" domain r = freebase.mqlreaditer({"type": [{"type": "/type/type", "domain": "/music", "id": None}], "id": None, "creator": None, "timestamp": None}) print "Got resource_type results, now iterating through them." chunksize = 100 maxchunks = -1 chunks = 0 count = 0 for i in r: if maxchunks == chunks: break freebase_fetch.insert_resource(i, db) resource_key = freebase_fetch.get_key(i["id"], db) type_key = freebase_fetch.get_key(i["type"][0]["id"], db) freebase_fetch.add_resource_type(resource_key, type_key, db) # Commit on each iteration, to avoid huge bulk commits db.commit() count = count + 1 if chunksize == count: count = 0; chunks = chunks + 1 print str(chunks) + ") Committed " + str(chunksize) + " resource_type records."
#!/usr/bin/python import freebase, pdb from urlparse import urlparse from pprint import pprint query = [{ "type": "/common/document", "/common/document/source_uri": { "value": None, "optional": True }, "limit" : 1000 }] result = freebase.mqlreaditer(query) data = { } c = 0 for blurb in result: c += 1 # Figure out the import domain. import_domain = "unknown" if blurb.get("/common/document/source_uri") and blurb["/common/document/source_uri"]["value"].startswith("http://wp/"): import_domain = "wikipedia" if not data.get(import_domain): data[import_domain] = 0
image_query = [{ "type": "/common/image", "/common/licensed_object/license": [{ "id": None, "optional": True }], "!/type/content_import/content": { "/type/content_import/uri": None, "optional": True, "limit" : 1 }, }] result = freebase.mqlreaditer(image_query) image_data = { } c = 0 licenses = set() for image in result: c += 1 if not c % 10000: v = "%s (%s domains)" % (c, str(len(image_data.keys()))) print >> sys.stderr, v # Figure out the import domain. import_domain = "unknown" if image.get("!/type/content_import/content") and image["!/type/content_import/content"]["/type/content_import/uri"]: