def test_trans_blurb(self):
     """Exercise ``freebase.blurb`` on the Kurt Vonnegut topic.

     Checks that the default blurb starts with the topic's name and is not
     empty, and that two different ``maxlength`` values produce blurbs of
     different lengths.
     """
     kurt = "/en/kurt_vonnegut"

     blurb = freebase.blurb(kurt)
     # assertTrue is used instead of the deprecated ``assert_`` alias, which
     # no longer exists in unittest from Python 3.12 on.
     self.assertTrue(blurb.startswith("Kurt Vonnegut"))
     self.assertNotEqual(len(blurb), 0)

     blurb14 = freebase.blurb(kurt, maxlength=14)
     blurb57 = freebase.blurb(kurt, maxlength=57)
     self.assertNotEqual(len(blurb14), len(blurb57))

     # Both paragraph modes are requested; no assertion on their content is
     # made in the visible part of this test.
     blurbpar = freebase.blurb(kurt, break_paragraphs=True, maxlength=20000)
     blurbnopar = freebase.blurb(kurt, break_paragraphs=False, maxlength=20000)
    def test_trans_blurb(self):
        """Exercise ``freebase.blurb`` on the Kurt Vonnegut topic.

        Checks that the default blurb starts with the topic's name and is not
        empty, and that two different ``maxlength`` values produce blurbs of
        different lengths.
        """
        kurt = "/en/kurt_vonnegut"

        blurb = freebase.blurb(kurt)
        # assertTrue is used instead of the deprecated ``assert_`` alias,
        # which no longer exists in unittest from Python 3.12 on.
        self.assertTrue(blurb.startswith("Kurt Vonnegut"))
        self.assertNotEqual(len(blurb), 0)

        blurb14 = freebase.blurb(kurt, maxlength=14)
        blurb57 = freebase.blurb(kurt, maxlength=57)
        self.assertNotEqual(len(blurb14), len(blurb57))

        # Both paragraph modes are requested; no assertion on their content
        # is made in the visible part of this test.
        blurbpar = freebase.blurb(kurt, break_paragraphs=True, maxlength=20000)
        blurbnopar = freebase.blurb(kurt, break_paragraphs=False, maxlength=20000)
# Example #3
# 0
    def freebase_blurb(vars):
        """Store the Freebase blurb for ``vars["mid"]`` in ``vars["blurb"]``.

        The value returned by ``freebase.blurb`` is decoded before being
        stored.  If fetching or decoding raises
        ``freebase.api.MetawebError``, the placeholder ``u"no blurb"`` is
        stored instead.  ``vars`` is modified in place; nothing is returned.
        """
        import freebase

        try:
            raw_blurb = freebase.blurb(vars["mid"])
            # NOTE(review): the codec name is written with angle brackets;
            # confirm it resolves to UTF-8 as intended.
            text = raw_blurb.decode("<utf-8>")
        except freebase.api.MetawebError:
            text = u"no blurb"
        vars["blurb"] = text
#!/usr/bin/python
import freebase
import pickle

# MQL query over /music/instrument topics: asks for each topic's families,
# id, name, article id, image ids and an optional count of instrumentalists.
query = [{
    "/music/instrument/family": [{"name": None}],
    "id": None,
    "name": None,
    "a:type": "/music/instrument",
    "b:type": "/common/topic",
    "/common/topic/article": {"id": None},
    "/common/topic/image": [{"id": None}],
    "/music/instrument/instrumentalists": {"return": "count", "optional": True},
}]

# Instrument records keyed by name; a later row with the same name replaces
# the earlier one.
data = {}
counter = 0
for row in freebase.mqlreaditer(query):
    datum = {
        'name': row['name'],
        'url': 'http://freebase.com/view%s' % (row['id']),
    }

    # Only the first image of the topic is used, for both URLs.
    first_image_id = row['/common/topic/image'][0]['id']
    datum['image'] = 'http://img.freebase.com/api/trans/raw%s' % (first_image_id)
    datum['thumbnail'] = 'http://indextank.com/_static/common/demo/%s.jpg' % (
        first_image_id.split('/')[2])

    article_id = row['/common/topic/article']['id']
    datum['text'] = freebase.blurb(article_id, maxlength=500)
    datum['variables'] = {0: row['/music/instrument/instrumentalists']}
    datum['families'] = [
        entry['name'] for entry in row['/music/instrument/family']
    ]

    data[datum['name']] = datum
    print('done %i' % counter)
    counter += 1


family = {}
for datum in data.values():
    for f in datum['families']:
        if family.has_key(f):
# Example #5
# 0
         "limit":lim
    }]
results_location = freebase.mqlread(query_location)


# Remove previous contents so that re-running the script does not store the
# same documents twice.
db.film_data.remove()
db.book_data.remove()
db.people_data.remove()
db.location_data.remove()


# Store one {"name", "article"} document per film; films without an article
# get an empty string.
for r in results_film:
    if r["/common/topic/article"]:
        article_id = r["/common/topic/article"]["id"]
        # The length limit is passed by keyword, like the other
        # freebase.blurb calls: a bare positional 5000 binds to whatever the
        # second parameter of blurb is (presumably break_paragraphs in
        # freebase-python -- TODO confirm), not necessarily to maxlength.
        article = freebase.blurb(article_id, maxlength=5000)
    else:
        article = ""
    j = {"name": r["name"], "article": article}
    db.film_data.save(j)

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("\n\nfilm database contents:")
for cursor1 in db.film_data.find():
    print(cursor1.get("name", "not existing"))
    print(cursor1.get("article", "not existing"))

for r in results_book:
    if r["/common/topic/article"]:
        "return": "count",
        "optional": True
    }
}]

# Instrument records keyed by name; a later row with the same name replaces
# the earlier one.
data = {}
counter = 0
for row in freebase.mqlreaditer(query):
    datum = {
        'name': row['name'],
        'url': 'http://freebase.com/view%s' % (row['id']),
    }

    # Only the first image of the topic is used, for both URLs.
    first_image_id = row['/common/topic/image'][0]['id']
    datum['image'] = 'http://img.freebase.com/api/trans/raw%s' % (first_image_id)
    datum['thumbnail'] = 'http://indextank.com/_static/common/demo/%s.jpg' % (
        first_image_id.split('/')[2])

    article_id = row['/common/topic/article']['id']
    datum['text'] = freebase.blurb(article_id, maxlength=500)
    datum['variables'] = {0: row['/music/instrument/instrumentalists']}
    datum['families'] = [
        entry['name'] for entry in row['/music/instrument/family']
    ]

    data[datum['name']] = datum
    print('done %i' % counter)
    counter += 1

family = {}
for datum in data.values():
    for f in datum['families']:
        if family.has_key(f):
     def get_together_DF(self, ngram, word2):
         """Count matches of ``word2`` in Freebase articles found for ``ngram``.

         The words of ``ngram`` are joined with spaces and lower-cased, and
         topics whose name contains that phrase are requested through
         ``freebase.mqlreaditer``.  For every returned row, the article of
         the most recently seen id is fetched with ``freebase.blurb``,
         lower-cased, and searched for the lower-cased ``word2`` pattern.
         No further rows are processed once more than ten ids have been seen.

         The lookup is best-effort: an exception while handling one row skips
         that row, and any other exception ends the search; in both cases the
         count accumulated so far is returned.

         Args:
             ngram: iterable of word strings forming the search phrase.
             word2: pattern to count.  It is handed to ``re.compile``
                 unescaped, so regex metacharacters are interpreted.

         Returns:
             int: total number of matches, 0 if nothing matched or the lookup
             failed.  The value is also printed.
         """
         print("in doc_freq")
         word1 = ' '.join(ngram).lower()
         no_of_occ = 0
         query = {
             "name~=": "*" + word1 + "*",
             "id": [],
             "limit": 7,
         }

         try:
             # Compiled once instead of once per row.  It stays inside the
             # try so an unusable pattern still yields 0 rather than raising.
             pattern = re.compile(word2.lower())

             ids_seen = 0
             last_id = None
             for row in freebase.mqlreaditer(query):
                 if ids_seen > 10:
                     break

                 row_ids = list(row["id"])
                 if row_ids:
                     # Only the last id seen so far is ever looked up, so
                     # there is no need to keep the whole list of ids.
                     last_id = row_ids[-1]
                     ids_seen += len(row_ids)
                 if last_id is None:
                     continue

                 try:
                     topic = freebase.mqlread({
                         "id": last_id,
                         "/common/topic/article": {
                             "id": None,
                             "optional": True,
                             "limit": 1,
                         },
                     })
                     article = topic["/common/topic/article"]
                     if article is None:
                         # The article is optional in the query; a topic
                         # without one contributes nothing.
                         continue
                     text = freebase.blurb(article["id"], maxlength=20000).lower()
                     no_of_occ += len(pattern.findall(text))
                 except Exception:
                     # Best-effort: a failed lookup for one topic must not
                     # discard the matches already counted.
                     continue
         except Exception:
             # Best-effort: fall through with whatever has been counted.
             pass

         print(no_of_occ)
         return no_of_occ
# Example #8
# 0
    },
    "name": [],
    "limit": lim
}]
results_location = freebase.mqlread(query_location)

# Remove previous contents so that re-running the script does not store the
# same documents twice.
db.film_data.remove()
db.book_data.remove()
db.people_data.remove()
db.location_data.remove()

# Store one {"name", "article"} document per film; films without an article
# get an empty string.
for r in results_film:
    if r["/common/topic/article"]:
        article_id = r["/common/topic/article"]["id"]
        # The length limit is passed by keyword, like the other
        # freebase.blurb calls: a bare positional 5000 binds to whatever the
        # second parameter of blurb is (presumably break_paragraphs in
        # freebase-python -- TODO confirm), not necessarily to maxlength.
        article = freebase.blurb(article_id, maxlength=5000)
    else:
        article = ""
    j = {"name": r["name"], "article": article}
    db.film_data.save(j)

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("\n\nfilm database contents:")
for cursor1 in db.film_data.find():
    print(cursor1.get("name", "not existing"))
    print(cursor1.get("article", "not existing"))

for r in results_book:
    if r["/common/topic/article"]:
# Example #9
# 0
 def freebase_blurb(vars):
     """Store the Freebase blurb for ``vars['mid']`` in ``vars['blurb']``.

     The value returned by ``freebase.blurb`` is decoded before being
     stored.  If fetching or decoding raises ``freebase.api.MetawebError``,
     the placeholder ``u'no blurb'`` is stored instead.  ``vars`` is
     modified in place; nothing is returned.
     """
     import freebase

     try:
         raw_blurb = freebase.blurb(vars['mid'])
         # NOTE(review): the codec name is written with angle brackets;
         # confirm it resolves to UTF-8 as intended.
         text = raw_blurb.decode('<utf-8>')
     except freebase.api.MetawebError:
         text = u'no blurb'
     vars['blurb'] = text