def load_data(self):
    """Collect all tags and datasets from a CKAN website and count the occurrences.

    The tag list and the package list are requested from the action API
    below ``self.url``. Every tag is passed to ``add_tag``, every dataset to
    ``add_dataset`` and every tag of a dataset to ``add_tagging``. Afterwards
    the final steps run: ``set_tag_count``, ``set_language``, ``load_groups``
    and ``set_cooccurences`` on each entry of ``self.tags``.

    The crawl is best effort: a request that fails, or a reply that cannot
    be parsed, is skipped (and reported when ``config.DEBUG`` is set) instead
    of aborting the run.
    """
    if config.DEBUG:
        print("start collect tags")
    # A failed fetch yields None, so fall back to an empty list.
    for tag in self._fetch_api_result('/api/3/action/tag_list?all_fields=True') or []:
        if config.DEBUG:
            print(tag)
        self.add_tag(tag)

    if config.DEBUG:
        print("start collect datasets")
    for dataset in self._fetch_api_result('/api/3/action/package_list') or []:
        # One broken dataset must not stop the remaining ones, so the whole
        # per-dataset step (encoding, request, registration) is guarded.
        try:
            search = self._fetch_api_result(
                '/api/3/action/package_search?fq=name:"'
                + urllib2.quote(dataset.encode('UTF-8')) + '"')
            if not search:
                continue
            # package_list only supplies names; the first search hit is used
            # as the full record of the dataset.
            dataset_allfields = search['results'][0]
            self.add_dataset(dataset_allfields)
            for tag in dataset_allfields['tags']:
                self.add_tagging(tag, dataset_allfields)
        except Exception as error:
            if config.DEBUG:
                print("skipping dataset %r: %r" % (dataset, error))

    if config.DEBUG:
        print("final tasks")
    self.set_tag_count()
    self.set_language()
    self.load_groups()
    for tag in self.tags:
        tag.set_cooccurences(self)


def _fetch_api_result(self, path):
    """Return the 'result' member of the JSON reply for ``self.url + path``.

    Returns None when the request fails, the reply is empty, or the reply
    cannot be parsed or has no 'result' member.
    """
    try:
        response = lib.urlopen_with_retry(self.url + path)
        if not response:
            return None
        return json.loads(response.read())['result']
    except Exception as error:
        if config.DEBUG:
            print("request failed: %s%s (%r)" % (self.url, path, error))
        return None
def load_groups(self):
    """Collect all groups from a CKAN website and the number of datasets in each.

    Requests ``group_list`` with ``all_fields=True`` from the action API
    below ``self.url`` and appends one ``Group(name, package_count)`` to
    ``self.groups`` per entry of the reply's 'result' member.

    A failed request prints "Failed: <url>" and an unparsable reply prints
    "Failed 2: <url>"; in both cases the method returns without adding groups.
    """
    try:
        response = lib.urlopen_with_retry(
            self.url + '/api/3/action/group_list?all_fields=True')
    except Exception:
        print("Failed: " + self.url)
        return
    if not response:
        return

    try:
        group_list = json.loads(response.read())['result']
    except Exception:
        # Stop here: there is no group list to iterate over.
        print("Failed 2: " + self.url)
        return

    for group in group_list:
        # The APIs differ in the key that carries the dataset count:
        # try 'packages' first, then 'package_count', else assume 0.
        try:
            package_count = group['packages']
        except (KeyError, TypeError, IndexError):
            try:
                package_count = group['package_count']
            except (KeyError, TypeError, IndexError):
                package_count = 0
        self.groups.append(Group(group['name'], package_count))
def LoadODPs():
    """Read the instances file and build a list of OpenDataPortal objects.

    Every entry of the JSON list in ``config.instances_file`` is probed via
    its 'url-api' address (or 'url' when 'url-api' is absent). When both the
    ``tag_list`` and ``package_list`` requests can be answered and parsed, a
    ``model.OpenDataPortal(url, title, tag count, package count)`` is added
    to the list. Unreachable instances are reported as "No API 2" and
    unparsable replies as "No API 1".

    The resulting list is pickled to ``config.objects_file``.
    """
    portals = []
    with open(config.instances_file, 'r') as f:
        instances = json.load(f)
    print('Number of instances: ' + str(len(instances)))

    for instance in instances:
        url = instance['url-api'] if 'url-api' in instance else instance['url']

        # Catch Exception rather than everything, so that Ctrl-C still
        # interrupts the crawl instead of skipping to the next instance.
        try:
            response = lib.urlopen_with_retry(url + '/api/3/action/tag_list')
            response_pkg = lib.urlopen_with_retry(url + '/api/3/action/package_list')
        except Exception:
            response = None
            response_pkg = None

        if not response:
            print(instance['title'] + ';' + url + ';' + 'No API 2')
            continue

        try:
            tags = json.loads(response.read())['result']
            packages = json.loads(response_pkg.read())['result']
            portals.append(
                model.OpenDataPortal(url, instance['title'], len(tags), len(packages)))
        except Exception:
            print(instance['title'] + ';' + url + ';' + 'No API 1')

    with open(config.objects_file, 'wb') as output:
        # A negative protocol number selects the highest available protocol.
        pickle.dump(portals, output, -1)
def set_meaning(self):
    """Fill ``self.meanings`` with the resource URIs DBpedia Spotlight returns for ``self.name``.

    ``self.name`` is sent as the text to the Spotlight ``annotate`` service,
    asking for a JSON reply. The '@URI' of every entry in the reply's
    'Resources' member is appended to ``self.meanings``. The lookup is best
    effort: on any failure ``self.meanings`` keeps whatever was collected up
    to that point (an empty list if nothing was).
    """
    self.meanings = []
    try:
        req = urllib2.Request(
            'http://spotlight.dbpedia.org/rest/annotate?text='
            + urllib.quote(self.name.encode('utf-8')),
            headers={'Accept': 'application/json'})
        contents = json.loads(lib.urlopen_with_retry(req).read())
        # Look for the 'Resources' member directly instead of inferring its
        # presence from the number of keys in the reply.
        if isinstance(contents, dict):
            for resource in contents.get('Resources', []):
                self.meanings.append(resource['@URI'])
    except Exception:
        # Best effort: a failed lookup leaves the meanings as collected so far.
        pass
def set_language(self):
    """Determine the default language of the portal and store it in ``self.lang``.

    Requests ``status_show`` from the action API below ``self.url`` and
    converts the first two characters of the reported 'locale_default' into
    a three-letter code through ``pycountry``. Falls back to 'eng' when the
    request fails, the reply cannot be parsed, no default locale is reported
    or the locale is unknown to ``pycountry``.

    Returns the three-letter language code that was stored.
    """
    import pycountry

    code_3 = 'eng'
    try:
        response = lib.urlopen_with_retry(self.url + '/api/3/action/status_show')
        if response:
            code_1 = json.loads(response.read())['result']['locale_default']
            if code_1:
                # Only the leading two characters are used for the lookup.
                lang = str(code_1[:2])
                code_3 = pycountry.languages.get(iso639_1_code=lang).iso639_3_code
    except Exception:
        # Any failure while asking or converting falls back to English.
        code_3 = 'eng'

    self.lang = code_3
    return code_3