Beispiel #1
0
	def load_data(self):
		"""Collect tags, datasets and taggings from this CKAN portal.

		Reads the tag list and the package list through the portal's action
		API (rooted at self.url), registers every tag via add_tag(), every
		dataset via add_dataset() and every tag/dataset pair via
		add_tagging(). Afterwards the tag counts, the portal language, the
		groups and the tag co-occurrences are computed.

		The crawl is best effort: a request that fails or an answer that
		cannot be parsed only skips the affected part (all tags, all
		datasets, or one single dataset) instead of aborting the whole load.
		"""
		def fetch_result(path):
			"""Return the 'result' member of the JSON answer for path, or None."""
			try:
				response = lib.urlopen_with_retry(self.url + path)
				if not response:
					return None
				return json.loads(response.read())['result']
			except Exception:
				# The exceptions raised by lib.urlopen_with_retry are not
				# visible here, so everything short of KeyboardInterrupt /
				# SystemExit counts as "this answer is unavailable".
				if config.DEBUG:
					print("request failed: " + path)
				return None

		if config.DEBUG:
			print("start collect tags")

		# `or []` keeps a failed request from breaking the loop.
		for tag in fetch_result('/api/3/action/tag_list?all_fields=True') or []:
			if config.DEBUG:
				print(tag)
			self.add_tag(tag)

		if config.DEBUG:
			print("start collect datasets")

		for dataset in fetch_result('/api/3/action/package_list') or []:
			try:
				# package_list only yields names; the complete record
				# (including its tags) is fetched with a name-filtered search.
				search_result = fetch_result('/api/3/action/package_search?fq=name:"' + urllib2.quote(dataset.encode('UTF-8')) + '"')
				if search_result is None:
					continue
				dataset_allfields = search_result['results'][0]
				self.add_dataset(dataset_allfields)

				for tag in dataset_allfields['tags']:
					self.add_tagging(tag, dataset_allfields)
			except Exception:
				# One broken dataset must not stop the remaining ones.
				if config.DEBUG:
					print("skipped dataset: " + repr(dataset))

		if config.DEBUG:
			print("final tasks")

		# Derived data that needs the complete tag/dataset collection.
		self.set_tag_count()
		self.set_language()
		self.load_groups()
		for tag in self.tags:
			tag.set_cooccurences(self)
Beispiel #2
0
	def load_groups(self):
		"""Fetch the portal's groups and append one Group per entry to self.groups.

		Queries the group_list action (all_fields=True) below self.url. Each
		Group is built from the entry's 'name' and its dataset count. If the
		request fails or the answer cannot be parsed, a "Failed" line is
		printed and self.groups is left unchanged.
		"""
		try:
			group_list_response = lib.urlopen_with_retry(self.url + '/api/3/action/group_list?all_fields=True')
		except Exception:
			print("Failed: " + self.url)
			return
		if not group_list_response:
			return

		try:
			group_list = json.loads(group_list_response.read())['result']
		except Exception:
			# Without a parsed list there is nothing to iterate over; return
			# instead of running into an unbound group_list below.
			print("Failed 2: " + self.url)
			return

		for group in group_list:
			# The APIs differ in the key that carries the dataset count.
			if 'packages' in group:
				package_count = group['packages']
			elif 'package_count' in group:
				package_count = group['package_count']
			else:
				package_count = 0
			self.groups.append(Group(group['name'], package_count))
Beispiel #3
0
def LoadODPs():
	"""Read the instances file and pickle a list of OpenDataPortal objects.

	Every entry of the JSON file config.instances_file is probed through its
	tag_list and package_list actions, using the entry's 'url-api' when
	present and its 'url' otherwise. Portals that answer both requests with
	parsable JSON are stored as model.OpenDataPortal(url, title, number of
	tags, number of packages); all others are reported on stdout as
	'<title>;<url>;No API 1' (unusable answer) or '<title>;<url>;No API 2'
	(no answer). The resulting list is pickled to config.objects_file.
	"""
	ODP = []

	with open(config.instances_file, 'r') as f:
		instances = json.load(f)

	print('Number of instances: ' + str(len(instances)))

	for i in instances:
		url = i['url-api'] if 'url-api' in i else i['url']

		try:
			response = lib.urlopen_with_retry(url + '/api/3/action/tag_list')
			response_pkg = lib.urlopen_with_retry(url + '/api/3/action/package_list')
		except Exception:
			# `except Exception` (not a bare except) keeps Ctrl-C and
			# SystemExit able to stop the crawl.
			response = 0

		if not response:
			print(i['title'] + ';' + url + ';' + 'No API 2')
			continue

		try:
			result = json.loads(response.read())['result']
			packages = json.loads(response_pkg.read())['result']
			ODP.append(model.OpenDataPortal(url, i['title'], len(result), len(packages)))
		except Exception:
			print(i['title'] + ';' + url + ';' + 'No API 1')

	with open(config.objects_file, 'wb') as output:
		pickle.dump(ODP, output, pickle.HIGHEST_PROTOCOL)
Beispiel #4
0
	def set_meaning(self):
		"""Fill self.meanings with the resource URIs Spotlight returns for self.name.

		Sends self.name to the DBpedia Spotlight annotate endpoint, asking for
		JSON, and stores the '@URI' of every entry under 'Resources'.
		self.meanings is always reset to a list first. The lookup is best
		effort: on any failure the URIs collected so far (possibly none) are
		kept and no exception is raised.
		"""
		self.meanings = []
		try:
			req = urllib2.Request('http://spotlight.dbpedia.org/rest/annotate?text=' + urllib.quote(self.name.encode('utf-8')), headers = {'Accept' : 'application/json'})
			contents = json.loads(lib.urlopen_with_retry(req).read())

			# Look the key up directly instead of inferring its presence from
			# the number of top-level keys in the answer.
			for m in contents.get('Resources', []):
				self.meanings.append(m['@URI'])
		except Exception:
			# Deliberately silent: a tag without meanings is acceptable.
			pass
Beispiel #5
0
	def set_language(self):
		"""Derive the portal language from its status_show answer.

		Takes the first two characters of result.locale_default, maps them to
		a three-letter code through pycountry, stores that code in self.lang
		and returns it. 'eng' is used when the portal declares no default
		locale or when the two-letter code cannot be resolved.

		Returns None, leaving self.lang untouched, when the status request
		fails or its answer cannot be parsed.
		"""
		import pycountry

		try:
			response = lib.urlopen_with_retry(self.url + '/api/3/action/status_show')
		except Exception:
			response = 0
		if not response:
			return None

		try:
			code_1 = json.loads(response.read())['result'].get('locale_default')
		except Exception:
			# An unreadable status answer is handled like an unreachable
			# portal, so a caller such as load_data() is not aborted.
			return None

		code_3 = 'eng'
		if code_1:
			try:
				# NOTE(review): iso639_1_code / iso639_3_code look like the
				# attribute names of older pycountry releases, which presumably
				# raise KeyError for unknown codes - confirm against the
				# installed version. Other errors are left to propagate so
				# that an API mismatch does not silently become 'eng'.
				code_3 = pycountry.languages.get(iso639_1_code=str(code_1[:2])).iso639_3_code
			except KeyError:
				pass

		self.lang = code_3
		return code_3