コード例 #1
0
def run(num, file_name):
	"""Parse up to ``num`` public profiles and save the resulting graph as XML.

	Each path returned by ``getPublicProfiles(limit=num)`` is parsed with
	``ProfileParser``, the parsed profile is added to an ``RG`` instance, and
	``DBHelper.dataSetRDF(profile.file_name, rdf=1)`` is called for it
	(presumably flagging the profile as converted -- confirm against DBHelper).

	Any ``Exception`` raised while processing is printed and NOT re-raised, so
	one bad profile stops the loop but the work done so far is still saved.

	:param num: maximum number of profiles to process (passed as ``limit``).
	:param file_name: passed to ``RG`` and used as the output file name when
		the graph is saved.
	"""
	rg = RG(file_name)
	profile_paths = getPublicProfiles(limit=num)
	try:
		for path in profile_paths:
			profile = ProfileParser(path).parseHtml()
			# NOTE: storing profile.extra_profile_list through
			# Utils.putExtraProfilesIntoDB is currently disabled.
			rg.add(profile)
			DBHelper.dataSetRDF(profile.file_name, rdf=1)
	except Exception:
		# Best effort: report the failure and fall through to the cleanup so
		# that everything processed so far is still written out.
		traceback.print_exc()
	finally:
		# A single cleanup path. It also runs when the loop is interrupted by
		# something that is not an Exception (e.g. KeyboardInterrupt), which
		# then propagates after the graph and database have been flushed.
		rg.save(format='xml', file_name=file_name)
		rg.close()
		DBHelper.commitAndClose()
コード例 #2
0
	def add_experience_triple(self, profile, person):
		"""Add position and company triples for every experience of a profile.

		For each entry of ``profile.experience_list`` (entries are accessed
		like dicts) this method:

		* sets the profile city from the entry's ``city`` when the profile has
		  none yet;
		* when ``job_title`` is present, creates a blank node typed
		  ``schema.Position`` carrying the occupation and, if available, the
		  ``from`` / ``to`` dates (an end value of "current" or "now", in any
		  letter case, is stored as the string ``'now'``);
		* when ``company_name`` is present, labels the company term and, the
		  first time the company is seen, adds its type, cities and the
		  fields of its company profile;
		* links company and person to the position node. The link is only
		  added when this entry actually produced a position node.

		:param profile: parsed profile exposing ``experience_list`` and ``city``.
		:param person: graph term of the person the experiences belong to.
		"""
		for experience in profile.experience_list:
			# Reset per entry: without this an entry that has a company but no
			# job title would reuse the previous entry's position node (or
			# fail with an unbound local on the first entry).
			term = None

			if profile.city is None and 'city' in experience:
				self.set_profile_city(person, profile, experience['city'])

			if 'job_title' in experience:
				job_title = self.position_helper(experience['job_title'])
				term = BNode()

				self.graph_add(term, RDF.type, self.schema.Position)
				self.graph_add(term, self.schema.occupation, Literal(job_title))

				# Only the key lookup is optional; errors raised by the
				# helpers below are no longer swallowed as a "missing key".
				start = experience['from'] if 'from' in experience else None
				if start and self.check_datetime_format(start):
					self.graph_add(term, self.schema.from_value, Literal(start, datatype=XSD.date))

				end = experience['to'] if 'to' in experience else None
				if end:
					if self.check_datetime_format(end):
						self.graph_add(term, self.schema.to_time, Literal(end, datatype=XSD.date))
					elif end.lower() in ('current', 'now'):
						# Open-ended position: stored as the marker string 'now'.
						self.graph_add(term, self.schema.to_time, Literal('now', datatype=XSD.string))

			if 'company_name' in experience:
				company_name = self.company_name_helper(experience['company_name'])
				company = self.schema.get_term(company_name)

				self.graph_add(company, RDFS.label, Literal(company_name, datatype=XSD.string))

				# The company is described only once; self.companies remembers
				# the names that have already been defined.
				if company_name not in self.companies:
					self.graph_add(company, RDF.type, self.schema.Organization)
					self.companies.add(company_name)

					# add city info
					cities = self.get_cities_by_company_name(company_name)
					for city in cities:
						self.graph_add(company, self.schema.city, self.schema.get_term(city))

					# Fall back to the company's first city for the profile.
					if profile.city is None and cities:
						self.set_profile_city(person, profile, cities[0])

					# Enrich the company with the fields of its company
					# profile when the experience provides a company URL.
					if 'company_url' in experience:
						company_profile = self.get_company_profile(experience['company_url'], company_name)

						if 'Founded' in company_profile:
							self.graph_add(company, self.schema.formation_year, Literal(company_profile['Founded'], datatype=XSD.gYear))

						if 'Company Size' in company_profile:
							mini, maxi = self.get_company_size(company_profile['Company Size'])
							# NOTE(review): the size bounds reuse the predicates
							# used for position dates (from_value / to_time);
							# confirm this is intended by the schema.
							self.graph_add(company, self.schema.from_value, Literal(mini, datatype=XSD.integer))
							self.graph_add(company, self.schema.to_time, Literal(maxi, datatype=XSD.integer))

						if 'Type' in company_profile:
							self.graph_add(company, self.schema.organization_type, Literal(company_profile['Type'], datatype=XSD.string))

						if 'Industry' in company_profile:
							self.graph_add(company, self.schema.industry, self.schema.get_term(company_profile['Industry']))

						# NOTE(review): assumes every company profile carries
						# a 'file_name' entry -- confirm against
						# get_company_profile.
						DBHelper.dataSetRDF(company_profile['file_name'], rdf=1)

				# Link company and person to the position only when this entry
				# defined one.
				if term is not None:
					self.graph_add(company, self.schema.has_position, term)
					self.graph_add(person, self.schema.works_as, term)