def run(num, file_name):
    """Build an RDF graph from up to ``num`` public profiles and save it.

    Each profile path returned by ``getPublicProfiles(limit=num)`` is parsed,
    added to the graph and flagged through ``DBHelper.dataSetRDF(..., rdf=1)``.
    If processing raises an ``Exception`` the traceback is printed and the
    loop stops; either way the graph built so far is written to ``file_name``
    as XML, the generator is closed and the DB helper is committed and closed.

    :param num: maximum number of profiles to request.
    :param file_name: passed to ``RG`` and used as the output file on save.
    """
    generator = RG(file_name)
    paths = getPublicProfiles(limit=num)

    try:
        for profile_path in paths:
            profile = ProfileParser(profile_path).parseHtml()
            # Queuing profile.extra_profile_list through
            # Utils.putExtraProfilesIntoDB is currently disabled.
            generator.add(profile)
            DBHelper.dataSetRDF(profile.file_name, rdf=1)
    except Exception:
        # Best effort: report the failure, then still persist what we have.
        traceback.print_exc()

    # Shared shutdown for both the success and the failure path.
    generator.save(format='xml', file_name=file_name)
    generator.close()
    DBHelper.commitAndClose()
def add_experience_triple(self, profile, person):
    """Add the RDF triples describing each work experience of a profile.

    For every entry of ``profile.experience_list`` that has a ``job_title``,
    a blank ``Position`` node is created with its occupation and, when
    present, its start and end dates.  The node is linked to ``person``
    through ``works_as`` and, when the entry names a company, to that
    company through ``has_position``.

    A company is typed as ``Organization`` and enriched (cities and
    company-page details) only the first time its name is seen; seen names
    are tracked in ``self.companies``.

    While ``profile.city`` is ``None`` the method also calls
    ``set_profile_city``, first with the entry's own ``city`` and otherwise
    with the first city known for the company.

    :param profile: parsed profile exposing ``experience_list`` (an iterable
        of mappings) and ``city``.
    :param person: graph node representing the profile owner.
    """
    for experience in profile.experience_list:
        if profile.city is None and 'city' in experience:
            self.set_profile_city(person, profile, experience['city'])

        # Without a job title there is no position to describe.
        if 'job_title' not in experience:
            continue

        job_title = self.position_helper(experience['job_title'])
        term = BNode()
        self.graph_add(term, RDF.type, self.schema.Position)
        self.graph_add(term, self.schema.occupation, Literal(job_title))

        # Only the key lookup is optional; errors raised while validating or
        # adding the triple are no longer hidden behind ``except KeyError``.
        start = experience['from'] if 'from' in experience else None
        if start and self.check_datetime_format(start):
            self.graph_add(term, self.schema.from_value,
                           Literal(start, datatype=XSD.date))

        end = experience['to'] if 'to' in experience else None
        if end:
            if self.check_datetime_format(end):
                self.graph_add(term, self.schema.to_time,
                               Literal(end, datatype=XSD.date))
            elif end.lower() in ('current', 'now'):
                # Case-insensitive match: the former ``.upper() == 'now'``
                # comparison could never be true.
                self.graph_add(term, self.schema.to_time,
                               Literal('now', datatype=XSD.string))

        if 'company_name' in experience:
            company_name = self.company_name_helper(experience['company_name'])
            company = self.schema.get_term(company_name)
            self.graph_add(company, RDFS.label,
                           Literal(company_name, datatype=XSD.string))

            # We need to define this company the first time it shows up.
            # NOTE(review): the source indentation was lost; the city and
            # company-page enrichment below is assumed to belong under this
            # first-sighting guard -- confirm against the original layout.
            if company_name not in self.companies:
                self.graph_add(company, RDF.type, self.schema.Organization)
                self.companies.add(company_name)

                # Add city info.
                cities = self.get_cities_by_company_name(company_name)
                for city in cities:
                    self.graph_add(company, self.schema.city,
                                   self.schema.get_term(city))
                if profile.city is None and cities:
                    self.set_profile_city(person, profile, cities[0])

                # Extra details are available when the entry links to a
                # company page.
                if 'company_url' in experience:
                    company_profile = self.get_company_profile(
                        experience['company_url'], company_name)
                    if 'Founded' in company_profile:
                        self.graph_add(
                            company, self.schema.formation_year,
                            Literal(company_profile['Founded'],
                                    datatype=XSD.gYear))
                    if 'Company Size' in company_profile:
                        mini, maxi = self.get_company_size(
                            company_profile['Company Size'])
                        # Same from/to predicates as the position dates --
                        # presumably generic range bounds; TODO confirm.
                        self.graph_add(company, self.schema.from_value,
                                       Literal(mini, datatype=XSD.integer))
                        self.graph_add(company, self.schema.to_time,
                                       Literal(maxi, datatype=XSD.integer))
                    if 'Type' in company_profile:
                        self.graph_add(
                            company, self.schema.organization_type,
                            Literal(company_profile['Type'],
                                    datatype=XSD.string))
                    if 'Industry' in company_profile:
                        self.graph_add(
                            company, self.schema.industry,
                            self.schema.get_term(company_profile['Industry']))
                    DBHelper.dataSetRDF(company_profile['file_name'], rdf=1)

            self.graph_add(company, self.schema.has_position, term)

        self.graph_add(person, self.schema.works_as, term)