def __init__(self, predicate_resume, in_queue=None, out_queue=None, page_size=10000, language='en', kb_SPARQL_endpoint="https://dbpedia.org/sparql", defaultGraph="http://dbpedia.org"):
    """Initialize the worker process.

    Args:
        predicate_resume: Resume/config object describing the predicate
            this worker operates on.
        in_queue: Optional queue of input objects to process; primarily
            for running in parallel.
        out_queue: Optional queue where results are placed.
        page_size: Number of results fetched per SPARQL page.
        language: Language code used when querying the knowledge base.
        kb_SPARQL_endpoint: SPARQL endpoint URL of the knowledge base.
        defaultGraph: Default graph URI used by the endpoint.
    """
    Process.__init__(self)
    # Daemonize so the worker is killed together with the parent process.
    self.daemon = True

    # Each UDF starts its own Engine; see
    # http://docs.sqlalchemy.org/en/latest/core/pooling.html#using-connection-pools-with-multiprocessing
    session_factory = get_sentimantic_session()
    self.sentimantic_session = session_factory()

    self.predicate_resume = predicate_resume
    self.in_queue = in_queue
    self.out_queue = out_queue
    self.page_size = page_size
    self.language = language
    self.kb_SPARQL_endpoint = kb_SPARQL_endpoint
    self.defaultGraph = defaultGraph
def save_predicate(predicate_URI):
    """Persist a new ``Predicate`` row for *predicate_URI*.

    The URI is stripped of surrounding whitespace before insertion. If a
    predicate with the same URI already exists (unique-constraint
    violation), the insert is rolled back and a warning is logged instead.

    Args:
        predicate_URI: URI string identifying the predicate.
    """
    # Strip first so every log line and the stored row use the same value.
    predicate_URI = predicate_URI.strip()
    logging.info('Saving predicate "%s"', predicate_URI)
    SentimanticSession = get_sentimantic_session()
    sentimantic_session = SentimanticSession()
    try:
        new_predicate = Predicate(uri=predicate_URI)
        sentimantic_session.add(new_predicate)
        sentimantic_session.commit()
    except IntegrityError:
        # Duplicate URI: roll back so the session stays usable.
        # logging.warn is deprecated; use logging.warning.
        logging.warning('Predicate "%s" already existing', predicate_URI)
        sentimantic_session.rollback()
    else:
        # Only claim success when the commit actually went through
        # (the original logged "saved" even on the duplicate path).
        logging.info('Predicate "%s" saved', predicate_URI)
def infer_and_save_predicate_candidates_types(
        predicate_URI, sample_files_base_path="./data/samples/"):
    """Infer domain/range named-entity pairs for a predicate and persist them.

    For every (domain, range) named-entity combination reported by the
    knowledge base, ensure a ``BinaryCandidate`` row exists and link it to
    the predicate through ``PredicateCandidateAssoc``. Does nothing when
    the predicate is not present in the database.

    Args:
        predicate_URI: URI string identifying the predicate.
        sample_files_base_path: Kept for interface compatibility;
            not used by this function.
    """
    logging.info(
        'Starting infering predicate "%s" domain, range and candidates types ',
        predicate_URI)
    SentimanticSession = get_sentimantic_session()
    sentimantic_session = SentimanticSession()
    predicate_URI = predicate_URI.strip()
    # Retrieve the predicate's domains and ranges from the knowledge base.
    domains = get_predicate_domains(predicate_URI)
    ranges = get_predicate_ranges(predicate_URI)
    predicate = sentimantic_session.query(Predicate).filter(
        Predicate.uri == predicate_URI).first()
    if predicate is not None:
        for domain in domains:
            subject_ne = domain["ne"]
            # "range_" avoids shadowing the builtin range().
            for range_ in ranges:
                object_ne = range_["ne"]
                # Ensure the (subject, object) candidate pair exists.
                candidate = sentimantic_session.query(BinaryCandidate) \
                    .filter(BinaryCandidate.subject_namedentity == subject_ne,
                            BinaryCandidate.object_namedentity == object_ne
                            ).first()
                if candidate is None:
                    candidate = BinaryCandidate(subject_namedentity=subject_ne,
                                                object_namedentity=object_ne)
                    sentimantic_session.add(candidate)
                    # Commit populates candidate.id on the same object, so
                    # no re-query is needed afterwards.
                    sentimantic_session.commit()
                # Ensure the predicate <-> candidate association exists.
                pca = sentimantic_session.query(PredicateCandidateAssoc) \
                    .filter(PredicateCandidateAssoc.predicate_id == predicate.id,
                            PredicateCandidateAssoc.candidate_id == candidate.id
                            ).first()
                if pca is None:
                    pca = PredicateCandidateAssoc(predicate_id=predicate.id,
                                                  candidate_id=candidate.id)
                    sentimantic_session.add(pca)
                    sentimantic_session.commit()
    logging.info(
        'Finished infering predicate "%s" domain, range and candidates types ',
        predicate_URI)
def get_types_filter_regex():
    """Build a SPARQL ``FILTER`` clause matching any known ``Type`` URI.

    Each type URI stored in the database becomes a case-insensitive
    ``regex(?type, "<uri>", "i")`` term; the terms are OR-ed together.

    Returns:
        The filter clause as a string, e.g.
        ``FILTER(  regex(?type, "u1", "i")  ||  regex(?type, "u2", "i")  ) ``.
    """
    SentimanticSession = get_sentimantic_session()
    sentimantic_session = SentimanticSession()
    types = sentimantic_session.query(Type).all()
    # join() replaces the original index-counter loop; local names avoid
    # shadowing the builtins filter() and type().
    clauses = [' regex(?type, "' + t.uri + '", "i") ' for t in types]
    return "FILTER( " + " || ".join(clauses) + " ) "
def get_predicate_resume(predicate_configs):
    """Build the "resume" dicts for every candidate pair of a predicate.

    For each ``PredicateCandidateAssoc`` of the configured predicate this
    creates (or reuses) the snorkel candidate subclass, best-effort creates
    a reporting SQL view over its labels/marginals, resolves subject/object
    types, and collects everything into a per-pair dict.

    Args:
        predicate_configs: Dict with at least the keys ``'uri'``,
            ``'name'`` and ``'words'``.

    Returns:
        A list of dicts (one per candidate pair); empty when the predicate
        is not in the database.
    """
    predicate_URI = predicate_configs['uri']
    result = []
    SentimanticSession = get_sentimantic_session()
    sentimantic_session = SentimanticSession()
    predicate = sentimantic_session.query(Predicate).filter(
        Predicate.uri == predicate_URI).first()
    if predicate is not None:
        predicate_URI = predicate_URI.strip()
        pca_list = sentimantic_session.query(PredicateCandidateAssoc) \
            .filter(PredicateCandidateAssoc.predicate_id == predicate.id).all()
        for pca in pca_list:
            candidate = sentimantic_session.query(BinaryCandidate) \
                .filter(BinaryCandidate.id == pca.candidate_id).first()
            subject_ne = candidate.subject_namedentity.strip()
            object_ne = candidate.object_namedentity.strip()
            # Keep the name as text: the original .encode("utf-8") produced
            # bytes, which breaks the str concatenations below (and the
            # candidate_subclass name) under Python 3.
            candidate_name = subject_ne + object_ne
            CandidateSubclass = candidate_subclass(candidate_name,
                                                   ["subject", "object"])
            # Table/view identifier derived from the candidate name.
            # NOTE(review): interpolated directly into SQL; values come from
            # our own DB, but confirm they are safe identifiers.
            table = candidate_name.lower()
            try:
                statement = text(
                    "CREATE OR REPLACE VIEW " + table + "_view AS "
                    "SELECT document.id AS docid, document.name AS docname, "
                    + table + ".id AS candid, candidate.split, "
                    "sentence.id as sent_id, sentence.text, "
                    "predicate.uri as predicate_URI, "
                    "label.value AS label_value, marginal.probability "
                    "FROM " + table + " "
                    "JOIN candidate ON candidate.id = " + table + ".id "
                    "JOIN span ON " + table + ".subject_person_id = span.id "
                    "JOIN sentence ON span.sentence_id = sentence.id "
                    "JOIN document ON sentence.document_id = document.id "
                    "LEFT JOIN label ON candidate.id = label.candidate_id "
                    "LEFT JOIN label_key ON label.key_id = label_key.id "
                    "LEFT JOIN predicate_candidate_assoc "
                    "ON label_key.\"group\" = predicate_candidate_assoc.id "
                    "left join predicate "
                    "on predicate_candidate_assoc.predicate_id=predicate.id "
                    "LEFT JOIN marginal "
                    "ON marginal.candidate_id = candidate.id;")
                get_sentimantic_engine().execute(statement)
            except Exception:
                # View creation is best-effort (e.g. it may already exist);
                # log instead of print for consistency with the module.
                logging.warning("Skip view creation")
            # NOTE(review): .first() assumes an assoc row always exists for
            # each named entity — AttributeError otherwise; confirm upstream.
            subject_type = sentimantic_session.query(TypeNamedEntityAssoc) \
                .filter(TypeNamedEntityAssoc.namedentity == subject_ne) \
                .first().type
            object_type = sentimantic_session.query(TypeNamedEntityAssoc) \
                .filter(TypeNamedEntityAssoc.namedentity == object_ne) \
                .first().type
            predicate_name = predicate_configs['name']
            words = predicate_configs["words"]
            sample_class = get_predicate_candidate_samples_table(
                "Sample" + predicate_name.title() + subject_ne.title()
                + object_ne.title())
            result.append({
                "predicate_name": predicate_name,
                "predicate_URI": predicate_URI,
                "candidate_subclass": CandidateSubclass,
                "subject_ne": subject_ne,
                "object_ne": object_ne,
                "subject_type": subject_type,
                "object_type": object_type,
                "label_group": pca.id,
                "sample_class": sample_class,
                "words": words,
                "configs": predicate_configs
            })
    return result