def __init__(self,
                 predicate_resume,
                 in_queue=None,
                 out_queue=None,
                 page_size=10000,
                 language='en',
                 kb_SPARQL_endpoint="https://dbpedia.org/sparql",
                 defaultGraph="http://dbpedia.org"):
        """
        Set up a daemonized worker process.

        predicate_resume: the predicate-resume dict this worker operates on.
        in_queue: A Queue of input objects to process; primarily for running in parallel
        out_queue: optional Queue where results are pushed.
        page_size: number of rows fetched per SPARQL page.
        language: language tag used when querying the knowledge base.
        kb_SPARQL_endpoint: SPARQL endpoint URL of the knowledge base.
        defaultGraph: default graph URI passed to the endpoint.
        """
        Process.__init__(self)
        self.daemon = True
        self.in_queue, self.out_queue = in_queue, out_queue

        # Each UDF starts its own Engine
        # See http://docs.sqlalchemy.org/en/latest/core/pooling.html#using-connection-pools-with-multiprocessing
        session_factory = get_sentimantic_session()
        self.sentimantic_session = session_factory()

        self.predicate_resume = predicate_resume
        self.page_size = page_size
        self.language = language
        self.kb_SPARQL_endpoint = kb_SPARQL_endpoint
        self.defaultGraph = defaultGraph
# Example 2
def save_predicate(predicate_URI):
    """
    Persist a Predicate row for the given URI.

    predicate_URI: URI of the predicate to store; surrounding whitespace
        is stripped before saving.

    If a predicate with the same URI already exists, the resulting
    IntegrityError is caught, the transaction is rolled back, and a
    warning is logged instead of raising.
    """
    logging.info('Saving predicate "%s"', predicate_URI)
    SentimanticSession = get_sentimantic_session()
    sentimantic_session = SentimanticSession()
    predicate_URI = predicate_URI.strip()
    try:
        new_predicate = Predicate(uri=predicate_URI)
        sentimantic_session.add(new_predicate)
        sentimantic_session.commit()
    except IntegrityError:
        # logging.warn() is a deprecated alias; warning() is the supported form.
        logging.warning('Predicate "%s" already existing', predicate_URI)
        sentimantic_session.rollback()
    else:
        # Only report success when the row was actually committed;
        # previously this fired even after a rollback.
        logging.info('Predicate "%s" saved', predicate_URI)
# Example 3
def infer_and_save_predicate_candidates_types(
        predicate_URI, sample_files_base_path="./data/samples/"):
    """
    Infer a predicate's (domain, range) named-entity pairs and persist them.

    For each combination of a domain named-entity and a range named-entity
    of the predicate, ensure a BinaryCandidate row exists and link it to the
    predicate via a PredicateCandidateAssoc row.

    predicate_URI: URI of the predicate to analyze (whitespace is stripped).
    sample_files_base_path: kept for backward compatibility; not used here.
    """
    logging.info(
        'Starting infering predicate "%s" domain, range and candidates types ',
        predicate_URI)
    SentimanticSession = get_sentimantic_session()
    sentimantic_session = SentimanticSession()
    predicate_URI = predicate_URI.strip()
    # Retrieve predicate domain and range named-entity descriptors from the KB.
    domains = get_predicate_domains(predicate_URI)
    ranges = get_predicate_ranges(predicate_URI)
    predicate = sentimantic_session.query(Predicate).filter(
        Predicate.uri == predicate_URI).first()
    if predicate is not None:
        for domain in domains:
            subject_ne = domain["ne"]
            # `range_` avoids shadowing the builtin range().
            for range_ in ranges:
                object_ne = range_["ne"]
                candidate = sentimantic_session.query(BinaryCandidate) \
                    .filter(BinaryCandidate.subject_namedentity == subject_ne,
                            BinaryCandidate.object_namedentity == object_ne
                            ).first()
                if candidate is None:
                    # Insert the candidate, then re-read it so its generated
                    # primary key is available below.
                    candidate = BinaryCandidate(subject_namedentity=subject_ne,
                                                object_namedentity=object_ne)
                    sentimantic_session.add(candidate)
                    sentimantic_session.commit()
                    candidate = sentimantic_session.query(BinaryCandidate) \
                        .filter(BinaryCandidate.subject_namedentity == subject_ne,
                                BinaryCandidate.object_namedentity == object_ne
                                ).first()

                pca = sentimantic_session.query(PredicateCandidateAssoc) \
                    .filter(PredicateCandidateAssoc.predicate_id == predicate.id,
                            PredicateCandidateAssoc.candidate_id == candidate.id
                            ).first()
                if pca is None:
                    # (Removed dead locals: predicate_split / predicate_name
                    # were computed here but never used.)
                    pca = PredicateCandidateAssoc(predicate_id=predicate.id,
                                                  candidate_id=candidate.id)
                    sentimantic_session.add(pca)
                    sentimantic_session.commit()
    logging.info(
        'Finished infering predicate "%s" domain, range and candidates types ',
        predicate_URI)
# Example 4
def get_types_filter_regex():
    """
    Build a SPARQL FILTER clause matching ?type against every stored Type URI.

    Each Type row in the sentimantic database contributes one
    case-insensitive regex() alternative, OR-ed together inside FILTER(...).
    """
    SentimanticSession = get_sentimantic_session()
    sentimantic_session = SentimanticSession()
    stored_types = sentimantic_session.query(Type).all()
    clauses = [' regex(?type, "' + stored_type.uri + '", "i") '
               for stored_type in stored_types]
    return "FILTER( " + " || ".join(clauses) + " ) "
# Example 5
def get_predicate_resume(predicate_configs):
    """
    Build the list of "predicate resume" dicts for one configured predicate.

    For the predicate identified by predicate_configs['uri'], walk every
    PredicateCandidateAssoc row linked to it, create a candidate subclass
    and (best-effort) a reporting SQL view per candidate pair, and collect
    the per-pair metadata into a list of dicts.

    predicate_configs: dict; this function reads its 'uri', 'name' and
        'words' keys.
    Returns: list of dicts (empty if the predicate is not in the database).
    """
    predicate_URI = predicate_configs['uri']
    result = []
    SentimanticSession = get_sentimantic_session()
    sentimantic_session = SentimanticSession()
    predicate = sentimantic_session.query(Predicate).filter(
        Predicate.uri == predicate_URI).first()
    if predicate != None:
        predicate_URI = predicate_URI.strip()
        # All candidate associations registered for this predicate.
        pca_list=sentimantic_session.query(PredicateCandidateAssoc) \
            .filter(PredicateCandidateAssoc.predicate_id == predicate.id).all()
        for pca in pca_list:
            candidate=sentimantic_session.query(BinaryCandidate) \
                .filter(BinaryCandidate.id==pca.candidate_id).first()
            subject_ne = candidate.subject_namedentity.strip()
            object_ne = candidate.object_namedentity.strip()
            # NOTE(review): .encode("utf-8") yields bytes on Python 3, which
            # would break the str concatenation in the SQL below — this code
            # appears to target Python 2; confirm before porting.
            candidate_name = (subject_ne + object_ne).encode("utf-8")
            CandidateSubclass = candidate_subclass(candidate_name,
                                                   ["subject", "object"])
            try:
                # Best-effort creation of a reporting view that joins this
                # candidate type with its sentences, labels and marginals.
                statement = text("""
        CREATE OR REPLACE VIEW """ + candidate_name.lower() + """_view AS
            SELECT document.id AS docid,
        document.name AS docname,
        """ + candidate_name.lower() + """.id AS candid,
        candidate.split,
        sentence.id as sent_id,
        sentence.text,
        predicate.uri as predicate_URI,
        label.value AS label_value,
        marginal.probability
       FROM """ + candidate_name.lower() + """
         JOIN candidate ON candidate.id = """ + candidate_name.lower() + """.id
         JOIN span ON """ + candidate_name.lower() +
                                 """.subject_person_id = span.id
         JOIN sentence ON span.sentence_id = sentence.id
         JOIN document ON sentence.document_id = document.id
         LEFT JOIN label ON candidate.id = label.candidate_id
         LEFT JOIN label_key ON label.key_id = label_key.id
         LEFT JOIN predicate_candidate_assoc ON label_key."group" = predicate_candidate_assoc.id
         left join predicate on predicate_candidate_assoc.predicate_id=predicate.id 
         LEFT JOIN marginal ON marginal.candidate_id = candidate.id;
         
         """)
                get_sentimantic_engine().execute(statement)
            except Exception:
                # View creation is deliberately best-effort (e.g. it may
                # already exist); failures are reported and skipped.
                print("Skip view creation")
            # NOTE(review): .first() is assumed non-None here — a named
            # entity with no TypeNamedEntityAssoc row would raise
            # AttributeError; verify the data guarantees this.
            subject_type=sentimantic_session.query(TypeNamedEntityAssoc) \
                .filter(TypeNamedEntityAssoc.namedentity == subject_ne).first().type
            object_type=sentimantic_session.query(TypeNamedEntityAssoc) \
                .filter(TypeNamedEntityAssoc.namedentity == object_ne).first().type

            predicate_name = predicate_configs['name']
            words = predicate_configs["words"]
            sample_class = get_predicate_candidate_samples_table(
                "Sample" + predicate_name.title() + subject_ne.title() +
                object_ne.title())
            result.append({
                "predicate_name": predicate_name,
                "predicate_URI": predicate_URI,
                "candidate_subclass": CandidateSubclass,
                "subject_ne": subject_ne,
                "object_ne": object_ne,
                "subject_type": subject_type,
                "object_type": object_type,
                "label_group": pca.id,
                "sample_class": sample_class,
                "words": words,
                "configs": predicate_configs
            })
    return result