Python DBConnect.insert_log 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: corporachar.utils.db_manager

클래스/타입: DBConnect

메소드/함수: insert_log

hotexamples.com에서의 예제들: 3

Python DBConnect.insert_log - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 corporachar.utils.db_manager.DBConnect.insert_log에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

insert_log(3)

init_model(1)

insert_annotations(1)

insert_ontologies(1)

insert_word(1)

select_tags(1)

예제 #1

파일 보기

파일: bioportal_annotator.py 프로젝트: ficolo/corpora-char-cli

def annotate_doc(pdf_file_path, ontologies):
    if pdf_file_path.endswith('pdf') or pdf_file_path.endswith('PDF'):
        text = textract.process(pdf_file_path, method="pdfminer")
    elif pdf_file_path.endswith('html') or pdf_file_path.endswith('htm'):
        text = textract.process(pdf_file_path, method="beautifulsoup4")
    elif pdf_file_path.endswith('txt'):
            with open(pdf_file_path, 'r') as file:
                text = file.read()
    db = DBConnect()
    if text.isspace():
        log = {
            'file_name': pdf_file_path.encode('utf-8'),
            'error': 'Failed PDF to text transformation in annotation process',
            'exception': '',
            'data': ''
        }
        db.insert_log(log)
        return
    ontologies = ",".join(ontologies)
    annotations = []
    text = unidecode(text.decode('utf8'))
    text = ' '.join(text.split())
    # post_data = dict(apikey=settings.BIOPORTAL_API_KEY, text=text,
    #                  display_links='true', display_context='false', minimum_match_length='3',
    #                  exclude_numbers='true', longest_only='true', ontologies=ontologies, exclude_synonyms='true')
    post_data = dict(apikey=settings.BIOPORTAL_API_KEY, text=text,
                     display_links='true', display_context='false', minimum_match_length='3',
                     exclude_numbers='true', longest_only='true', ontologies=ontologies, exclude_synonyms='true')
    try:
        response = requests.post(settings.ANNOTATOR_URL, post_data)
        json_results = json.loads(response.text)
        for result in json_results:
            for annotation in result['annotations']:
                context_begin = annotation['from']  if annotation['from'] - 40 < 1 else annotation['from'] - 40
                context_end = annotation['to'] if annotation['to'] + 40 > len(text) else annotation['to'] + 40
                record = {
                    'file_name': pdf_file_path.encode('utf-8'),
                    'bio_class_id': result['annotatedClass']['@id'],
                    'bio_ontology_id': result['annotatedClass']['links']['ontology'],
                    'text': u'' + annotation['text'].encode('utf-8'),
                    'match_type': annotation['matchType'],
                    'context': u''+text[context_begin:context_end]
                }
                annotations.append(record)
        db.insert_annotations(annotations)
        return
    except (ValueError, IndexError, KeyError) as e:
        print e
        log = {
            'file_name': pdf_file_path.encode('utf-8'),
            'error': 'Bad response from Bioportal Annotator',
            'exception': str(e),
            'data': ''
        }
        db.insert_log(log)
        return

예제 #2

파일 보기

파일: bioportal_annotator.py 프로젝트: ficolo/corpora-char-cli

def get_recommendations_file(pdf_file_path):
        if pdf_file_path.endswith('pdf') or pdf_file_path.endswith('PDF'):
            text = textract.process(pdf_file_path, method="pdfminer")
        elif pdf_file_path.endswith('html') or pdf_file_path.endswith('htm'):
            text = textract.process(pdf_file_path, method="beautifulsoup4")
        elif pdf_file_path.endswith('txt'):
            with open(pdf_file_path, 'r') as file:
                text = file.read()
        if text.isspace():
            log = {
                'file_name': pdf_file_path.encode('utf-8'),
                'error': 'Failed PDF to text transformation in recommendation process',
                'exception': '',
                'data': ''
            }
            db = DBConnect()
            db.insert_log(log)
            return []
        abstract_index = text.find('abstract')
        abstract_index += text.find('ABSTRACT')
        abstract_index += text.find('Abstract')
        abstract_index = 0 if abstract_index < 0 else abstract_index
        text = unidecode(text.decode('utf8'))
        text = ' '.join(text.split())
        text = text[abstract_index:abstract_index+500] if len(text) > 500 else text
        post_data = dict(apikey=settings.BIOPORTAL_API_KEY, input=text, include='ontologies',
                         display_links='false', output_type='2', display_context='false',
                         wc='0.15', ws='1.0', wa='1.0', wd='0.5')
        try:
            response = requests.post(settings.RECOMMENDER_URL, post_data)
            json_results = json.loads(response.text)
            best_ontology_set = json_results[0]['ontologies'] if len(json_results) > 0 else []
            return [{'acronym': ontology['acronym'], 'id': ontology['@id']} for ontology in best_ontology_set]
        except (ValueError, IndexError, KeyError) as e:
            log = {
                'file_name': '',
                'error': 'Bad response from Bioportal Recommender:',
                'exception': str(e),
                'data': ''
            }
            db = DBConnect()
            db.insert_log(log)
            return []

예제 #3

파일 보기

파일: text_mining.py 프로젝트: ficolo/corpora-char-cli

def word_count(pdf_file_path):
    if pdf_file_path.endswith('pdf') or pdf_file_path.endswith('PDF'):
        text = textract.process(pdf_file_path, method="pdfminer")
    elif pdf_file_path.endswith('html') or pdf_file_path.endswith('htm'):
        text = textract.process(pdf_file_path, method="beautifulsoup4")
    elif pdf_file_path.endswith('txt'):
        with open(pdf_file_path, 'r') as file:
            text = file.read()
    if text.isspace():
            log = {
                'file_name': pdf_file_path.encode('utf-8'),
                'error': 'Failed PDF to text transformation in recommendation process',
                'exception': '',
                'data': ''
            }
            db = DBConnect()
            db.insert_log(log)
            return []
    text = unicode(text, 'utf-8')
    words = word_tokenize(text.upper())
    c = Counter()
    c.update(words)
    return c