Example #1
0
def run_neuroner_predict_erks(project_id, document):
    """Run the pretrained NeuroNER model on one document and return its entities.

    The document is written to ``document1.txt`` inside a fresh ``deploy``
    folder whose parent is named ``<project_id>_<YYYYmmddHHMMSS>`` under the
    NeuroNER ``data`` directory. NeuroNER is then started with training
    disabled and the pretrained model in ``trained_models/my_model_2``.

    Args:
        project_id: String used as the prefix of the per-run dataset folder.
        document: Text to annotate; handed to ``file.writelines``.

    Returns:
        ``nn.brat_entities`` after the run, or ``None`` when any step raised
        (the exception is logged via ``log_exception`` and not propagated).
    """
    neuroner_home_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'erks/erks_bps/neuroner')
    pretrained_model_folder = os.path.join(neuroner_home_dir,
                                           'trained_models/my_model_2')
    output_folder = os.path.join(neuroner_home_dir, 'output')
    dataset_text_folder = os.path.join(
        neuroner_home_dir, "data",
        project_id + "_" + datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
    deploy_dir = os.path.join(dataset_text_folder, 'deploy')
    result = None
    try:
        os.makedirs(deploy_dir)
        with open(os.path.join(deploy_dir, "document1.txt"), 'w') as d:
            d.writelines(document)
        argv = [
            '--train_model=False',
            '--use_pretrained_model=True',
            '--dataset_text_folder=' + dataset_text_folder,
            '--pretrained_model_folder=' + pretrained_model_folder,
            '--output_folder=' + output_folder,
        ]
        arguments = parse_arguments(argv)
        nn = NeuroNER(**arguments)
        try:
            nn.fit()
        finally:
            # Always release the model, even when prediction fails; the
            # original skipped close() whenever fit() raised.
            nn.close()
        # Read after close(), exactly as the original did.
        result = nn.brat_entities

    except Exception as e:
        log_exception(e)

    return result
Example #2
0
def get_type_system_diagram(project_id):
    """Fetch the type-system diagram of a project and return it serialized.

    Args:
        project_id: Identifier passed to ``models.get_type_system_diagram``.

    Returns:
        The output of ``dumps`` for a dict with ``resultOK`` set to ``True``
        and the diagram under ``result``; on any exception ``resultOK`` is
        ``False`` and ``message`` holds the text of the caught exception.
    """
    result = {}
    try:
        type_system_diagram = models.get_type_system_diagram(project_id)
        result["resultOK"] = True
        result["result"] = type_system_diagram
    except Exception as e:
        result["resultOK"] = False
        # Report the caught error; str(Exception) would only yield the
        # class repr "<class 'Exception'>".
        result["message"] = str(e)
        log_exception(e)

    return dumps(result, ensure_ascii=False)
Example #3
0
    def create_ground_truth(self, documents, document_type):
        """Split each document's text into sentences and store the ground truth.

        For every document a base ground truth is obtained from
        ``self.get_base_ground_truth`` and its ``"text"`` is scanned one
        character at a time:

        * two consecutive ``"\\r"`` end the current sentence; neither of the
          two characters is added to the sentence buffer;
        * a lone ``"\\r"`` is kept in the buffer;
        * a character found in ``self.sentence_breaker`` is appended and then
          ends the sentence;
        * any other character is appended, and the first such character of a
          sentence fixes ``begin`` (its offset in the text).

        Each finished sentence, and whatever remains at the end of the text,
        is handed to ``self.sentence_parser``. The ground truth is then
        inserted into ``ground_truth_collection``. If scanning raises, the
        exception is logged and the ground truth is inserted as it stands.

        Args:
            documents: Iterable of documents accepted by
                ``self.get_base_ground_truth``.
            document_type: Value stored under ``"document_type"``.
        """
        for document in documents:
            ground_truth = self.get_base_ground_truth(document)
            text = ground_truth["text"]

            # Scanner state is per document: without this reset, the tail of
            # the previous document (or a stale is_begin_set flag) would leak
            # into the first sentence of the next one.
            str_buffer = []
            is_begin_set = False
            offset = 0
            begin = 0
            length = len(text)
            iter_data = iter(text)
            try:
                for char in iter_data:
                    if char == "\r":
                        if offset + 1 < length and text[offset + 1] == '\r':
                            self.sentence_parser(ground_truth=ground_truth,
                                                 begin=begin,
                                                 sentence_buffer=str_buffer)
                            str_buffer = []
                            # Consume the second "\r" of the pair. The
                            # builtin next() works on Python 2 and 3, whereas
                            # the iterator's .next() method is Python 2 only.
                            next(iter_data)
                            offset += 1
                            is_begin_set = False
                        else:
                            str_buffer.append(char)
                    elif char in self.sentence_breaker:
                        str_buffer.append(char)
                        self.sentence_parser(ground_truth=ground_truth,
                                             begin=begin,
                                             sentence_buffer=str_buffer)
                        str_buffer = []
                        is_begin_set = False
                    else:
                        str_buffer.append(char)
                        if not is_begin_set:
                            begin = offset
                            is_begin_set = True

                    offset += 1

                # Flush whatever follows the last sentence boundary.
                self.sentence_parser(ground_truth=ground_truth,
                                     begin=begin,
                                     sentence_buffer=str_buffer)
            except Exception as e:
                log_exception(e)

            ground_truth_collection.insert_one({
                "project_id": self.global_project_id,
                "global_document_id": self.global_document_id,
                "ground_truth": ground_truth,
                "document_type": document_type
            })
Example #4
0
def online_text_ground_truth(project_id):
    """Return the online-text ground truth of a project, serialized.

    Args:
        project_id: Identifier passed to
            ``models.get_online_text_ground_truth``.

    Returns:
        The output of ``dumps`` for a dict with ``resultOK`` set to ``True``
        and the data under ``document``; on any exception ``resultOK`` is
        ``False`` and ``message`` holds the text of the caught exception.
    """
    result = {}

    try:
        document = models.get_online_text_ground_truth(project_id)
        result["resultOK"] = True
        result["document"] = document

    except Exception as e:
        result["resultOK"] = False
        # Report the caught error, not the repr of the Exception class.
        result["message"] = str(e)
        log_exception(e)

    return dumps(result, ensure_ascii=False)
Example #5
0
def get_relationship_type_list(project_id):
    """Return the relationship types of a project, serialized.

    Args:
        project_id: Identifier passed to
            ``models.get_relationship_type_list``.

    Returns:
        The output of ``dumps`` for a dict with ``resultOK`` set to ``True``
        and the data under ``list``; on any exception ``resultOK`` is
        ``False`` and ``message`` holds the text of the caught exception.
    """
    result = {}

    try:
        relationship_type_list = models.get_relationship_type_list(project_id)
        result["resultOK"] = True
        result["list"] = relationship_type_list

    except Exception as e:
        result["resultOK"] = False
        # Report the caught error, not the repr of the Exception class.
        result["message"] = str(e)
        log_exception(e)

    return dumps(result, ensure_ascii=False)
Example #6
0
def ground_truth(project_id):
    """Return the ground truth of the document named in the request, serialized.

    The document id is read from ``request.json['document_id']`` and
    converted to ``str`` before being passed to ``models.get_ground_truth``.

    Args:
        project_id: Identifier of the project owning the document.

    Returns:
        The output of ``dumps`` for a dict with ``resultOK`` set to ``True``
        and the data under ``document``; on any exception (including a
        missing ``document_id``) ``resultOK`` is ``False`` and ``message``
        holds the text of the caught exception.
    """
    result = {}

    try:
        document_id = str(request.json['document_id'])
        document = models.get_ground_truth(project_id, document_id)
        result["resultOK"] = True
        result["document"] = document

    except Exception as e:
        result["resultOK"] = False
        # Report the caught error, not the repr of the Exception class.
        result["message"] = str(e)
        log_exception(e)

    return dumps(result, ensure_ascii=False)
Example #7
0
def sire_info(project_id):
    """Return the SIRE info of a project, serialized.

    Args:
        project_id: Identifier passed to ``models.get_sire_info``.

    Returns:
        The output of ``dumps`` for a dict with ``resultOK`` set to ``True``
        and the data under ``sireInfo``; on any exception ``resultOK`` is
        ``False`` and ``message`` holds the text of the caught exception.
    """
    result = {}

    try:
        # NOTE(review): document_id is never used below, but the lookup is
        # kept because a request without it currently ends in the error
        # branch; confirm with callers before dropping it.
        document_id = str(request.json['document_id'])
        document = models.get_sire_info(project_id)
        result["resultOK"] = True
        result["sireInfo"] = document

    except Exception as e:
        result["resultOK"] = False
        # Report the caught error, not the repr of the Exception class.
        result["message"] = str(e)
        log_exception(e)

    return dumps(result, ensure_ascii=False)
Example #8
0
def save_all_typesystem(project_id):
    """Persist the type system sent in the request and return the outcome.

    Reads ``typeSystemDiagram``, ``entityTypes`` and ``relationTypes`` from
    ``request.json`` and forwards them to ``models.save_all_typesystem``.

    Args:
        project_id: Identifier of the project being saved.

    Returns:
        The output of ``dumps`` for a dict with ``resultOK`` set to ``True``
        and the save result under ``result``; on any exception ``resultOK``
        is ``False`` and ``message`` holds the text of the caught exception.
    """
    result = {}

    try:
        payload = request.json
        save_result = models.save_all_typesystem(
            project_id=project_id,
            type_system_diagram=payload['typeSystemDiagram'],
            entity_types=payload['entityTypes'],
            relation_types=payload['relationTypes'])
        result["resultOK"] = True
        result["result"] = save_result

    except Exception as e:
        result["resultOK"] = False
        # Report the caught error, not the repr of the Exception class.
        result["message"] = str(e)
        log_exception(e)

    return dumps(result, ensure_ascii=False)
Example #9
0
def get_entity_type_list(project_id='asdf'):
    """Return the entity types of a project, serialized.

    Args:
        project_id: Identifier passed to ``models.get_entity_type_list``.
            The ``'asdf'`` default is kept for backward compatibility.

    Returns:
        The output of ``dumps`` for a dict with ``resultOK`` set to ``True``
        and the data under ``list``; on any exception ``resultOK`` is
        ``False`` and ``message`` holds the text of the caught exception.
    """
    result = {}

    try:
        entity_type_list = models.get_entity_type_list(project_id)
        result["resultOK"] = True
        result["list"] = entity_type_list

    except Exception as e:
        result["resultOK"] = False
        # Report the caught error, not the repr of the Exception class.
        result["message"] = str(e)
        log_exception(e)

    return dumps(result, ensure_ascii=False)
Example #10
0
def save_all_annotation(project_id):
    """Persist the annotations sent in the request and return the outcome.

    Reads ``ground_truth_id`` (converted to ``str``) and ``saveData`` from
    ``request.json`` and forwards them to ``models.save_all_annotation``.

    Args:
        project_id: Identifier of the project being saved.

    Returns:
        The output of ``dumps`` for a dict with ``resultOK`` set to ``True``
        and the save result under ``result``; on any exception ``resultOK``
        is ``False`` and ``message`` holds the text of the caught exception.
    """
    result = {}

    try:
        ground_truth_id = str(request.json['ground_truth_id'])
        save_data = request.json['saveData']
        save_result = models.save_all_annotation(
            project_id, ground_truth_id=ground_truth_id, save_data=save_data)
        result["resultOK"] = True
        result["result"] = save_result

    except Exception as e:
        result["resultOK"] = False
        # Report the caught error, not the repr of the Exception class.
        result["message"] = str(e)
        log_exception(e)

    return dumps(result, ensure_ascii=False)
Example #11
0
    def document_parser(self, data):
        """Parse ``name,"text"`` records out of ``data`` into document dicts.

        The input is scanned one character at a time:

        * everything up to the first ``,`` is the record name (stripped);
        * between the comma and the opening ``"`` only spaces and tabs are
          allowed, anything else raises ``MyException``;
        * inside the quotes, ``""`` stands for one literal ``"`` and a single
          ``"`` closes the text (stripped) and completes the record;
        * the next record's name starts right after the closing quote.

        Each record is stored in a dict obtained from
        ``self.get_base_document`` with a running 1-based ``document_index``.
        ``self.global_modified_date`` is initialised from
        ``self.get_epoch_time()`` when it is ``None``.

        Args:
            data: Indexable text containing the records.

        Returns:
            List of completed documents. If parsing raises, the exception is
            logged and the documents completed so far are returned; a record
            whose closing quote is missing is dropped.
        """
        offset = 0
        str_buffer = []
        is_name = True
        is_in_quote = False
        documents = []
        document_index = 1

        if self.global_modified_date is None:
            self.global_modified_date = self.get_epoch_time()

        tmp_document = self.get_base_document(
            document_index=document_index,
            modified_date=self.global_modified_date)

        # Hand-written character scanner; per the original author's note, a
        # regex-based approach did not work out.
        try:
            length = len(data)
            iter_data = iter(data)

            for char in iter_data:
                if is_name:
                    if char == ",":
                        tmp_document["name"] = ''.join(str_buffer).strip()
                        str_buffer = []
                        is_name = False
                    else:
                        str_buffer.append(char)

                elif is_in_quote:
                    if char != '"':
                        str_buffer.append(char)
                    elif offset + 1 < length and data[offset + 1] == '"':
                        # Escaped quote: keep one '"' and skip its twin.
                        str_buffer.append(char)
                        next(iter_data)
                        # Keep offset in step with the iterator. Without
                        # this, every later look-ahead is one character
                        # behind and the closing quote is mistaken for
                        # another escape.
                        offset += 1
                    else:
                        # Closing quote: the record is complete.
                        tmp_document["text"] = ''.join(str_buffer).strip()
                        str_buffer = []
                        is_name = True
                        is_in_quote = False
                        documents.append(tmp_document)
                        document_index += 1
                        tmp_document = self.get_base_document(
                            document_index=document_index,
                            modified_date=self.global_modified_date)

                elif char == '"':
                    is_in_quote = True
                elif char not in (" ", "\t"):
                    raise MyException("wrong document format")

                offset += 1

        except MyException as e:
            log_exception(e)
        except Exception as e:
            log_exception(e)

        return documents