Example #1
def extract_onto_concepts(config_file):
    """Main function
    Return the dictionnary that we obtain with
    find_concept_abstract. It can write this dictionnary
    in a rdf format"""

    config = gf.load_config(config_file)
    ontology_path = config['ontologies']['path']
    abstract_folder = config['abstract_folder']

    # Build a dictionary mapping each ontology name to the list of its concept URIs
    concept_dic = {}

    for ontology_name in os.listdir(ontology_path):
        ontology = load_ontology(ontology_path, ontology_name)
        list_concept_uri = get_concept_uri(ontology)
        # list_concept_name = get_list_concept_name(list_concept_uri)

        # write_concepts('concepts.txt', list_concept)
        concept_dic[ontology.name] = list_concept_uri

    dic_concept_abstract = find_concept_abstract(concept_dic, abstract_folder)

    if config['rdf']['write_rdf']:
        rdf_translate(dic_concept_abstract, config['rdf']['file_name'])

    return dic_concept_abstract
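
# A minimal sketch (an assumption, not taken from the source) of the config
# dictionary that extract_onto_concepts expects, based on the keys it reads;
# gf.load_config would return something of this shape from the YAML file.
example_config = {
    'ontologies': {'path': 'ontologies/'},      # folder scanned with os.listdir
    'abstract_folder': 'abstracts/',            # passed to find_concept_abstract
    'rdf': {'write_rdf': True, 'file_name': 'concept_abstract.rdf'},
}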
Example #2
    rdf_normalized = rdf_graph.serialize(format='n3')
    rdf_normalized = rdf_normalized.decode('utf-8')

    if config['rdf']['write_rdf']:
        write_rdf(config['rdf']['file_name'], rdf_normalized)


def merge_graph(rdf_file1, rdf_file2):
    """Merge graph can merge two rdf files
    rdf_file1: Just the path of rdf_file1
    rdf_file2: Same for rdf_file2
    Output the merge of two rdf"""

    rdf_graph.parse(rdf_file1, format='n3')
    rdf_graph.parse(rdf_file2, format='n3')

    # For printing
    # rdf_normalized = rdf_graph.serialize(format='n3')
    # rdf_normalized = rdf_normalized.decode('utf-8')

    return rdf_graph
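
# Usage sketch for merge_graph (file names are placeholders, not from the
# source). With the rdflib version used here, serialize() returns bytes,
# which is why the code above decodes it:
#
#     merged = merge_graph('knowledge_a.n3', 'knowledge_b.n3')
#     print(merged.serialize(format='n3').decode('utf-8'))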


if __name__ == '__main__':

    config = gf.load_config('config/config_construct_knowledge_graph.yml')
    create_rdf_graph(config)
    """config_abstract = 'config/config_extract.yml'
    for lol1, lol2 in ex_ab.extract_abstracts(config_abstract):
        print('{}\n{}'.format(lol1,lol2))"""
Example #3
def extract_abstracts(config_file):
    """ Extract all abstract of all Publication in the website
    and can put it in a single file or multiple files. It depends
    options in the file config """

    config = gf.load_config(config_file)

    # Parameters used to extract the article ids
    parameters_id = config['parameters_id']
    url = config['url']

    # Collect all article ids
    list_all_id = get_all_page_id(url, parameters_id)
    parameters_extract_content = config['parameters_extract_content']

    i = 0

    # Initialize parameters for writing
    if config['options']['writing']:
        if config['options']['xml']:
            file_extension = '.xml'
        else:
            file_extension = '.txt'
        if not config['options']['multiple_file']:
            name_file = '{}{}'.format(config['output']['file'], file_extension)
            # A single XML file needs one enclosing tag at the beginning and
            # the end of the document; write the opening tag here
            if config['options']['xml']:
                add_tag_into_file(name_file, '<informations>\n')

    for doc_id, dic_content in get_all_abstract(url, list_all_id,
                                                parameters_extract_content):
        print('Abstract number: {}'.format(i))

        if config['options']['writing']:
            if config['options']['multiple_file']:
                name_file = '{}{}{}'.format(config['output']['folder'], doc_id,
                                            file_extension)
                write_content_into_file(name_file, dic_content[doc_id], doc_id,
                                        config['options'])

            else:
                write_content_into_file(name_file, dic_content[doc_id], doc_id,
                                        config['options'])

        if config['options']['csv']:
            """with open(config['output']['csv'], 'a') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=('abstract', 'keywords', 'title'), restval='')
                if i == 0:
                    writer.writeheader()
                writer.writerow(dic_content[doc_id])"""
            with open(config['output']['csv'], 'a', newline='') as csvfile:
                title = dic_content[doc_id]['title']
                keywords = dic_content[doc_id]['keywords']
                abstract = dic_content[doc_id]['abstract']
                regroup_all = '{} {} {}'.format(title, keywords, abstract)
                writer = csv.writer(csvfile,
                                    quotechar='"',
                                    quoting=csv.QUOTE_ALL)
                writer.writerow(['vgibox'] + [doc_id] + [regroup_all])

        i = i + 1

        yield doc_id, dic_content

    if config['options']['writing']:
        # Write the closing tag when everything went into a single file
        if not config['options']['multiple_file']:
            add_tag_into_file(name_file, '\n</informations>')
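
# A sketch of the config that extract_abstracts reads, assembled from the keys
# accessed above; the concrete values are assumptions for illustration only.
example_extract_config = {
    'url': 'https://example.org/api',
    'parameters_id': {},                  # query parameters used to list article ids
    'parameters_extract_content': {},     # query parameters used to fetch each abstract
    'options': {
        'writing': True,         # write abstracts to disk at all
        'xml': True,             # .xml output (wrapped in <informations>) instead of .txt
        'multiple_file': False,  # one file per abstract vs. a single combined file
        'csv': False,            # additionally append one row per abstract to a CSV
    },
    'output': {
        'file': 'abstracts',     # base name when multiple_file is False
        'folder': 'abstracts/',  # target folder when multiple_file is True
        'csv': 'abstracts.csv',
    },
}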
Example #4
    answer = get_dic_saussure()
    data_json = json.dumps(answer, indent=1, ensure_ascii=False)
    name_file = 'saussure_dic.json'

    with open(name_file, 'w') as nf:
        nf.write(data_json)

    with open('saussure_dic.json') as json1_file:
        json1_data = json.load(json1_file)

    # Convert the dictionary data into flare-style JSON data
    flare_json = convert_dic_to_flare_json(json1_data)

    # We add a unique root to our visualization
    answer = {'name':'Root', 'children':flare_json}

    data_json = json.dumps(answer, indent=1, ensure_ascii=False)
    name_file = 'static/method_schema/saussure.json'

    with open(name_file, 'w') as nf:
        nf.write(data_json)
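
# For reference, the structure written above is the flare-style hierarchy used
# by d3 layouts: nested {'name': ..., 'children': [...]} nodes. A minimal
# hand-written example (the values are illustrative only):
flare_example = {
    'name': 'Root',
    'children': [
        {'name': 'concept A', 'children': [{'name': 'sub-concept A1'}]},
        {'name': 'concept B'},
    ],
}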



if __name__ == '__main__':

    config = gf.load_config('config/config_manage_sparql.yml')

    write_informations_for_visualization(config)
Example #5
def extract_yml_to_dic(file_name):
    """Take a yml file and return a dictionnary"""
    yml_dic = gf.load_config(file_name)
    return yml_dic
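
# Usage sketch (the path is only an example):
#
#     parameters = extract_yml_to_dic('config/config_extract.yml')
#     print(parameters.keys())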
Example #6
            (cui[name], SKOS.prefLabel, Literal(dic_file['method_name'])))
    else:
        # If there is no method name, generate a random number to use as the name
        nb_alea = randint(10000, 99999)
        name = '{}'.format(nb_alea)
        rdf_graph.add((cui[name], RDF.type, cui.knowledge_extractor_result))

    # If additional information is provided, add it as a note
    if 'method_informations' in dic_file:
        rdf_graph.add(
            (cui[name], SKOS.note, Literal(dic_file['method_informations'])))

    rdf_graph.add((cui[name], RDFS.subClassOf, SKOS.ConceptScheme))

    dic_to_rdf(dic_file['root'], 'Root', rdf_graph, name)

    rdf_normalized = rdf_graph.serialize(format='n3')
    rdf_normalized = rdf_normalized.decode('utf-8')
    # print(rdf_normalized)

    gf.write_rdf(output_file, rdf_normalized)

    return rdf_normalized
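
# A sketch of the dictionary shape this fragment expects, inferred from the
# keys used above; the values are assumptions for illustration only.
example_dic_file = {
    'method_name': 'saussure',                                  # stored as the SKOS prefLabel
    'method_informations': 'short description of the method',  # optional, stored as a SKOS note
    'root': {},                                                 # concept hierarchy handed to dic_to_rdf
}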


if __name__ == '__main__':

    config = gf.load_config('config/config_add_method_knowledge_graph.yml')
    create_rdf_graph(config)