Example #1
def segment_text_job(json_dic, pickle_path, filepath, which):
    '''
    Split a text into sentences.
    json_dic: JSON object holding the texts.
    pickle_path: path to the pickle file used by __segment_text.
    filepath: where to save the updated JSON; if empty, the segments are
              returned through the job meta instead.
    which: which part of the JSON the text comes from.
    '''
    print('Starting task segment')
    job = init_job('nltk-seg')

    result = __segment_text(json_dic[which]['source'], pickle_path)

    # TODO change! remove save as well?
    json_dic[which]['segments'] = create_segments(result)

    if filepath != '':
        # persist the updated JSON to disk and flag the save in the job meta
        save_json(json_dic, filepath)
        job.meta['doSave'] = '1'
    else:
        # no file given: hand the segments back through the job meta
        job.meta['output'] = json_dic[which]['segments']
        job.meta['type'] = 'onesegment'
        job.meta['which'] = which

    job.meta['progress'] = 100
    job.save_meta()
    return result
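
These task functions write progress and results into job.meta and call job.save_meta(), a pattern that matches the RQ (python-rq) worker API. Under that assumption, a minimal sketch of what the init_job helper might look like; the helper name and meta keys come from the examples, the body itself is a guess:

from rq import get_current_job

def init_job(task_name):
    # inside an RQ worker, get_current_job() returns the rq.job.Job being run
    job = get_current_job()
    job.meta['task'] = task_name   # assumed key: record which task is running
    job.meta['progress'] = 0       # the tasks above set this to 100 when done
    job.save_meta()                # persist the meta dict back to Redis
    return job
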
Example #2
def maligna_seg_job(text1, text2):
    '''Split two texts into sentences with the maligna command-line tool.'''
    job = init_job('maligna-seg')

    # build the two maligna commands and run them over the XML-wrapped texts
    cmd, cmd2 = cmd_2_split_in_sentences()
    result = execute_short_double(cmd, cmd2, text2xmlstring(text1, text2))
    job.meta['output'] = results2segment_lists(result)
    job.meta['type'] = 'twosegments'
    job.save_meta()
    print('Task completed')
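
execute_short_double is not shown on this page; a plausible sketch, assuming cmd and cmd2 are argument lists for the maligna CLI and the XML string is piped through the first command and then the second (the helper name and behaviour are assumptions, only subprocess is standard library):

import subprocess

def execute_short_double(cmd, cmd2, xml_input):
    # run the first command on the XML input ...
    first = subprocess.run(cmd, input=xml_input,
                           capture_output=True, text=True, check=True)
    # ... then feed its output into the second command and return the final XML
    second = subprocess.run(cmd2, input=first.stdout,
                            capture_output=True, text=True, check=True)
    return second.stdout
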
Example #3
def maligna_align_job(jsonData):
    '''Align sentences with the maligna command-line tool.'''
    job = init_job('maligna-align')

    cmd = cmd_2_align_sentences()
    result = execute_short_single(cmd, json2xmlstring(jsonData))
    #job.meta['output'] = results2segment_lists(result)
    # compare maligna's output against the original JSON to build the aligned result
    res = compare_results(jsonData, result)
    job.meta['output'] = res
    job.meta['type'] = 'align'
    job.save_meta()
    print('Task completed')
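
A hypothetical client-side call, assuming the jobs are enqueued with RQ; the json_data variable and the polling loop are illustrative, not part of the original code:

import time
from redis import Redis
from rq import Queue

q = Queue(connection=Redis())
job = q.enqueue(maligna_align_job, json_data)   # json_data: dict in the project's JSON format

while not (job.is_finished or job.is_failed):
    time.sleep(1)
    job.refresh()                                # reload the meta written with save_meta()
    print('progress:', job.meta.get('progress', 0))

if job.is_finished:
    aligned = job.meta['output']                 # the 'align' result set by the worker
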
Example #4
def split_in_sentences_with_model(json_dic, which, language):
    '''Split a text into sentences with a language-specific spaCy model.'''
    print('Starting')
    job = init_job('spacy-seg')

    raw_text = json_dic[which]['source']
    nlp = __load_spacy(language)
    doc = nlp(raw_text)
    # Span.string is the spaCy 2.x API; on spaCy 3.x use sent.text instead
    sentences = [sent.string.strip() for sent in doc.sents]

    json_dic[which]['segments'] = create_segments(sentences)
    
    job.meta['output'] = json_dic[which]['segments']
    job.meta['type'] = 'onesegment'
    job.meta['which'] = which
    job.meta['progress'] = 100
    job.save_meta()
    return sentences # why?
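
__load_spacy is not shown either; a plausible sketch, assuming it maps a language code to an installed spaCy model (the model names and the fallback are assumptions):

import spacy

def __load_spacy(language):
    # standard small models for a few languages; extend as needed
    models = {'en': 'en_core_web_sm', 'es': 'es_core_news_sm', 'de': 'de_core_news_sm'}
    try:
        return spacy.load(models.get(language, 'en_core_web_sm'))
    except OSError:
        # no model installed: fall back to a blank pipeline with a rule-based
        # sentencizer (spaCy 2.x API, matching the examples above)
        nlp = spacy.blank(language)
        nlp.add_pipe(nlp.create_pipe('sentencizer'))
        return nlp
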
Example #5
def split_in_sentences(json_dic, which):
    '''Split a text into sentences with spaCy's rule-based sentencizer.'''
    print('Starting')
    job = init_job('spacy-seg')

    raw_text = json_dic[which]['source']
    nlp = English()  # requires: from spacy.lang.en import English
    nlp.add_pipe(nlp.create_pipe('sentencizer'))
    doc = nlp(raw_text)
    sentences = [sent.string.strip() for sent in doc.sents]
    
    json_dic[which]['segments'] = create_segments(sentences)
    
    job.meta['output'] = json_dic[which]['segments']
    job.meta['type'] = 'onesegment'
    job.meta['which'] = which
    job.meta['progress'] = 100
    job.save_meta()
    return sentences # why?
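
The last two examples use the spaCy 2.x API; under spaCy 3.x, pipes are added by their registered name and Span.string was removed in favour of Span.text. A minimal standalone equivalent of Example #5's segmentation step:

from spacy.lang.en import English

nlp = English()
nlp.add_pipe('sentencizer')        # spaCy 3.x: add the pipe by name
doc = nlp('First sentence. Second sentence.')
sentences = [sent.text.strip() for sent in doc.sents]
print(sentences)                   # ['First sentence.', 'Second sentence.']
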