Example #1
0
def predict_list(path_list, maxlen=None):
    """Run NER prediction over each file in `path_list` and print per-file
    and aggregate timing / entity summaries.

    Args:
        path_list: paths of the files to process.
        maxlen: unused here; kept for interface compatibility.

    Relies on module-level `dataset_text_folder`, `parameters_filepath`,
    `predict(nn, path)` and `summarize(entities, max_texts)` helpers.
    """
    os.makedirs(dataset_text_folder, exist_ok=True)
    files = list(glob(os.path.join(dataset_text_folder, '*')))
    print('files=%d %s' % (len(files), files))
    # assert files
    nn = NeuroNER(parameters_filepath=parameters_filepath)
    results = {}
    try:
        for i, path in enumerate(path_list):
            print('~' * 80)
            print('Processing %d of %d' % (i, len(path_list)), end=': ')
            text, entities, times = predict(nn, path)
            results[path] = (text, entities, times)
    except Exception as e:
        print('^' * 80)
        print('Failed to process %s' % path)
        print(type(e))
        print(e)
        raise
    finally:
        # Always release the model, even when a file fails mid-run
        # (the original skipped close() on the re-raise path).
        nn.close()
    print('=' * 80)
    print('Completed %d of %d' % (len(results), len(path_list)))

    print('!' * 80)
    print('files=%d %s' % (len(path_list), path_list[:5]))

    # Per-file report. times = (t0, t1, t2): start, after pdftotext, after
    # prediction -- per the elapsed-time prints below.
    for i, path in enumerate(path_list[:len(results)]):
        text, entities, (t0, t1, t2) = results[path]
        print('*' * 80)
        print('%3d: %5d %8d %s' % (i, len(text), os.path.getsize(path), path))
        if not text:
            continue
        elapsed = t2 - t0
        print('pdftotext=%4.1f sec' % (t1 - t0))
        print('  predict=%4.1f sec' % (t2 - t1))
        # Guard against zero elapsed time on tiny/fast files.
        rate = len(text) / elapsed if elapsed else 0.0
        print('    total=%4.1f sec %4.0f chars/sec ' % (elapsed, rate))
        summarize(entities, max_texts=10)

    # Aggregate report across all successfully processed files.
    all_entities = []
    all_text_len = 0
    all_t = 0.0
    for path in path_list[:len(results)]:
        text, entities, (t0, t1, t2) = results[path]
        all_text_len += len(text)
        all_entities.extend(entities)
        all_t += t2 - t0
    print('#' * 80)
    print('All files: %d files length=%d' % (len(results), all_text_len))
    if all_text_len:
        # Guard against a pathological zero total time.
        rate = all_text_len / all_t if all_t else 0.0
        print('    total=%4.1f sec %4.0f chars/sec ' % (all_t, rate))
        summarize(all_entities, max_texts=100)
Example #2
0
def main(argv=sys.argv):
    ''' Run NeuroNER over a hard-coded sample news article and print the
    predicted entities.

    Args:
        argv: command-line style arguments; argv[1:] is parsed (parameters
            file / output folder) but the result is not used below.
    '''
    # Parse arguments
    # NOTE(review): `arguments` is parsed but never passed to NeuroNER() --
    # confirm whether NeuroNER(**arguments) was intended (cf. the other
    # examples in this file that do forward the parsed arguments).
    arguments = parse_arguments(argv[1:])

    nn = NeuroNER()
    entities = nn.predict(
        'www fresnobee com news local crime article179830941 html Sex offender Snyder could live in Fresno motel | The Fresno Bee Mobile & Apps Jeffrey Snyder  62  was allowed supervised release in June after serving prison time for molesting children. He may be moved to a motel just south of downtown Fresno. FRESNO COUNTY DISTRICT ATTORNEY S OFFICE Jeffrey Snyder  62  was allowed supervised release in June after serving prison time for molesting children. He may be moved to a motel just south of downtown Fresno. FRESNO COUNTY DISTRICT ATTORNEY S OFFICE Sexually violent predator Jeffrey Snyder could soon have a new place to live By Rory Appleton Order Reprint of this Story October 19  2017 3:11 PM The California Department of State Hospitals suggested a new option Thursday for housing 62-year-old Jeffrey Snyder  who was conditionally released in June after serving out a sentence for molesting children. The El Muir Motel at 2339 S. G St. is the proposed location. It is a one-story  6 400-square-foot building with 20 guest rooms located just south of downtown Fresno and west of Calwa near Golden State Boulevard. The Fresno County District Attorney s office is accepting public comment to include in its formal response in court. Anyone who wishes to submit a comment is asked to email [email protected] or mail to the office at 2220 Tulare St.  Suite 1000  Fresno CA 93721. Mailed comments should have  Attn: Sexual Assault Unit  written on the envelope. Snyder s placement has been a fierce topic of discussion for both law enforcement and the general public for almost a year . A proposed placement at a home in Squaw Valley was met with fierce opposition by neighbors  and the property eventually burned down. Of the 1 749 houses the state looked at  none met the required criteria for placement of a sexually violent offender. 
A group of Cal Fire firefighters confer after putting out a mobile home fire- where sex offender Jeffrey Snyder was to be housed- at Dunlap on Sage Lane in January. JOHN WALKER [email protected]'
    )
    print(entities)
    nn.close()
Example #3
0
def main(argv=sys.argv):
    """Train a NeuroNER model from command-line arguments.

    Args:
        argv: command-line arguments; argv[1:] is parsed into the keyword
            arguments forwarded to NeuroNER (parameters file, output
            folder, ...).
    """
    arguments = parse_arguments(argv[1:])

    nn = NeuroNER(**arguments)
    try:
        nn.fit()
    finally:
        # Release model resources even if training raises
        # (the original skipped close() on failure).
        nn.close()
Example #4
0
def predict(path):
    """Convert the PDF at `path` to text, then train NeuroNER on the
    extracted-text dataset folder.

    Args:
        path: path of the PDF file to convert with pdftotext.

    NOTE(review): relies on module-level `path_txt`, `dataset_text_folder`
    and `parameters_filepath` being defined elsewhere in this file --
    `path_txt` in particular is not visible here; confirm it exists.
    """
    pdftotext(path, path_txt)
    files = list(glob(os.path.join(dataset_text_folder, '*')))
    print('files=%d %s' % (len(files), files))
    # Fail fast if the conversion produced nothing to train on.
    assert files

    nn = NeuroNER(parameters_filepath=parameters_filepath)
    try:
        nn.fit()
    finally:
        # Release model resources even if training raises.
        nn.close()
Example #5
0
def main(argv=sys.argv):
    ''' NeuroNER main method: run NER over every context in ./whole.json
    and write token-level entity annotations to whole_ner.json.

    Args:
        argv: command-line arguments; argv[1:] is parsed (parameters file,
            output folder) and forwarded to NeuroNER.

    Assumed input schema (TODO confirm against the producer of whole.json):
        data[i]['context'] is a list of "statements"; each statement is a
        list of sentences; each sentence is a list of token strings.
    '''
    # Parse arguments
    arguments = parse_arguments(argv[1:])

    nn = NeuroNER(**arguments)
    entire_time = time.time()
    #nn.fit()
    #e = nn.predict("Butch has the opportunity to just walk out of Maynard and Zed's shop.")
    #print(e)
    with open('./whole.json') as f:
        data = json.load(f)

    num_data = len(data)
    contexts = [data[i]['context'] for i in range(num_data)]

    print('num_data : %d' % num_data)
    for i in range(num_data):
        #for i in range(106, num_data):
        print('Process %d data.....' % (i + 1))
        if len(contexts[i]) == 0:
            print('no data in context %d' % (i + 1))
            continue

        t = time.time()

        context = contexts[i]
        # Flatten each statement (a list of sentences) into a flat array
        # of its tokens.  NOTE: the comprehension variable `i` shadows the
        # outer loop index but does not leak in Python 3.
        statement = [
            np.concatenate((context[i]), axis=None)
            for i in range(len(context))
        ]
        # All tokens of the whole context, in order.
        sentence = np.concatenate((statement), axis=None)

        # statement_index[j] = number of tokens before statement j
        # (cumulative token counts, starting at 0).
        statement_len = [len(statement[j]) for j in range(len(statement))]
        statement_index = list()
        statement_index.append(0)
        for j in range(len(statement_len)):
            statement_index.append(statement_index[j] + statement_len[j])

        # sentence_index[j][k] = number of tokens before sentence k within
        # statement j (cumulative counts per statement, starting at 0).
        sentence_index = list()
        for j in range(len(context)):
            s = context[j]
            s_len = [len(s[k]) for k in range(len(s))]
            s_index = list()
            s_index.append(0)
            for k in range(len(s_len)):
                s_index.append(s_index[k] + s_len[k])
            sentence_index.append(s_index)

        # Join every token with single spaces and run NER over the result;
        # entities carry character offsets into this joined text.
        text = ' '.join(list(sentence))
        entities = nn.predict(text)

        # end_list[j] = character offset in `text` just past token j
        # (the trailing += 1 accounts for the joining space).
        end_index = 0
        end_list = list()

        for j in range(len(sentence)):
            end_index += len(sentence[j])
            end_list.append(end_index)
            end_index += 1

        # Convert each entity's character span to a token span, then map
        # the global token index back to (statement, sentence) coordinates.
        # NOTE(review): assumes entities are sorted by start offset --
        # the statement/sentence cursors below only move forward.
        statement_iter = 0
        sentence_iter = 0
        ner = list()
        ner_statement = list()
        for entity in entities:
            e_type = entity['type']
            e_start = entity['start']
            e_end = entity['end']

            # First token whose end offset lies past the entity start.
            start = -1
            end = -1
            for j in range(len(sentence)):
                if end_list[j] > e_start:
                    start = j
                    break

            # Last token ending at or before the entity end; `end` is the
            # exclusive token-index bound (j + 1).
            for j in range(len(sentence) - 1, -1, -1):
                if end_list[j] <= e_end:
                    end = j + 1
                    break

            # Advance to the statement containing token `start`, flushing
            # the annotations accumulated for each completed statement.
            while start >= statement_index[statement_iter + 1]:
                statement_iter += 1
                sentence_iter = 0
                ner.append(ner_statement)
                ner_statement = list()

            # Advance to the sentence (within the statement) containing
            # token `start`.
            while start >= statement_index[statement_iter] + sentence_index[
                    statement_iter][sentence_iter + 1]:
                sentence_iter += 1

            # Rebase the token span to be relative to the current sentence.
            start -= (statement_index[statement_iter] +
                      sentence_index[statement_iter][sentence_iter])
            end -= (statement_index[statement_iter] +
                    sentence_index[statement_iter][sentence_iter])
            ner_statement.append([sentence_iter, start, end, e_type])

        # Flush the annotations of the final statement.
        ner.append(ner_statement)

        data[i]['ner'] = ner
        print('Done(%.2fs)' % (time.time() - t))

    with open('whole_ner.json', 'w') as f:
        json.dump(data, f)

    print('Elapsed Time : %.2fs' % (time.time() - entire_time))
    nn.close()
Example #6
0
import spacy
from neuroner import NeuroNER
import load_parameters

# Paths to the pretrained model and the unannotated dataset used to
# initialize NeuroNER.
model_folder = '../trained_models/conll_2003_en'
init_dataset_folder = '../data/example_unannotated_texts'

# Build the NeuroNER argument dict from the pretrained-model settings.
arguments = load_parameters.parse_arguments(
    pretrained_model_folder=model_folder, init_dataset=init_dataset_folder)

# Load the NER model and the spaCy pipeline once, at import time, so that
# predict() below can be called repeatedly without reloading them.
nn = NeuroNER(**arguments)
spacy_nlp = spacy.load('en')


# This function can be called repeatedly -- e.g. to deploy as a service --
# without reloading the model and the spaCy pipeline each time.
def predict(text):
    """Run NER over `text` using the module-level model and spaCy pipeline."""
    entities = nn.new_predict(text, spacy_nlp)
    return entities