Ejemplo n.º 1
0
def get_my_string_list(input_file: File) -> List[str]:
    """Return every line of *input_file* as a list of strings.

    The file is first opened with ``openText()``'s default settings. If
    decoding fails, it is re-opened and read again as ISO-8859-1.

    Args:
        input_file: project ``File`` handle; only its ``openText`` method
            is used here.

    Returns:
        The list produced by ``readlines()`` on the opened stream.

    Raises:
        Any non-decoding error from ``openText`` / ``readlines`` (for
        example a missing file) propagates to the caller.
    """
    try:
        # ``with`` guarantees the stream is closed, including when decoding
        # fails part-way through the read.
        with input_file.openText() as instream:
            return instream.readlines()
    except UnicodeDecodeError:
        # Only a decoding failure can be cured by switching the encoding;
        # anything else is allowed to surface instead of being masked.
        with input_file.openText(encoding='ISO-8859-1') as instream:
            return instream.readlines()
Ejemplo n.º 2
0
def find_inline_terms_for_file_list(file_list: File, dict_prefix: str = None) -> None:
    """Run abbreviation and inline-term extraction for each listed file.

    Every line of *file_list* is stripped and used as a path prefix. For each
    prefix the ``.txt3`` file is read, abbreviations are written to ``.abbr``
    and inline terms are computed using the ``.fact``, ``.pos`` and ``.terms``
    files.

    Args:
        file_list: text file holding one path prefix per line.
        dict_prefix: when truthy, the abbreviation dictionaries are saved to
            ``<dict_prefix>.dict_abbr_to_full`` and
            ``<dict_prefix>.dict_full_to_abbr`` after all files are processed.
    """
    # Only the very first file asks run_abbreviate_on_lines to reset its
    # dictionary.
    is_first_file = True

    with file_list.openText() as listing:
        # NOTE: unigram-dictionary handling for dict_prefix is currently
        # disabled (see derive_plurals in term_utilities and other uses of
        # "unigram_dict").
        for entry in listing.readlines():
            prefix = entry.strip()

            # @semanticbeeng @todo @dataFlow
            # TODO: add feature to remove xml
            txt3_file = File[TXT3](prefix + '.txt3')
            text_lines: List[str] = get_lines_from_file(txt3_file)

            # Creates the abbreviation file and acquires the
            # abbreviation --> term and term --> abbreviation dictionaries.
            # Possible alternative: load existing abbreviation files into the
            # dictionaries for later steps (depends on timing).
            # @semanticbeeng @todo @arch global state mutation
            abbr_file = File[ABBR](prefix + '.abbr')
            run_abbreviate_on_lines(text_lines, abbr_file, reset_dictionary=is_first_file)

            fact_file = File[FACT](prefix + '.fact')
            pos_file = File[POS](prefix + '.pos')
            term_file = File[TERM](prefix + '.terms')
            find_inline_terms(text_lines, fact_file, pos_file, term_file)

            is_first_file = False

        if dict_prefix:
            prefix_text = str(dict_prefix)
            save_abbrev_dicts(File[ABBR](prefix_text + ".dict_abbr_to_full"),
                              File[ABBR](prefix_text + ".dict_full_to_abbr"))
Ejemplo n.º 3
0
def load_web_score_dict_file(dict_file: File) -> None:
    """Reload the module-level ``webscore_dict`` from a tab-separated file.

    The existing dictionary is always cleared first. If ``dict_file.name``
    is not an existing file, a notice is printed and the dictionary is left
    empty. Otherwise each line is split on a tab into ``term`` and ``score``,
    the score is stored as a float, and the result is passed through
    ``dictionary.freeze_dict``.

    Raises:
        ValueError: if a line does not split into exactly two tab-separated
            fields, or the score is not a valid float.
    """
    global webscore_dict
    webscore_dict.clear()

    if not os.path.isfile(dict_file.name):
        print(dict_file, 'does not exist. Will be created')
        return

    with dict_file.openText() as score_stream:
        for raw_line in score_stream.readlines():
            # Strip only line-terminator characters from both ends.
            fields = raw_line.strip(os.linesep).split('\t')
            term, score = fields
            webscore_dict[term] = float(score)

    # @semanticbeeng @todo global state initialization: this fails
    webscore_dict = dictionary.freeze_dict(webscore_dict)
Ejemplo n.º 4
0
def _get_abbreviations(file: File) -> None:
    """Import abbreviations from jargon file.

    Each line is split on ``|||``; the first field is the full word and the
    second field is split on ``||`` into short forms. Every short form is
    mapped to the full word in the module-level ``abbreviations`` dict, which
    is then replaced by the result of ``dictionary.freeze_dict``.

    Args:
        file: project ``File`` handle for the jargon file.

    Raises:
        IndexError: if a line contains no ``|||`` separator.
    """
    global abbreviations

    # The context manager closes the stream even when a malformed line raises
    # part-way through the loop.
    with file.openText() as f:
        for line in f.readlines():
            temp = line.split('|||')
            fullword = temp[0]
            # NOTE(review): the line terminator is not stripped, so when the
            # short forms are the last field the final one keeps its trailing
            # newline as part of the key - confirm this is intended.
            shortwords = temp[1].split('||')
            for w in shortwords:
                abbreviations[w] = fullword

    abbreviations = dictionary.freeze_dict(
        abbreviations)  # @semanticbeeng @todo global state initialization
Ejemplo n.º 5
0
def read_in_org_dictionary(dict_file: File,
                           dictionary: str = 'org',
                           shallow: bool = True,
                           lower: bool = False,
                           patent: bool = False) -> None:
    """Reload one of the module-level name dictionaries from *dict_file*.

    For ``'org'``, ``'loc'`` and ``'nat'`` the matching global dictionary is
    cleared, refilled line by line via ``add_dictionary_entry`` and finally
    replaced by its ``freeze_dict`` result. Any other name skips the clear
    and freeze steps, but the lines are still passed to
    ``add_dictionary_entry``.

    Args:
        dict_file: file whose lines are the dictionary entries.
        dictionary: which dictionary to load.
        shallow, lower, patent: forwarded unchanged to
            ``add_dictionary_entry``.

    Raises:
        Exception: for ``'discourse'`` and ``'term_relation'``, whose backing
            dictionaries are not defined.
    """
    global organization_dictionary
    global location_dictionary
    global nationality_dictionary

    # @semanticbeeng @todo dead code: these two dictionaries do not exist,
    # so reject them before touching any state.
    if dictionary == 'discourse':
        raise Exception('undefined variable discourse_dictionary')
    if dictionary == 'term_relation':
        raise Exception('undefined variable term_rel_dictionary')

    # Start from an empty dictionary for the selected kind.
    if dictionary == 'org':
        organization_dictionary.clear()
    elif dictionary == 'loc':
        location_dictionary.clear()
    elif dictionary == 'nat':
        nationality_dictionary.clear()

    with dict_file.openText(mode='r') as entries:
        for entry in entries.readlines():
            add_dictionary_entry(entry, dictionary, shallow, lower=lower, patent=patent)

    # @semanticbeeng @arch global state immutable
    if dictionary == 'org':
        organization_dictionary = freeze_dict(organization_dictionary)
    elif dictionary == 'loc':
        location_dictionary = freeze_dict(location_dictionary)
    elif dictionary == 'nat':
        nationality_dictionary = freeze_dict(nationality_dictionary)