Esempio n. 1
0
    IDtoprocess = HTIDlist[index].strip()
    filepath, postfix = FileCabinet.pairtreepath(IDtoprocess, datapath)
    filename = filepath + postfix + '/' + postfix + ".txt"

    try:
        with open(filename, encoding='utf-8') as file:
            lines = file.readlines()
            successflag = True
    except IOError as e:
        successflag = False

    if not successflag:
        print(IDtoprocess + " is missing.")
        continue

    tokens = TokenGen.keep_hyphens(lines, Lexicon, verbose=debug)

    volacc = TypeIndex.GetAcc(tokens, Lexicon, debug)

    types = TypeIndex.GetTypes(tokens, verbose=debug)

    TypeIndex.UpdateIndex(BigIndex, types, volacc, debug)

# Build the sorted index, then drop the BigIndex name before writing so the
# unsorted structure can be released (the original note says SortIndex copies
# the data into a list, and that the deletion is there to save memory).
SortedIndex = TypeIndex.SortIndex(BigIndex, debug)
del BigIndex

# Destination is the output directory prefix followed by the file name.
index_destination = outputpath + writename
TypeIndex.WriteIndex(SortedIndex, index_destination, delim, debug)
Esempio n. 2
0
    IDtoprocess = IDtoprocess.strip()
    filepath, postfix = FileCabinet.pairtreepath(IDtoprocess, datapath)
    filename = filepath + postfix + '/' + postfix + ".txt"

    try:
        with open(filename, encoding='utf-8') as file:
            lines = file.readlines()
            successflag = True
    except IOError as e:
        successflag = False

    if not successflag:
        print(IDtoprocess + " is missing.")
        continue
        
    tokens = TokenGen.keep_hyphens(lines,Lexicon,verbose=debug)

    if len(tokens) < 10:
        print(IDtoprocess, "has only tokencount", len(tokens))

    volacc = TypeIndex.GetAcc(tokens,Lexicon,debug)

    types = TypeIndex.GetTypes(tokens,verbose=debug)

    TypeIndex.UpdateIndex(BigIndex, types, volacc, debug)

# Build the sorted form of the accumulated index. The original note here says
# SortIndex copies the data into a list, so BigIndex is deleted afterwards in
# order to save memory.

SortedIndex = TypeIndex.SortIndex(BigIndex, debug)

# Remove the BigIndex name; using it after this line raises NameError.
del BigIndex