예제 #1
0
파일: getStats.py 프로젝트: aboSamoor/lydia
def add2Results(partial, store):
    """Accumulate the nested counts in ``partial`` into ``store``.

    Both arguments are two-level mappings (outer key -> inner key -> number).
    Every value in ``partial`` is added to the value stored under the same
    pair of keys in ``store``; missing outer or inner keys are created on
    demand, with the running total starting at 0.  An outer key whose inner
    mapping is empty contributes nothing, so no entry is created for it.

    ``store`` is modified in place and is also returned.
    """
    for outer, counts in partial.items():
        for inner, amount in counts.items():
            # dict.has_key() does not exist on Python 3; setdefault()/get()
            # behave the same on Python 2 and 3.
            bucket = store.setdefault(outer, {})
            bucket[inner] = bucket.get(inner, 0) + amount
    return store

if __name__=="__main__":
    # Parse every file found under a folder into per-file counts, merge the
    # counts with add2Results and dump the merged result as JSON to
    # "<folder>/stats".
    amira_dir = settings.amira_dir  # not read anywhere in this block
    results = {}
    if len(sys.argv) < 3:
        print("usage: getStats.py folder format[json|...]\nFolder should contain files in .t extension")
        # Stop here: without both arguments the sys.argv lookups below
        # would fail with IndexError.
        sys.exit(1)
    folder = os.path.abspath(sys.argv[1])
    fmt = sys.argv[2]
    i = 0  # files handled so far; only advanced in 'json' mode
    for fName in tools.files(folder, ".*"):
        fName = os.path.abspath(fName)
        if fmt == 'json':
            if i%100 == 0:
                # Single-argument print(...) produces the same text as the
                # former "print i, ' files finished'" on Python 2 and 3.
                print("%s  files finished" % i)
            partial = parseJson(fName, isNNPVirt, "NER")
            i+=1
        else:
            partial = parsePostNER(fName)
        add2Results(partial, results)
    statFile = os.path.join(folder, 'stats')
    tools.dumpJson(results, statFile)
예제 #2
0
    fh = open(fin, 'r')
    text = fh.read()
    fh.close()
    return text

def writeText(fout, text):
    """Write ``text`` to the file at ``fout``, replacing existing content.

    ``fout`` is made absolute with ``os.path.abspath`` before it is opened
    in text write mode.  Returns None.
    """
    fout = os.path.abspath(fout)
    # The with-block closes the handle even when write() raises.
    with open(fout, 'w') as fh:
        fh.write(text)

def make_gazet(fin):
    """Write a cleaned-up copy of ``fin`` to ``fin + '.dict'``.

    Steps, as far as this block shows:
      1. read the whole input with ``getText``;
      2. pass it through ``tools.get_columns(text, '->', [0])`` -- presumably
         this keeps the first '->'-separated column of each line (TODO:
         confirm against the helper);
      3. delete every run of word characters that is followed by optional
         whitespace and a newline (the newline is deleted too);
      4. write the result with ``writeText``.
    """
    raw_text = getText(fin)
    first_column = tools.get_columns(raw_text, '->', [0])
    # Raw string: '\w' and '\s' are not valid string escapes, so the non-raw
    # literal triggers an invalid-escape warning on Python 3.  The pattern
    # itself is unchanged.
    cleaned = re.sub(r'\w+\s*\n', '', first_column)
    # NOTE: an extra step that collapsed runs of newlines into a single one
    # was disabled in the original and is still not applied.
    writeText(fin+'.dict', cleaned)
              
if __name__=="__main__":
    # Run make_gazet on a single file, or on every file under a folder whose
    # path matches the ".d" suffix pattern handed to tools.files.
    if len(sys.argv) < 2:
        # Report a missing argument instead of failing with IndexError.
        print("usage: %s file_or_folder" % sys.argv[0])
        sys.exit(1)
    f = os.path.abspath(sys.argv[1])
    if os.path.isfile(f):
        make_gazet(f)
        # sys.exit() rather than the bare exit() helper, which is only
        # installed by the site module.
        sys.exit()

    folder = f
    # Raw string so that '\.' reaches the regex engine as written.
    for fName in tools.files(folder, r"(.*?)\.d$"):
        fName = os.path.abspath(fName)
        print(fName)
        make_gazet(fName)
예제 #3
0
파일: tag.py 프로젝트: aboSamoor/lydia
            if i > 1:
                if isTag(lines[i-1],"DET"):
                    if isTag(lines[i-2], "NN"):
                        curLine["NER"] = lines[i-2]["NER"]
    return jText


def tagFile(f, dictionary):
    """Tag the JSON document stored at ``f`` and save it as ``f + '.t'``.

    The document is loaded with ``tools.loadJson``, passed through ``tag``
    together with ``dictionary``, and the result is written with
    ``tools.dumpJson``.

    Returns -1 when ``tools.loadJson`` returned -1 (presumably its failure
    signal -- confirm against the helper); otherwise returns None.
    """
    loaded = tools.loadJson(f)
    if loaded == -1:
        # Nothing usable was loaded, so no output file is written.
        return -1
    tagged = tag(loaded, dictionary)
    out_path = f + ".t"
    tools.dumpJson(tagged, out_path)

if __name__ == "__main__":
    # Tag one JSON file, or every file under a folder whose path matches the
    # ".json" suffix pattern, using a dictionary built from a CSV file.
    if len(sys.argv) < 3:
        print("usage: tag.py dictionary.csv file\nfile expected in json format")
        # Stop here: without both arguments the sys.argv lookups below
        # would fail with IndexError.
        sys.exit(1)
    f = os.path.abspath(sys.argv[2])
    dicName = os.path.abspath(sys.argv[1])
    listOfDicts = csvConverter.csv2dicts(dicName)
    dictionary = csvConverter.buildDictionary(listOfDicts, "word")
    if os.path.isfile(f):
        tagFile(f, dictionary)
    elif os.path.isdir(f):
        i = 0  # files handled so far, used for the progress message
        # Raw string so that '\.' reaches the regex engine as written.
        for fName in tools.files(f, r".*\.json$"):
            if i%100 == 0:
                # Single-argument print(...) produces the same text as the
                # former 'print "finished", i, "files"' on Python 2 and 3.
                print("finished %s files" % i)
            tagFile(fName,dictionary)
            i+=1