def add2Results(partial, store):
    """Accumulate the nested counts of ``partial`` into ``store``.

    Both arguments are two-level dicts (``d[k][v] -> number``).  Every
    ``partial[k][v]`` is added onto ``store[k][v]``; entries missing from
    ``store`` are created starting from 0.  An outer key whose inner dict is
    empty creates nothing in ``store``.

    ``store`` is modified in place and is also returned.
    """
    for k, inner in partial.items():
        for v, count in inner.items():
            # setdefault replaces the dict.has_key() checks, which no longer
            # exist in Python 3, and behaves the same on Python 2.
            bucket = store.setdefault(k, {})
            bucket.setdefault(v, 0)
            bucket[v] += count
    return store


if __name__ == "__main__":
    # Kept as a module-level name: code outside this block may read it.
    amira_dir = settings.amira_dir
    results = {}
    if len(sys.argv) < 3:
        print("usage: getStats.py folder format[json|...]\n"
              "Folder should contain files in .t extension")
        # Stop here; continuing would fail with IndexError on sys.argv below.
        sys.exit(1)
    folder = os.path.abspath(sys.argv[1])
    fmt = sys.argv[2]
    i = 0  # number of json files parsed so far (only advanced in json mode)
    for fName in tools.files(folder, ".*"):
        fName = os.path.abspath(fName)
        if fmt == 'json':
            if i % 100 == 0:
                # Two spaces reproduce the output of `print i, " files finished"`.
                print("%s  files finished" % i)
            partial = parseJson(fName, isNNPVirt, "NER")
            i += 1
        else:
            partial = parsePostNER(fName)
        add2Results(partial, results)
    # The aggregated counts are written next to the input files.
    statFile = os.path.join(folder, 'stats')
    tools.dumpJson(results, statFile)
fh = open(fin, 'r') text = fh.read() fh.close() return text def writeText(fout, text): fout = os.path.abspath(fout) fh = open(fout, 'w') text = fh.write(text) fh.close() def make_gazet(fin): txt1 = getText(fin) txt3 = tools.get_columns(txt1,'->', [0]) txt4 = re.sub('\w+\s*\n','',txt3); # txt5 = re.sub('\n+','\n',txt4); writeText(fin+'.dict',txt4) if __name__=="__main__": f = os.path.abspath(sys.argv[1]) if os.path.isfile(f): fName = f make_gazet(fName) exit() folder = f for fName in tools.files(folder,"(.*?)\.d$"): fName = os.path.abspath(fName) print fName make_gazet(fName)
if i > 1: if isTag(lines[i-1],"DET"): if isTag(lines[i-2], "NN"): curLine["NER"] = lines[i-2]["NER"] return jText def tagFile(f, dictionary): jText = tools.loadJson(f) if jText == -1: return -1 jNewText = tag(jText, dictionary) tools.dumpJson(jNewText, f+".t") if __name__ == "__main__": if len(sys.argv) < 3: print "usage: tag.py dictionary.csv file\nfile expected in json format" f = os.path.abspath(sys.argv[2]) dicName = os.path.abspath(sys.argv[1]) listOfDicts= csvConverter.csv2dicts(dicName) dictionary = csvConverter.buildDictionary(listOfDicts, "word") if os.path.isfile(f): tagFile(f, dictionary) elif os.path.isdir(f): i = 0 for fName in tools.files(f, ".*\.json$"): if i%100 == 0: print "finished", i, "files" tagFile(fName,dictionary) i+=1