-
Notifications
You must be signed in to change notification settings - Fork 5
/
genConllGold.py
51 lines (44 loc) · 1.69 KB
/
genConllGold.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Main script for generating gold key files for OntoNotes 5 from the coref files.
# For a directory, the output for all docs is written to one file.
# Needs 2 args: the mode (-p or -w) and an input name (a file name for -p, a directory name for -w).
import ontoConvertConll as onto
import processGold as prep
import sys,os
from os import listdir
# Two modes: -p = print, -w = write.
# Exit with a usage message instead of an IndexError traceback when the two
# required command-line arguments are missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s (-p|-w) <coref file or directory>" % sys.argv[0])
mode = sys.argv[1]
inputname = sys.argv[2]
# Name of the combined output file used when the input is a directory.
newfile = "ontoGoldAll23.key"
# Disabled: these two lines would truncate newfile before the run. As written,
# output is appended to whatever newfile already contains.
#g=open(newfile,"w")
#g.close()
def process(file):
    """Run one input file through preprocessing and output formatting.

    Passes *file* to ``prep.mainPreprocess`` and hands that result to
    ``onto.mainOutput``, returning whatever the latter produces.
    """
    preprocessed = prep.mainPreprocess(file)
    return onto.mainOutput(preprocessed)
# If the input is a directory, process every file and write everything to one big file.
# In this mode the file is never written in one go; each document is simply appended to it.
def writeOneFile(output, outName, firstname):
    """Append one document's formatted lines to a key file.

    Args:
        output: iterable of strings (the formatted list from process());
            each one is written followed by a newline.
        outName: path of the file to append to.
        firstname: document name used in the begin/end marker lines.
    """
    # Opened in append mode so that repeated calls accumulate documents in
    # the same file. The with-block closes the handle even if a write fails.
    with open(outName, "a") as g:
        g.write("\n# begin document " + firstname + "\n")
        for line in output:
            g.write(line + '\n')
        g.write("# end document " + firstname + "\n")
# Single-file mode: print to the console (-p; use the shell to redirect to a
# file) or write to <name>_gold.key (-w).
if os.path.isfile(inputname):
    # Cut at the first dot of the base name only. Splitting the whole path
    # on "." gave an empty or truncated name for inputs such as
    # "./doc.coref" or "data.v1/doc.coref".
    dirpart, basename = os.path.split(inputname)
    firstname = os.path.join(dirpart, basename.split(".")[0])
    outlist = process(inputname)
    if mode == "-p":
        # Single-argument print(...) produces the same output under
        # Python 2 and Python 3.
        print('\n')
        print("# begin document " + firstname + "\n")
        for i in outlist:
            print(i)
        # Close the document the same way writeOneFile() does, so that
        # redirected -p output has a matching end marker.
        print("# end document " + firstname)
    elif mode == "-w":
        writeOneFile(outlist, firstname + '_gold.key', firstname)
# Directory mode: process every .coref file in the directory and append all
# outputs to the single file named by newfile.
if os.path.isdir(inputname):
    # listdir() returns names in arbitrary order; sort them so the order of
    # documents in the output file is the same on every run.
    onlyfiles = sorted(f for f in listdir(inputname) if f.endswith(".coref"))
    for fname in onlyfiles:
        firstname = fname.split(".")[0]
        # os.path.join works whether or not inputname ends with a path
        # separator; plain concatenation produced "dirfile.coref" for "dir".
        tfile = os.path.join(inputname, fname)
        outlist = process(tfile)
        writeOneFile(outlist, newfile, firstname)