/
ARFFOut.py
116 lines (73 loc) · 1.98 KB
/
ARFFOut.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os

import arff

import joinStrip
# Runs at import time: unpack the two lists returned by
# joinStrip.createTweetLists(). ARFFCreation() reads both names as module
# globals. (Presumably the raw and the cleaned tweet token lists -- confirm
# against joinStrip.)
nonStripped,Stripped = joinStrip.createTweetLists()
def _load_word_set(filename):
    """Return the stripped, lower-cased lines of InputFiles/<filename> as a set."""
    # os.path.join keeps the same relative location as the old hard-coded
    # '.\\InputFiles\\...' literal on Windows while also resolving on other
    # platforms, and avoids the invalid '\I' / '\g' / '\p' / '\s' escapes.
    path = os.path.join('.', 'InputFiles', filename)
    # Text mode on purpose: the entries must be the same string type as the
    # tokens they are compared with (binary mode yields bytes on Python 3,
    # which never equal a str token).
    with open(path) as f:
        return set(line.strip().lower() for line in f)


def _flag(condition):
    """Map a truth value onto the 'TRUE' / 'FALSE' strings used in the rows."""
    return 'TRUE' if condition else 'FALSE'


def ARFFDataset(list1, list2):
    """Build one feature row per word of every word list in ``list1``.

    Three word lists are read from the ``InputFiles`` directory (relative to
    the current working directory): ``gazList.txt``, ``prepositions.txt`` and
    ``searchWords.txt``. Each line is stripped and lower-cased.

    Every row has five values, in this order:

    1. 'TRUE' if the lower-cased word is in the gazetteer list.
    2. 'TRUE' if the word's first character is an upper-case letter
       (an empty word yields 'FALSE').
    3. 'TRUE' if the preceding word (lower-cased) is in the preposition list;
       'FALSE' for the first word of a list.
    4. 'TRUE' if the following word (lower-cased) is in the search-word list;
       'FALSE' for the last word of a list.
    5. The literal string "isPlace".

    NOTE(review): the fifth value is always "isPlace", which is not one of
    the 'yes' / 'no' values that ARFFCreation declares for the 'Place'
    attribute -- presumably a placeholder to be labelled later; confirm.

    Parameters:
        list1: iterable of word sequences (each word must support
            ``.lower()`` and slicing, i.e. be a string).
        list2: not read; kept so existing callers keep working.

    Returns:
        A flat list of rows (lists of five strings), in the order the words
        appear in ``list1``.

    Raises:
        IOError / OSError: if one of the three input files cannot be opened.
    """
    # Sets give O(1) membership tests inside the per-word loop.
    gazetteer = _load_word_set('gazList.txt')
    prepositions = _load_word_set('prepositions.txt')
    search_words = _load_word_set('searchWords.txt')

    rows = []
    for words in list1:
        last_index = len(words) - 1
        for index, word in enumerate(words):
            # Slicing instead of word[0] so an empty token cannot raise
            # IndexError; ''.isupper() is simply False.
            first_char = word[:1]
            preceded_by_preposition = (
                index > 0 and words[index - 1].lower() in prepositions
            )
            followed_by_search_word = (
                index < last_index and words[index + 1].lower() in search_words
            )
            rows.append([
                _flag(word.lower() in gazetteer),
                _flag(first_char.isupper() and first_char.isalpha()),
                _flag(preceded_by_preposition),
                _flag(followed_by_search_word),
                "isPlace",
            ])
    return rows
def ARFFCreation():
    """Serialise the feature rows as ARFF and append them to TestData.arff.

    Calls ``ARFFDataset(Stripped, nonStripped)`` on the module-level lists,
    wraps the rows in an ARFF object with relation name 'PlaceNames' and five
    nominal attributes, and appends the output of ``arff.dumps`` to
    ``CreatedCSVs/TestData.arff`` (relative to the current working
    directory). The ``CreatedCSVs`` directory is created if it is missing.

    NOTE(review): the file is opened in append mode, as in the original
    code. If ``arff.dumps`` emits a complete document (header plus data),
    repeated runs would stack several headers in the same file -- confirm
    whether append is intended or the file should be overwritten.

    Returns:
        None.
    """
    dataSet = ARFFDataset(Stripped, nonStripped)
    attList = [
        ('Gazetteer', ['TRUE', 'FALSE']),
        ('CapitalLetter', ['TRUE', 'FALSE']),
        ('Preposition', ['TRUE', 'FALSE']),
        ('FollowingWord', ['TRUE', 'FALSE']),
        ('Place', ['yes', 'no']),
    ]
    obj = {
        'description': u'',
        'relation': 'PlaceNames',
        'attributes': attList,
        'data': dataSet,
    }
    # os.path.join yields the same '.\\CreatedCSVs\\TestData.arff' location on
    # Windows as the old hard-coded literal, works on other platforms too,
    # and avoids the invalid '\C' / '\T' escape sequences.
    out_dir = os.path.join('.', 'CreatedCSVs')
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, 'TestData.arff'), 'a') as f:
        f.write(arff.dumps(obj))