def extractRevsLabel(filetype, pattern):
    """Build the keyword list and star label for every review and save both.

    Reads '../data/chres_review.json' line by line, parsing each line as a
    JSON object and using its 'stars' and 'text' fields. The text (or the
    subset of it selected by ``pattern``) is passed through
    ``tp.removeStopPunc`` and collected; the star values are collected in
    the same order.

    Outputs:
        '../data/label<filetype>.npy'  -- the star values, saved with np.save
        '../data/revsKW<filetype>.json' -- the per-review results of
                                           tp.removeStopPunc, dumped as JSON

    Args:
        filetype: string inserted into the two output file names.
        pattern: 'AllWords' to process the full review text; any other
            value is handed to ``tp.posExtract`` together with the
            ``tp.posTag`` output, and the extracted items are joined with
            single spaces before stop-word/punctuation removal.

    Returns:
        Always 1.

    NOTE(review): this file appears to define extractRevsLabel a second time
    right after this definition; the later one is what callers end up with.
    """
    use_full_text = pattern == 'AllWords'
    revs = []
    lab = []

    # `with` guarantees the input handle is released even when a line fails
    # to parse or a tp.* helper raises.
    with open('../data/chres_review.json') as review:
        for count, line in enumerate(review, 1):
            jre = json.loads(line)
            jstar = jre['stars']
            text = jre['text']
            if not use_full_text:
                # Keep only the tokens whose POS tag is selected by `pattern`.
                text = ' '.join(tp.posExtract(tp.posTag(text), pattern))
            lab.append(jstar)
            revs.append(tp.removeStopPunc(text))
            # Progress counter; the parenthesised form prints the same thing
            # under the Python 2 print statement used elsewhere in this file.
            print(count)

    np.save('../data/label' + filetype + '.npy', np.array(lab))

    # The output file is opened only once all reviews are processed, so a
    # failure part-way through no longer truncates a previous result.
    with open('../data/revsKW' + filetype + '.json', 'w') as revsKW:
        json.dump(revs, revsKW)
    return 1
def extractRevsLabel(filetype, pattern):
    """Build the keyword list and star label for every review and save both.

    Reads '../data/chres_review.json' line by line, parsing each line as a
    JSON object and using its 'stars' and 'text' fields. The text (or the
    subset of it selected by ``pattern``) is passed through
    ``tp.removeStopPunc`` and collected; the star values are collected in
    the same order.

    Outputs:
        '../data/label<filetype>.npy'  -- the star values, saved with np.save
        '../data/revsKW<filetype>.json' -- the per-review results of
                                           tp.removeStopPunc, dumped as JSON

    Args:
        filetype: string inserted into the two output file names.
        pattern: 'AllWords' to process the full review text; any other
            value is handed to ``tp.posExtract`` together with the
            ``tp.posTag`` output, and the extracted items are joined with
            single spaces before stop-word/punctuation removal.

    Returns:
        Always 1.

    NOTE(review): this file appears to define extractRevsLabel once already
    just before this definition; this later one replaces it. Consider
    removing the duplicate.
    """
    use_full_text = pattern == 'AllWords'
    revs = []
    lab = []

    # `with` guarantees the input handle is released even when a line fails
    # to parse or a tp.* helper raises.
    with open('../data/chres_review.json') as review:
        for count, line in enumerate(review, 1):
            jre = json.loads(line)
            jstar = jre['stars']
            text = jre['text']
            if not use_full_text:
                # Keep only the tokens whose POS tag is selected by `pattern`.
                text = ' '.join(tp.posExtract(tp.posTag(text), pattern))
            lab.append(jstar)
            revs.append(tp.removeStopPunc(text))
            # Progress counter; the parenthesised form prints the same thing
            # under the Python 2 print statement used elsewhere in this file.
            print(count)

    np.save('../data/label' + filetype + '.npy', np.array(lab))

    # The output file is opened only once all reviews are processed, so a
    # failure part-way through no longer truncates a previous result.
    with open('../data/revsKW' + filetype + '.json', 'w') as revsKW:
        json.dump(revs, revsKW)
    return 1
vocabulary = open('../data/vocabularyNN.txt','w+') label = open('../data/labelNN.txt','w+') data = open('../data/dataNN.txt','w+') voca = [] revs = [] lab = [] dat = [] i=1 for line in review: jre = json.loads(line) jstar = jre['stars'] text = jre['text'] tagText = tp.posTag(text) adj = tp.posExtract(tagText,'NN.*') adjs = ' '.join(adj) ws = tp.removeStopPunc(adjs) if ws!=[]: lab.append(jstar) revs.append(ws) voca.extend(ws) print i # if i==5: break i += 1 print lab for i in lab: