def extractRevsLabel(filetype, pattern):
    """Extract star labels and cleaned word lists from the review dump.

    Reads ``../data/chres_review.json`` line by line, parsing every line as
    a JSON object that has ``stars`` and ``text`` keys.  The collected
    ``stars`` values are stored with ``np.save`` in
    ``../data/label<filetype>.npy`` and the per-review results of
    ``tp.removeStopPunc`` are dumped as JSON into
    ``../data/revsKW<filetype>.json``.

    Args:
        filetype: String spliced into the names of both output files.
        pattern: ``'AllWords'`` passes the raw review text straight to
            ``tp.removeStopPunc``.  Any other value is forwarded to
            ``tp.posExtract`` (together with the output of ``tp.posTag``)
            and only the extracted items, joined by spaces, are cleaned.

    Returns:
        Always ``1``.

    Raises:
        IOError/OSError: If either file cannot be opened.
        ValueError: If a line is not valid JSON.
        KeyError: If a record lacks ``stars`` or ``text``.
    """
    revs = []
    lab = []

    # Context managers guarantee both handles are closed even when a line
    # fails to parse or process.  The output file is still opened (and
    # truncated) before any input is read, exactly as before.
    with open('../data/chres_review.json') as review, \
            open('../data/revsKW' + filetype + '.json', 'w+') as revsKW:
        for count, line in enumerate(review, 1):
            jre = json.loads(line)
            jstar = jre['stars']
            text = jre['text']

            if pattern != 'AllWords':
                # Keep only the words whose POS tag is selected by `pattern`.
                tagText = tp.posTag(text)
                text = ' '.join(tp.posExtract(tagText, pattern))
            ws = tp.removeStopPunc(text)

            # Append only after all processing succeeded so that labels and
            # reviews always stay aligned.
            lab.append(jstar)
            revs.append(ws)
            print(count)  # progress: 1-based number of reviews processed

        np.save('../data/label' + filetype + '.npy', np.array(lab))
        json.dump(revs, revsKW)

    return 1
# Example no. 2
# 0
def extractRevsLabel(filetype, pattern):
    """Extract star labels and cleaned word lists from the review dump.

    Reads ``../data/chres_review.json`` line by line, parsing every line as
    a JSON object that has ``stars`` and ``text`` keys.  The collected
    ``stars`` values are stored with ``np.save`` in
    ``../data/label<filetype>.npy`` and the per-review results of
    ``tp.removeStopPunc`` are dumped as JSON into
    ``../data/revsKW<filetype>.json``.

    Args:
        filetype: String spliced into the names of both output files.
        pattern: ``'AllWords'`` passes the raw review text straight to
            ``tp.removeStopPunc``.  Any other value is forwarded to
            ``tp.posExtract`` (together with the output of ``tp.posTag``)
            and only the extracted items, joined by spaces, are cleaned.

    Returns:
        Always ``1``.

    Raises:
        IOError/OSError: If either file cannot be opened.
        ValueError: If a line is not valid JSON.
        KeyError: If a record lacks ``stars`` or ``text``.
    """
    revs = []
    lab = []

    # Context managers guarantee both handles are closed even when a line
    # fails to parse or process.  The output file is still opened (and
    # truncated) before any input is read, exactly as before.
    with open('../data/chres_review.json') as review, \
            open('../data/revsKW' + filetype + '.json', 'w+') as revsKW:
        for count, line in enumerate(review, 1):
            jre = json.loads(line)
            jstar = jre['stars']
            text = jre['text']

            if pattern != 'AllWords':
                # Keep only the words whose POS tag is selected by `pattern`.
                tagText = tp.posTag(text)
                text = ' '.join(tp.posExtract(tagText, pattern))
            ws = tp.removeStopPunc(text)

            # Append only after all processing succeeded so that labels and
            # reviews always stay aligned.
            lab.append(jstar)
            revs.append(ws)
            print(count)  # progress: 1-based number of reviews processed

        np.save('../data/label' + filetype + '.npy', np.array(lab))
        json.dump(revs, revsKW)

    return 1
# Example no. 3
# 0
# Script fragment: collect the star label of every review that still has
# content after POS filtering with 'NN.*' (presumably noun tags -- confirm
# against tp.posExtract), then write one label per line to labelNN.txt.
# `review`, `json` and `tp` are not defined in this fragment and must already
# be in scope; `review` is iterated as a sequence of JSON lines.
label = open('../data/labelNN.txt', 'w+')
# Opened (and truncated) here; nothing is written to it in the visible code.
data = open('../data/dataNN.txt', 'w+')

voca = []  # every item of every kept `ws`, flattened
revs = []  # kept `ws` values, one per review
lab = []   # star label of each kept review, aligned with `revs`
dat = []   # not used in the visible code

i = 1  # 1-based count of kept reviews, printed as progress
for line in review:
    jre = json.loads(line)
    jstar = jre['stars']
    text = jre['text']

    tagText = tp.posTag(text)
    adj = tp.posExtract(tagText, 'NN.*')
    adjs = ' '.join(adj)
    ws = tp.removeStopPunc(adjs)

    # Reviews with nothing left after filtering are skipped entirely, so
    # they contribute neither a label nor a row.
    if ws != []:
        lab.append(jstar)
        revs.append(ws)
        voca.extend(ws)
        print(i)
        i += 1


print(lab)
# NOTE: this loop reuses `i`, overwriting the progress counter above.
for i in lab:
    label.write(str(i) + "\n")
# The handle is left open because code outside this fragment may still use
# it; flush so the labels are on disk regardless.
label.flush()
# Script fragment: collect the star label of every review that still has
# content after POS filtering with 'JJ.*' (presumably adjective tags --
# confirm against tp.posExtract), then write one label per line to
# labelAdj.txt.
# `review`, `json` and `tp` are not defined in this fragment and must already
# be in scope.  NOTE(review): if `review` is a file object that was already
# iterated earlier, the loop below sees no lines -- confirm it is fresh.
labelAdj = open('../data/labelAdj.txt', 'w+')
# Opened (and truncated) here; nothing is written to it in the visible code.
data = open('../data/dataAdj.txt', 'w+')

voca = []  # every item of every kept `ws`, flattened
revs = []  # kept `ws` values, one per review
lab = []   # star label of each kept review, aligned with `revs`
dat = []   # not used in the visible code

i = 1  # 1-based count of kept reviews, printed as progress
for line in review:
    jre = json.loads(line)
    jstar = jre['stars']
    text = jre['text']

    tagText = tp.posTag(text)
    adj = tp.posExtract(tagText, 'JJ.*')
    adjs = ' '.join(adj)
    ws = tp.removeStopPunc(adjs)

    # Reviews with nothing left after filtering are skipped entirely, so
    # they contribute neither a label nor a row.
    if ws != []:
        lab.append(jstar)
        revs.append(ws)
        voca.extend(ws)
        print(i)
        i += 1

print(lab)
# NOTE: this loop reuses `i`, overwriting the progress counter above.
for i in lab:
    labelAdj.write(str(i) + "\n")
# Flush before reporting success so the message is true even though the
# handle stays open (code outside this fragment may still use it).
labelAdj.flush()
print("successfully create labelAdj!")
# Script fragment: collect the star label of every review that still has
# content after POS filtering with "JJ.*" (presumably adjective tags --
# confirm against tp.posExtract), then write one label per line to
# labelAdj.txt.
# `review`, `json` and `tp` are not defined in this fragment and must already
# be in scope.  NOTE(review): if `review` is a file object that was already
# iterated earlier, the loop below sees no lines -- confirm it is fresh.
labelAdj = open("../data/labelAdj.txt", "w+")
# Opened (and truncated) here; nothing is written to it in the visible code.
data = open("../data/dataAdj.txt", "w+")

voca = []  # every item of every kept `ws`, flattened
revs = []  # kept `ws` values, one per review
lab = []  # star label of each kept review, aligned with `revs`
dat = []  # not used in the visible code

i = 1  # 1-based count of kept reviews, printed as progress
for line in review:
    jre = json.loads(line)
    jstar = jre["stars"]
    text = jre["text"]

    tagText = tp.posTag(text)
    adj = tp.posExtract(tagText, "JJ.*")
    adjs = " ".join(adj)
    ws = tp.removeStopPunc(adjs)

    # Reviews with nothing left after filtering are skipped entirely, so
    # they contribute neither a label nor a row.
    if ws != []:
        lab.append(jstar)
        revs.append(ws)
        voca.extend(ws)
        print(i)
        i += 1


print(lab)
# NOTE: this loop reuses `i`, overwriting the progress counter above.
for i in lab:
    labelAdj.write(str(i) + "\n")
# The handle is left open because code outside this fragment may still use
# it; flush so the labels are on disk regardless.
labelAdj.flush()