def count(path, out):
    """Write the positive/negative opinion ratio of every feature to `out`.

    Each line of `path` is split on single spaces; the first field is the
    feature and the second is the opinion word. An opinion with a truthy
    entry in the mapping that `file2dic` builds from data/pos.txt is counted
    as positive; otherwise one with a truthy entry in the mapping built from
    data/neg.txt is counted as negative; any other opinion is ignored.

    Args:
        path: input file holding one "feature opinion" pair per line.
        out: output file; receives one line per counted feature with both
            ratios formatted to four decimal places.
    """
    with open(path) as fo, open(out, 'w') as fw:
        posD = file2dic(os.path.join(pwd, 'data', 'pos.txt'))
        negD = file2dic(os.path.join(pwd, 'data', 'neg.txt'))
        distribution = {}

        for line in fo:
            fields = line.strip().split(' ')
            # A line without a second space-separated field (e.g. a blank
            # line) carries no opinion; skip it rather than raising
            # IndexError and aborting the whole run.
            if len(fields) < 2:
                continue
            feature, opinion = fields[0], fields[1]
            # Two consecutive spaces produce an empty opinion field.
            if not opinion:
                continue

            # The positive mapping takes precedence over the negative one.
            if posD.get(opinion):
                polarity = "pos"
            elif negD.get(opinion):
                polarity = "neg"
            else:
                continue

            counts = distribution.setdefault(feature, {"pos": 0, "neg": 0})
            counts[polarity] += 1

        for feature in distribution:
            counts = distribution[feature]
            # total is at least 1: an entry is only created when an opinion
            # is counted. float() keeps true division under Python 2.
            total = counts["pos"] + counts["neg"]
            fw.write(
                "%s: the positive ratio: %.4f , the negative ratio: %.4f\n"
                % (feature,
                   float(counts["pos"]) / total,
                   float(counts["neg"]) / total)
            )
        # No explicit close(): the with-statement closes both files.
def count(path, out):
    """Write the positive/negative opinion ratio of every feature to `out`.

    Each line of `path` is split on single spaces; the first field is the
    feature and the second is the opinion word. An opinion with a truthy
    entry in the mapping that `file2dic` builds from data/pos.txt is counted
    as positive; otherwise one with a truthy entry in the mapping built from
    data/neg.txt is counted as negative; any other opinion is ignored.

    Args:
        path: input file holding one "feature opinion" pair per line.
        out: output file; receives one line per counted feature with both
            ratios formatted to four decimal places.
    """
    with open(path) as fo, open(out, 'w') as fw:
        posD = file2dic(os.path.join(pwd, 'data', 'pos.txt'))
        negD = file2dic(os.path.join(pwd, 'data', 'neg.txt'))
        distribution = {}

        for line in fo:
            fields = line.strip().split(' ')
            # A line without a second space-separated field (e.g. a blank
            # line) carries no opinion; skip it rather than raising
            # IndexError and aborting the whole run.
            if len(fields) < 2:
                continue
            feature, opinion = fields[0], fields[1]
            # Two consecutive spaces produce an empty opinion field.
            if not opinion:
                continue

            # The positive mapping takes precedence over the negative one.
            if posD.get(opinion):
                polarity = "pos"
            elif negD.get(opinion):
                polarity = "neg"
            else:
                continue

            counts = distribution.setdefault(feature, {"pos": 0, "neg": 0})
            counts[polarity] += 1

        for feature in distribution:
            counts = distribution[feature]
            # total is at least 1: an entry is only created when an opinion
            # is counted. float() keeps true division under Python 2.
            total = counts["pos"] + counts["neg"]
            fw.write(
                "%s: the positive ratio: %.4f , the negative ratio: %.4f\n"
                % (feature,
                   float(counts["pos"]) / total,
                   float(counts["neg"]) / total)
            )
        # No explicit close(): the with-statement closes both files.
def pos2basket(path, out, separator="---------#NR -#PU"):
    """Group the tagged tokens of `path` into comma-separated baskets.

    Lines are read in order. A line equal to `separator` (after stripping)
    closes the current basket, which is written to `out` as one comma-joined
    line; empty baskets are not written. Every other line is searched with
    the module-level pattern `pat`; each match has its "#NN"/"#NR" tag
    removed and is appended to the current basket unless it has a truthy
    entry in the mapping that `file2dic` builds from ./feature-exclude.txt.

    Tokens collected after the last separator are written as a final basket
    so that an input without a trailing separator does not lose them.

    Args:
        path: POS-tagged input file.
        out: output file, one basket per line.
        separator: stripped line that ends a basket. Other corpora may need
            a different marker, e.g. "--#PU --#PU --#PU --#PU --#PU".

    NOTE(review): `line.decode('utf8')` implies Python 2 byte-string lines
    and a unicode-aware `pat` -- confirm before porting to Python 3.
    """
    exclude = file2dic("./feature-exclude.txt")
    with open(path) as fo, open(out, 'w') as fw:
        basket = []
        for line in fo:
            line = line.strip()
            if line == separator:
                if basket:
                    fw.write(','.join(basket) + '\n')
                    basket = []
                continue

            # Match on unicode text, then store each token as UTF-8 bytes.
            for token in pat.findall(line.decode('utf8')):
                noun = token.encode('utf8')
                noun = noun.replace("#NN", '').replace("#NR", '')
                if exclude.get(noun):
                    continue
                basket.append(noun)

        # Flush whatever follows the last separator instead of dropping it.
        if basket:
            fw.write(','.join(basket) + '\n')
        # No explicit close(): the with-statement closes both files.
def pos2basket(path, out, separator="---------#NR -#PU"):
    """Group the tagged tokens of `path` into comma-separated baskets.

    Lines are read in order. A line equal to `separator` (after stripping)
    closes the current basket, which is written to `out` as one comma-joined
    line; empty baskets are not written. Every other line is searched with
    the module-level pattern `pat`; each match has its "#NN"/"#NR" tag
    removed and is appended to the current basket unless it has a truthy
    entry in the mapping that `file2dic` builds from ./feature-exclude.txt.

    Tokens collected after the last separator are written as a final basket
    so that an input without a trailing separator does not lose them.

    Args:
        path: POS-tagged input file.
        out: output file, one basket per line.
        separator: stripped line that ends a basket. Other corpora may need
            a different marker, e.g. "--#PU --#PU --#PU --#PU --#PU".

    NOTE(review): `line.decode('utf8')` implies Python 2 byte-string lines
    and a unicode-aware `pat` -- confirm before porting to Python 3.
    """
    exclude = file2dic("./feature-exclude.txt")
    with open(path) as fo, open(out, 'w') as fw:
        basket = []
        for line in fo:
            line = line.strip()
            if line == separator:
                if basket:
                    fw.write(','.join(basket) + '\n')
                    basket = []
                continue

            # Match on unicode text, then store each token as UTF-8 bytes.
            for token in pat.findall(line.decode('utf8')):
                noun = token.encode('utf8')
                noun = noun.replace("#NN", '').replace("#NR", '')
                if exclude.get(noun):
                    continue
                basket.append(noun)

        # Flush whatever follows the last separator instead of dropping it.
        if basket:
            fw.write(','.join(basket) + '\n')
        # No explicit close(): the with-statement closes both files.
distribution[feature]["neg"]+=1 for i in distribution.keys(): posCNT = (0 if distribution[i].get("pos") is None else distribution[i].get("pos")) negCNT = (0 if distribution[i].get("neg") is None else distribution[i].get("neg")) total = posCNT + negCNT posRATIO = "%.4f" %(float(posCNT) /total) negRATIO = "%.4f" %(float(negCNT) /total) fw.write(i+": "+"the positive ratio: "+posRATIO+" , the negative ratio: "+negRATIO+"\n") fw.close() ## the distribution is a global variable distribution = {} posD = file2dic(os.path.join(pwd,'data','pos.txt')) negD = file2dic(os.path.join(pwd,'data','neg.txt')) def countSTREAM(npop,distribution): line = npop.strip() feature = line.split(' ')[0] opinion = line.split(' ')[1] if opinion: if posD.get(opinion): if distribution.get(feature) is None: distribution[feature] = {"pos":1,"neg":0} else: distribution[feature]["pos"]+=1 elif negD.get(opinion): if distribution.get(feature) is None: distribution[feature] = {"pos":0,"neg":1} else:
posCNT = (0 if distribution[i].get("pos") is None else distribution[i].get("pos")) negCNT = (0 if distribution[i].get("neg") is None else distribution[i].get("neg")) total = posCNT + negCNT posRATIO = "%.4f" % (float(posCNT) / total) negRATIO = "%.4f" % (float(negCNT) / total) fw.write(i + ": " + "the positive ratio: " + posRATIO + " , the negative ratio: " + negRATIO + "\n") fw.close() ## the distribution is a global variable distribution = {} posD = file2dic(os.path.join(pwd, 'data', 'pos.txt')) negD = file2dic(os.path.join(pwd, 'data', 'neg.txt')) def countSTREAM(npop, distribution): line = npop.strip() feature = line.split(' ')[0] opinion = line.split(' ')[1] if opinion: if posD.get(opinion): if distribution.get(feature) is None: distribution[feature] = {"pos": 1, "neg": 0} else: distribution[feature]["pos"] += 1 elif negD.get(opinion): if distribution.get(feature) is None: