Example #1
0
def main():
    """Build the feature sets from ``stars_data.csv`` and run Test1-Test4.

    The rows come from ``a.read_data``; the first row is discarded and the
    remainder is shuffled in place.  Two row subsets (column 6 equal to 1
    and equal to 5) are turned into matrices via ``cluster.create_matrix``,
    which then feed ``cluster.kmeans_bin`` / ``cluster.smeans_bin``.  The
    resulting feature sets are handed to ``test1`` .. ``test4``.
    """
    rows = a.read_data("stars_data.csv")
    rows.pop(0)  # drop the first row (presumably the CSV header -- confirm)
    random.shuffle(rows)
    vocab = a.frequency_word(rows)

    # Single pass over the rows: column 6 == 1 goes to the negative set,
    # column 6 == 5 to the positive set, anything else is left out.
    # NOTE(review): column 6 looks like the star rating -- confirm.
    negatives = []
    positives = []
    for row in rows:
        score = int(row[6])
        if score == 1:
            negatives.append(row)
        elif score == 5:
            positives.append(row)

    # Both matrices start as zeros of the same fixed shape.
    shape = (2000, 2500)
    pos_matrix = cluster.create_matrix(np.zeros(shape), positives, vocab)
    neg_matrix = cluster.create_matrix(np.zeros(shape), negatives, vocab)

    kmeans_feature = cluster.kmeans_bin(rows, pos_matrix, neg_matrix, vocab, 50)
    smeans_feature = cluster.smeans_bin(rows, pos_matrix, neg_matrix, vocab, 50)
    origin_feature = a.create_binary_feature(rows, vocab, 6)

    # Same binary features, but built from 100 randomly sampled vocabulary
    # entries, then merged with the k-means features.
    sampled_vocab = random.sample(vocab, 100)
    sample_origin_feature = a.create_binary_feature(rows, sampled_vocab, 6)
    combine_feature = combine(kmeans_feature, sample_origin_feature)

    print("Test1")
    test1(pos_matrix, neg_matrix)
    print("Test2")
    test2(kmeans_feature, smeans_feature)
    print("Test3")
    test3(origin_feature, kmeans_feature)
    print("Test4")
    test4(sample_origin_feature, kmeans_feature, combine_feature)
Example #2
0
def main():
    """Mine frequent itemsets from ``stars_data.csv`` and print the top rules.

    Steps, all delegated to helpers defined elsewhere:

    1. Read the rows with ``a.read_data``, discard the first row and
       shuffle the rest in place.
    2. Build binary features with ``a.create_binary_feature`` and extend
       the word list with the ``isPositive`` / ``isNegative`` labels.
    3. Generate frequent itemsets with
       ``apriori.frequentItemsetGeneration`` and print the combined size
       of the first three levels.
    4. Generate rules with ``ruleG``, print how many there are, and print
       the (at most) 30 rules with the highest associated value.
    """
    # data preprocessing
    filename = "stars_data.csv"
    data = a.read_data(filename)
    data.pop(0)  # drop the first row (presumably the CSV header -- confirm)
    random.shuffle(data)
    words = a.frequency_word(data)
    features = a.create_binary_feature(data, words, 6)
    # The labels are appended only after the features have been built.
    words.append("isPositive")
    words.append("isNegative")
    minsupport = 0.03
    # NOTE(review): 3.81 is above 1, so this is presumably not a plain
    # confidence threshold -- confirm which measure ruleG compares against.
    minconf = 3.81

    L, support_count = apriori.frequentItemsetGeneration(features, words, minsupport)
    print(len(L[0]) + len(L[1]) + len(L[2]))
    rules = ruleG(L, support_count, minconf)
    print(len(rules))

    # Rank the rules by their value, highest first, and keep the top 30.
    # Slicing (rather than indexing 0..29) avoids an IndexError when fewer
    # than 30 rules were generated.
    ranked = sorted(rules.items(), key=operator.itemgetter(1), reverse=True)
    for rule in ranked[:30]:
        print(rule)
Example #3
0
def main():
	
	#data preprocessing
	filename = "stars_data.csv"
	data = a.read_data(filename)
	data.pop(0)
	random.shuffle(data)
	words = a.frequency_word(data)	
	features = a.create_binary_feature(data,words,6)
	words.append("isPositive")
	words.append("isNegative")
	minsupport = 0.03
	minconf = 0.25
	
	D = construct(features,words)
	D = map(set, D)
	t = []
	t.append(frozenset(['friendly']))
	t.append(frozenset(['isPositive']))
	t.append(frozenset(['staff']))
	t.append(frozenset(['favorite']))
	
	q2(D,t)
	'''