Ejemplo n.º 1
0
def main():
	"""Cluster the dev (or test) document vectors and evaluate the result.

	Command-line options:
		-wc/--wc      number of word clusters (int, default 150).
		-dc/--dc      number of document clusters (int, default 75).
		-test/--test  read the test docVectors file instead of the dev one.

	Calls ``bpc.main`` with a criteria value of 0.01, prints a dict holding
	the elapsed wall-clock seconds under the key 'runtime', then executes
	``eval.py`` with the document-cluster output file and the dev gold
	standard file as its command-line arguments.
	"""
	# optional arguments; %(default)s keeps the help text in sync with the
	# actual default values (the old text claimed 100 for both).
	argument_parser = argparse.ArgumentParser(description='Bipartite Clustering algorithm.')
	argument_parser.add_argument('-wc','--wc',type=int,default=150,help='number of word clusters. default=%(default)s.')
	argument_parser.add_argument('-dc','--dc',type=int,default=75,help='number of document clusters. default=%(default)s.')
	argument_parser.add_argument('-test','--test',help="Work with test set.",action='store_true')
	parsed_arg = argument_parser.parse_args()

	# Raw strings: the backslashes are path separators, not escape sequences.
	# A raw literal cannot end in a backslash, so the trailing one is appended.
	data_dir = r"C:\Smuzi\CMU\ML Txt mining\HW\HW2\data" + "\\"
	test_docVector = data_dir + "HW2_test.docVectors"
	gold_std = data_dir + "HW2_dev.gold_standards"
	dev_docVector = data_dir + "HW2_dev.docVectors"
	word_df = data_dir + "HW2_dev.df"
	filename = test_docVector if parsed_arg.test else dev_docVector

	doc_output = "doc_clusters.out"
	word_output = "word_clusters.out"
	begin_time = time.time()
	criteria = 0.01
	bpc.main(parsed_arg.wc,parsed_arg.dc,filename,doc_output,word_output,word_df,criteria)

	output = {}
	output['runtime'] = time.time()-begin_time
	print(output)
	# NOTE(review): the dev gold standard is used even when --test is given;
	# confirm that evaluating test clusters against it is intended.
	# eval.py reads its inputs from sys.argv, so fake a command line for it.
	sys.argv = ['eval.py',doc_output,gold_std]
	execfile('eval.py')
	print('')
	print('')
Ejemplo n.º 2
0
def main():
	"""Grid-search the word/document cluster counts on the dev set.

	For every (wc, dc) pair with both values drawn from (50, 100, 150, 200),
	calls ``bpc.main`` with a criteria value of 0.1, prints a dict with the
	keys 'wc', 'dc' and 'runtime' (elapsed wall-clock seconds of that call),
	then executes ``eval.py`` with the document-cluster output file and the
	dev gold standard file as its command-line arguments.

	Input and output files are addressed by bare file name, i.e. relative to
	the current working directory.
	"""
	gold_std = "HW2_dev.gold_standards"
	filename = "HW2_dev.docVectors"
	word_df = "HW2_dev.df"

	doc_output = "doc_clusters.out"
	word_output = "word_clusters.out"
	cluster_counts = (50,100,150,200)
	for wc in cluster_counts:
		for dc in cluster_counts:
			# Time each configuration separately.
			begin_time = time.time()
			bpc.main(wc,dc,filename,doc_output,word_output,word_df,0.1)
			output = {}
			output['wc']=wc
			output['dc']=dc
			output['runtime'] = time.time()-begin_time
			print(output)
			# eval.py reads its inputs from sys.argv, so fake a command line.
			sys.argv = ['eval.py',doc_output,gold_std]
			execfile('eval.py')
			print('')
			print('')
Ejemplo n.º 3
0
def start(event):
    """Run one ``bpc.main`` cycle and show the saved figure on the canvas.

    Lazily creates the module-level ``listener`` and ``ucontrol`` objects,
    runs ``bpc.main`` for a file named ``test<tid>.uct`` (``tid`` is the
    ``start.tid`` function attribute, which must be initialised elsewhere),
    saves the current figure to ``images/<name>.png`` with the two returned
    values as its title, increments ``start.tid`` and displays the image.

    ``ucontrol.deflate(10, fast=True)`` is always called afterwards, even
    when the cycle raises.

    Args:
        event: the triggering event object; not used by this function.
    """
    global ucontrol, listener
    global image_tk
    global testing
    testing = False
    # Release the previous image by rebinding the global to None. The old
    # `del image_tk` unbound the global name, so a later call could raise
    # NameError on `image_tk` if nothing had re-created it in between.
    image_tk = None
    clear_can()
    if listener is None:
        listener = MyListener()
    if ucontrol is None:
        ucontrol = bpc.uControl(listener)
    try:
        name = "test%d.uct" % start.tid
        # Named *_value rather than `sys` so the local does not shadow the
        # standard `sys` module name (presumably systolic/diastolic readings
        # -- TODO confirm against bpc.main).
        sys_value, dia_value = bpc.main(name, listener, ucontrol, abort=abort)
        image_fn = 'images/%s.png' % name
        figure(620)
        title('%.0f/%.0f' % (sys_value, dia_value))
        savefig(image_fn)

        # Only advance the counter once the figure has been written.
        start.tid += 1
        im = Image.open(image_fn)
        display_image(im)
        can.update_idletasks()
        clf()
        start_b.top()
    finally:
        ucontrol.deflate(10, fast=True)
Ejemplo n.º 4
0
def start(event):
    """Run one ``bpc.main`` cycle and show the saved figure on the canvas.

    Lazily creates the module-level ``listener`` and ``ucontrol`` objects,
    runs ``bpc.main`` for a file named ``test<tid>.uct`` (``tid`` is the
    ``start.tid`` function attribute, which must be initialised elsewhere),
    saves the current figure to ``images/<name>.png`` with the two returned
    values as its title, increments ``start.tid`` and displays the image.

    ``ucontrol.deflate(10, fast=True)`` is always called afterwards, even
    when the cycle raises.

    Args:
        event: the triggering event object; not used by this function.
    """
    global ucontrol, listener
    global image_tk
    global testing
    testing = False
    # Release the previous image by rebinding the global to None. The old
    # `del image_tk` unbound the global name, so a later call could raise
    # NameError on `image_tk` if nothing had re-created it in between.
    image_tk = None
    clear_can()
    if listener is None:
        listener = MyListener()
    if ucontrol is None:
        ucontrol = bpc.uControl(listener)
    try:
        name = "test%d.uct" % start.tid
        # Named *_value rather than `sys` so the local does not shadow the
        # standard `sys` module name (presumably systolic/diastolic readings
        # -- TODO confirm against bpc.main).
        sys_value, dia_value = bpc.main(name, listener, ucontrol, abort=abort)
        image_fn = 'images/%s.png' % name
        figure(620)
        title('%.0f/%.0f' % (sys_value, dia_value))
        savefig(image_fn)

        # Only advance the counter once the figure has been written.
        start.tid += 1
        im = Image.open(image_fn)
        display_image(im)
        can.update_idletasks()
        clf()
        start_b.top()
    finally:
        ucontrol.deflate(10, fast=True)
Ejemplo n.º 5
0
def main():
	"""Cluster the test document vectors with the fixed final settings.

	Calls ``bpc.main`` with 150 word clusters, 75 document clusters and a
	criteria value of 0.01 on the test docVectors file, writing document
	clusters to ``usmadja-test-clusters.txt`` and word clusters to
	``test-word_clusters.out``. Prints a dict holding the elapsed wall-clock
	seconds under the key 'runtime'. No evaluation step is run.
	"""
	# Raw strings: the backslashes are path separators, not escape sequences.
	# A raw literal cannot end in a backslash, so the trailing one is appended.
	data_dir = r"C:\Smuzi\CMU\ML Txt mining\HW\HW2\data" + "\\"
	filename = data_dir + "HW2_test.docVectors"
	word_df = data_dir + "HW2_dev.df"

	doc_output = "usmadja-test-clusters.txt"
	word_output = "test-word_clusters.out"
	begin_time = time.time()
	criteria = 0.01
	bpc.main(150,75,filename,doc_output,word_output,word_df,criteria)

	output = {}
	output['runtime'] = time.time()-begin_time
	print(output)
Ejemplo n.º 6
0
def main():
	"""Sweep the criteria value on the dev set and evaluate each run.

	For each criteria value in (1, 0.1, 0.01, 0.001), calls ``bpc.main`` with
	150 word clusters and 75 document clusters on the dev docVectors file,
	prints a dict with the keys 'criteria' and 'runtime' (elapsed wall-clock
	seconds of that call), then executes ``eval.py`` with the
	document-cluster output file and the dev gold standard file as its
	command-line arguments.
	"""
	# Raw strings: the backslashes are path separators, not escape sequences.
	# A raw literal cannot end in a backslash, so the trailing one is appended.
	data_dir = r"C:\Smuzi\CMU\ML Txt mining\HW\HW2\data" + "\\"
	gold_std = data_dir + "HW2_dev.gold_standards"
	filename = data_dir + "HW2_dev.docVectors"
	word_df = data_dir + "HW2_dev.df"

	doc_output = "doc_clusters.out"
	word_output = "word_clusters.out"

	for criteria in (1,0.1,0.01,0.001):
		# Time each criteria value separately.
		begin_time = time.time()
		bpc.main(150,75,filename,doc_output,word_output,word_df,criteria)
		output={}
		output['criteria']= criteria
		output['runtime'] = time.time()-begin_time
		print(output)
		# eval.py reads its inputs from sys.argv, so fake a command line.
		sys.argv = ['eval.py',doc_output,gold_std]
		execfile('eval.py')
		print('')
		print('')