# Remove the "out" directory tree if it exists; ignore_errors=True means a
# missing or undeletable directory does not stop the script.
shutil.rmtree("out", ignore_errors=True)

# The first command-line argument is the path handed to the XML parser below.
inputpath = sys.argv[1]

# Start-up banner. Each line is prefixed with whatever time() returns
# (time() is defined outside this section; presumably a timestamp string).
for _banner_line in (
    " ==================================================",
    " ===================Process start==================",
    " ==================================================",
):
    print(time() + _banner_line)
print(time() + " input file: " + inputpath)

# ---------------------------------------------------------------------------
# Step 1: parse the XML input to collect all instances.
# ---------------------------------------------------------------------------
print(time() + " Parsing XML...")
xmlparser = XMLParser()
xmlparser.parse(inputpath)

# Raw text: the entire text found between the context tags of the XML file.
instances_text_raw = xmlparser.get_raw_text()

# Clean text: the same context text after cleaning (extra symbols removed).
instances_text_clean = xmlparser.get_clean_text()

# Instance ids together with their sense ids; used to generate the key file.
instances_data_old = xmlparser.get_instances_data()

targetword = xmlparser.get_targetword()

print(time() + " Parsing XML finish.")
print(time() + " target word: " + targetword)
print(time() + " " + str(len(instances_text_raw)) + " instances found.")

# ---------------------------------------------------------------------------
# Step 2: cluster the instances, using the cleaned context text as input.
# ---------------------------------------------------------------------------
print(time() + " Clustering instances...")
sense_cluster = SenseCluster()
sense_cluster.cluster(instances_text_clean)

# Collect the clustering results for the steps that follow.
clusters = sense_cluster.get_clusters()
dimensions = sense_cluster.get_dimensions()
instances_data_new = sense_cluster.get_instance_data()

print(time() + " Clustering instances finish...")
print(time() + " " + str(len(clusters)) + " clusters was generated.")

# ---------------------------------------------------------------------------
# Step 3: generate definition
# ---------------------------------------------------------------------------