Example #1
0
# Validate the command line before touching the filesystem, so that a missing
# argument yields a usage message instead of a bare IndexError, and a bad
# invocation does not delete the existing "out" directory as a side effect.
if len(sys.argv) < 2:
    sys.exit("usage: <script> <input-file>")
inputpath = sys.argv[1]

# Remove any "out" directory tree under the current working directory
# (presumably left over from a previous run -- confirm against the code that
# writes to it). ignore_errors=True means a missing directory or a failed
# removal does not raise.
shutil.rmtree("out", ignore_errors=True)

# NOTE: time() is defined outside this excerpt; it is concatenated with str
# literals below, so it is expected to return a string (likely a timestamp).
print(time() + " ==================================================")
print(time() + " ===================Process start==================")
print(time() + " ==================================================")
print(time() + " input file: " + inputpath)

# --- Step 1: parse the input XML file and collect every instance ---
print(time() + " Parsing XML...")
xmlparser = XMLParser()
xmlparser.parse(inputpath)

# Raw context text: the entire text found between the context tags of the
# XML file.
instances_text_raw = xmlparser.get_raw_text()
# Cleaned context text: the same text between the context tags, with extra
# symbols removed.
instances_text_clean = xmlparser.get_clean_text()
# Instance ids together with sense ids; intended for generating the key file.
instances_data_old = xmlparser.get_instances_data()
targetword = xmlparser.get_targetword()

print(time() + " Parsing XML finish.")
print(time() + " target word: " + targetword)
print(
    time()
    + " "
    + str(len(instances_text_raw))
    + " instances found."
)

# --- Step 2: cluster the cleaned instance texts ---
print(time() + " Clustering instances...")
sense_cluster = SenseCluster()
sense_cluster.cluster(instances_text_clean)

# Read the results back from the clusterer once clustering has run.
clusters = sense_cluster.get_clusters()
dimensions = sense_cluster.get_dimensions()
instances_data_new = sense_cluster.get_instance_data()

print(time() + " Clustering instances finish...")
print(
    time()
    + " "
    + str(len(clusters))
    + " clusters was generated."
)

#generate definition