#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Driver script: dump the introspection attributes of a ProcessingCorpus
# instance and of the class itself, rendering one diff picture in between.
from ProcessingCorpus import ProcessingCorpus

if __name__ == "__main__":
    # No-op string label kept verbatim from the original script
    # (English: "Processing Corpus attributes").
    'Processing Corpus 属性'
    corpus = ProcessingCorpus()

    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the bare `print x` statement, which is a SyntaxError on Python 3.
    print(corpus.__doc__)
    print(corpus.__dict__)
    print(corpus.__module__)

    corpus.gendiffpicture("intent", type="java")

    # Instance attributes as "name : value". The pieces are formatted into a
    # single string so the text matches the old `print k, ":", v` statement
    # on either interpreter.
    for name, value in corpus.__dict__.items():
        print("%s : %s" % (name, value))

    # Class-level attribute names only, each prefixed with "-"; the values
    # were never printed, so only the keys are iterated.
    for name in ProcessingCorpus.__dict__:
        print("-" + name + " :")
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Driver script: run ProcessingCorpus.gendiffpredealtxt once with its default
# arguments, then once for each explicitly named type.
from ProcessingCorpus import ProcessingCorpus

if __name__ == "__main__":
    # No-op string label kept verbatim from the original script
    # (English: "preprocess text").
    '预处理文本'
    corpus = ProcessingCorpus()

    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the bare `print x` statement, which is a SyntaxError on Python 3.
    print(corpus.__doc__)
    print(corpus.__dict__)

    # First pass relies on whatever default `type` the method defines.
    corpus.gendiffpredealtxt()

    # Remaining passes announce the type on stdout before running it.
    for kind in ("java", "xml"):
        print(kind)
        corpus.gendiffpredealtxt(type=kind)
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Driver script: render diff pictures for a few keyword / type combinations
# through ProcessingCorpus.gendiffpicture.
from ProcessingCorpus import ProcessingCorpus

# The guard previously compared against "__", which __name__ never equals,
# so nothing below ever executed when the script was run directly.
if __name__ == "__main__":
    # No-op string label kept verbatim from the original script
    # (English: "generate diff pictures").
    '生成diff的图片'
    corpus = ProcessingCorpus()

    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the bare `print x` statement, which is a SyntaxError on Python 3.
    print(corpus.__doc__)
    print(corpus.__dict__)

    corpus.gendiffpicture("intent", type="java", size=3)
    corpus.gendiffpicture("SNAPSHOT", type="all")
    corpus.gendiffpicture("intent", type="xml")
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Driver script: call ProcessingCorpus.genmodel for each corpus type.
from ProcessingCorpus import ProcessingCorpus

if __name__ == "__main__":
    # No-op string label kept verbatim from the original script
    # (English: "generate word-vector models").
    '生成词向量模型'
    corpus = ProcessingCorpus()

    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the bare `print x` statement, which is a SyntaxError on Python 3.
    print(corpus.__doc__)
    print(corpus.__dict__)

    # Same order as the original three explicit calls.
    for kind in ("all", "java", "xml"):
        corpus.genmodel(type=kind)
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Driver script: render a single diff picture for the keyword "Intent".
from ProcessingCorpus import ProcessingCorpus

if __name__ == "__main__":
    # No-op string label kept verbatim from the original script
    # (English: "get word similarity").
    '获得单词的相似度'
    keyword = "Intent"
    corpus = ProcessingCorpus()
    corpus.gendiffpicture(keyword, type="java", size=2)

    # Calls that were already commented out in the original script,
    # preserved here for reference:
    # word = corpus.getsmailarword("intent", type="all", number=10)
    # print word
    # corpus.gendiffpicture("intent", type="xml")
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Driver script: call ProcessingCorpus.gendiffsanitizationtxt for each
# corpus type.
from ProcessingCorpus import ProcessingCorpus

if __name__ == "__main__":
    # No-op string label kept verbatim from the original script
    # (English: "sanitize text").
    '清洗文本'
    corpus = ProcessingCorpus()

    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the bare `print x` statement, which is a SyntaxError on Python 3.
    print(corpus.__doc__)
    print(corpus.__dict__)

    # Same order as the original three explicit calls.
    for kind in ("all", "java", "xml"):
        corpus.gendiffsanitizationtxt(type=kind)
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Driver script: call ProcessingCorpus.gendirectory for several corpus types.
from ProcessingCorpus import ProcessingCorpus

if __name__ == "__main__":
    # No-op string label kept verbatim from the original script
    # (English: "generate dictionary").
    '生成字典'
    corpus = ProcessingCorpus()

    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the bare `print x` statement, which is a SyntaxError on Python 3.
    print(corpus.__doc__)
    print(corpus.__dict__)

    # NOTE(review): "xml" is processed twice and "all" never is. The first
    # call was presumably meant to be type="all" -- confirm that gendirectory
    # accepts "all" before changing it. Call sequence left as in the original.
    corpus.gendirectory(type="xml")
    corpus.gendirectory(type="java")
    corpus.gendirectory(type="xml")