Example #1
0
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from ProcessingCorpus import ProcessingCorpus

if __name__ == "__main__":
    'Processing Corpus 属性'
    e = ProcessingCorpus()
    print e.__doc__
    print e.__dict__
    print e.__module__

    e.gendiffpicture("intent", type="java")

    for k, v in e.__dict__.items():
        print k, ":", v

    for k, v in ProcessingCorpus.__dict__.items():
        print "-" + k, ":"
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from ProcessingCorpus import  ProcessingCorpus
if __name__=="__main__":
    '预处理文本'
    e=ProcessingCorpus()
    print e.__doc__
    print e.__dict__
    e.gendiffpredealtxt()
    print "java"
    e.gendiffpredealtxt(type ="java")
    print "xml"
    e.gendiffpredealtxt(type= "xml")
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from ProcessingCorpus import ProcessingCorpus
if __name__ == "__":
    '生成diff的图片'
    e = ProcessingCorpus()
    print e.__doc__
    print e.__dict__
    e.gendiffpicture("intent", type="java", size=3)

    e.gendiffpicture("SNAPSHOT", type="all")
    e.gendiffpicture("intent", type="xml")
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from ProcessingCorpus import ProcessingCorpus
if __name__ == "__main__":
    '生成词向量模型'
    e = ProcessingCorpus()
    print e.__doc__
    print e.__dict__
    e.genmodel(type="all")
    e.genmodel(type="java")
    e.genmodel(type="xml")
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from ProcessingCorpus import ProcessingCorpus
if __name__ == "__main__":
    # Demo script: a single gendiffpicture call for the keyword "Intent".
    # The bare string below is a no-op label ("get word similarity").
    '获得单词的相似度'
    corpus = ProcessingCorpus()
    corpus.gendiffpicture("Intent", type="java", size=2)

    # Disabled experiments, kept for reference:
    # word=e.getsmailarword("intent",type="all",number=10)
    # print word
    # e.gendiffpicture("intent",type="xml")
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from ProcessingCorpus import ProcessingCorpus

if __name__ == "__main__":
    '清洗文本'
    e = ProcessingCorpus()
    print e.__doc__
    print e.__dict__

    e.gendiffsanitizationtxt(type="all")
    e.gendiffsanitizationtxt(type="java")
    e.gendiffsanitizationtxt(type="xml")
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from ProcessingCorpus import ProcessingCorpus

if __name__ == "__main__":
    '生成字典'
    e = ProcessingCorpus()
    print e.__doc__
    print e.__dict__
    e.gendirectory(type="xml")
    e.gendirectory(type="java")
    e.gendirectory(type="xml")