Beispiel #1
0
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus
# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
corpus = Corpus(WORKSPACE, 'ads', 'english')

# Ask for clusters around the word 'experience' (size=3).
clusters = corpus.clusters('experience', size=3, show_progress=True)

# Print the result of df.head(10) -- presumably the top rows of a DataFrame.
preview = clusters.df.head(10)
print(preview)

# Export the result to an Excel file located at the corpus output path.
excel_file = corpus.output_path + 'clusters.xlsx'
clusters.save_excel(excel_file)
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus
# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
SHOW_PROGRESS = True
corpus = Corpus(WORKSPACE, 'ads', 'english')

# Each step feeds the next: wordlist -> keywords -> keywords dispersion.
wordlist = corpus.wordlist(show_progress=SHOW_PROGRESS)
keywords = corpus.keywords(wordlist, show_progress=SHOW_PROGRESS)
keywords_dispersion = corpus.keywords_dispersion(
    keywords,
    show_progress=SHOW_PROGRESS,
)

# Print the result of df.head(10), then export everything to Excel.
preview = keywords_dispersion.df.head(10)
print(preview)
excel_file = corpus.output_path + 'keywords_dispersion.xlsx'
keywords_dispersion.save_excel(excel_file)
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus
# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
corpus = Corpus(WORKSPACE, 'ads', 'english')

# Options forwarded as keyword arguments to corpus.collocates().
collocates_options = {
    'left_span': 2,
    'right_span': 2,
    'coll_pos': 'IN NN JJ VBN VBD',
    'show_progress': True,
}
collocates = corpus.collocates('experience', **collocates_options)

# Print the result of df.head(10), then export everything to Excel.
preview = collocates.df.head(10)
print(preview)
excel_file = corpus.output_path + 'collocates.xlsx'
collocates.save_excel(excel_file)
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus
# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
corpus = Corpus(WORKSPACE, 'ads', 'english')

# Build concordances for the word 'experience'.
concordances = corpus.concordance('experience', show_progress=True)

# Print the result of df.head(10).
preview = concordances.df.head(10)
print(preview)

# Export to Excel, passing 'R1 R2 R3' as the highlight argument.
excel_file = corpus.output_path + 'concordances.xlsx'
concordances.save_excel(excel_file, highlight='R1 R2 R3')
Beispiel #5
0
# -*- coding: utf-8 -*-
# Author: [email protected]
import sys
from kitconc.kit_corpus import Corpus
# Example of a custom script: shows how command-line arguments arrive in
# sys.argv and how to open a Corpus from them.
#
# Expected arguments:
#   sys.argv[1] -- workspace
#   sys.argv[2] -- corpus name
#   sys.argv[3:] -- optional extra arguments, echoed back as-is
#
# Validate up front: both positional arguments are indexed below, and a
# missing one would otherwise surface as a bare IndexError after part of
# the banner had already been printed.
if len(sys.argv) < 3:
    sys.exit('Usage: <script> <workspace> <corpus> [extra arguments...]')

print('Hello!')
print('')
print("""This is an example on how to create a custom scripts.
\nHere is how arguments are received: 
""")
print('Arg 1 - Workspace: %s' % sys.argv[1])
print('Arg 2 - Corpus: %s' % sys.argv[2])
# Echo any additional arguments, numbered by their position in sys.argv.
for i, extra_arg in enumerate(sys.argv[3:], start=3):
    print('Arg %s: %s' % (i, extra_arg))
print('\nHere is how to use the Corpus object:')
corpus = Corpus(sys.argv[1], sys.argv[2])
print('\nTokens:  %s' % corpus.tokens())
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus
# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
corpus = Corpus(WORKSPACE, 'ads', 'english')

# The same three column names are used for sorting and for highlighting.
columns = ('R1', 'R2', 'R3')

# Build the KWIC result for 'experience' and sort it by those columns.
kwic = corpus.kwic('experience', show_progress=True)
kwic.sort(*columns)

# Print the result of df.head(10).
preview = kwic.df.head(10)
print(preview)

# Export to Excel; the highlight argument is the space-separated columns.
excel_file = corpus.output_path + 'kwic.xlsx'
kwic.save_excel(excel_file, highlight=' '.join(columns))
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus
# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
corpus = Corpus(WORKSPACE, 'ads', 'english')

# Compute the dispersion result for the word 'salary'.
dispersion = corpus.dispersion('salary')

# Print the result of df.head(10), then export everything to Excel.
preview = dispersion.df.head(10)
print(preview)
excel_file = corpus.output_path + 'dispersion.xlsx'
dispersion.save_excel(excel_file)
Beispiel #8
0
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
corpus = Corpus(WORKSPACE, 'ads', 'english')

# The node word is used both for the search and for the graph below.
node_word = 'skills'

# Options forwarded as keyword arguments to corpus.collocates().
collocates_options = {
    'left_span': 3,
    'right_span': 3,
    'coll_pos': 'NN JJ',
    'show_progress': True,
}
collocates = corpus.collocates(node_word, **collocates_options)

# Print the result of df.head(10), then export everything to Excel.
preview = collocates.df.head(10)
print(preview)
excel_file = corpus.output_path + 'collocates.xlsx'
collocates.save_excel(excel_file)

# Plot the collocates graph for the node word.
collocates.plot_collgraph(node=node_word)
Beispiel #9
0
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
corpus = Corpus(WORKSPACE, 'ads', 'english')

# Request n-grams with size=3 and the pos argument 'NN IN NN'.
ngrams_options = {'size': 3, 'pos': 'NN IN NN', 'show_progress': True}
ngrams = corpus.ngrams(**ngrams_options)

# Print the result of df.head(10), then export everything to Excel.
preview = ngrams.df.head(10)
print(preview)
excel_file = corpus.output_path + 'ngrams.xlsx'
ngrams.save_excel(excel_file)
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus
# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
corpus = Corpus(WORKSPACE, 'ads', 'english')

# Collocations are computed from a KWIC result, here for the word 'skills'.
kwic = corpus.kwic('skills', show_progress=True)
collocations = corpus.collocations(kwic, show_progress=True)

# Print the result of df.head(10), then export everything to Excel.
preview = collocations.df.head(10)
print(preview)
excel_file = corpus.output_path + 'collocations.xlsx'
collocations.save_excel(excel_file)

# Plot the distribution of the collocate 'strong'.
collocations.plot_colldist('strong')
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus 
# Open the 'ads' corpus stored in the examples workspace.
WORKSPACE = 'kitconc-examples/workspace'
corpus = Corpus(WORKSPACE, 'ads', 'english')

# Build the wordlist.
wordlist = corpus.wordlist(show_progress=True)

# Print the result of df.head(10).
preview = wordlist.df.head(10)
print(preview)

# Export the wordlist to an Excel file located at the corpus output path.
excel_file = corpus.output_path + 'wordlist.xlsx'
wordlist.save_excel(excel_file)
# -*- coding: utf-8 -*-
"""
Kitconc examples
@author: [email protected]
"""
from kitconc.kit_corpus import Corpus
# Locations used by this example: the workspace and the source text folder.
WORKSPACE = 'kitconc-examples/workspace'
SOURCE_FOLDER = 'kitconc-examples/ads'

# Open the 'ads' corpus in the workspace.
corpus = Corpus(WORKSPACE, 'ads', 'english')

# Add the texts found in the source folder to the corpus.
corpus.add_texts(SOURCE_FOLDER, show_progress=True)