def solve(self):
    """Print the surface form of every verb morpheme, one per line.

    Reads the object returned by
    ``PickleCache().get('./neko.txt.mecab.pickle')`` and treats it as an
    iterable of sentences, each an iterable of dicts with at least the
    keys ``'pos'`` and ``'surface'``.  Always returns ``None``.
    """
    cache = PickleCache()
    sentences = cache.get('./neko.txt.mecab.pickle')
    for sentence in sentences:
        # Lazily select morphemes tagged as verbs ('動詞').
        verbs = (morpheme for morpheme in sentence
                 if morpheme['pos'] == '動詞')
        for verb in verbs:
            print(verb['surface'])
    return None
def solve(self):
    """Print the surface form of every sahen-connection noun.

    A morpheme is printed when its ``'pos'`` is '名詞' (noun) and its
    ``'pos1'`` is 'サ変接続' (sahen connection).  Input comes from
    ``PickleCache().get('./neko.txt.mecab.pickle')``, iterated as
    sentences of morpheme dicts.  Always returns ``None``.
    """
    cache = PickleCache()
    sentences = cache.get('./neko.txt.mecab.pickle')
    for sentence in sentences:
        for morpheme in sentence:
            # 'pos1' is only inspected once 'pos' has matched.
            if morpheme['pos'] != '名詞':
                continue
            if morpheme['pos1'] != 'サ変接続':
                continue
            print(morpheme['surface'])
    return None
def solve(self):
    """Print every "noun + の + noun" sequence found inside a sentence.

    For each morpheme whose surface is 'の' and which has a neighbour on
    both sides, the three surfaces are concatenated and printed when both
    neighbours have ``'pos'`` equal to '名詞' (noun).  Input comes from
    ``PickleCache().get('./neko.txt.mecab.pickle')``.  Always returns
    ``None``.
    """
    cache = PickleCache()
    sentences = cache.get('./neko.txt.mecab.pickle')
    for sentence in sentences:
        last_index = len(sentence) - 1
        for position, particle in enumerate(sentence):
            if particle['surface'] != 'の':
                continue
            # The particle needs a morpheme on each side.
            if position <= 0 or position >= last_index:
                continue
            before = sentence[position - 1]
            after = sentence[position + 1]
            if before['pos'] != '名詞' or after['pos'] != '名詞':
                continue
            print(before['surface'] + particle['surface'] + after['surface'])
    return None
def solve(self):
    """Print each base form with its occurrence count, most frequent first.

    Collects the ``'base'`` value of every morpheme in the object returned
    by ``PickleCache().get('./neko.txt.mecab.pickle')``, counts them with
    ``Counter`` and prints one "<word> <count>" line per distinct value in
    ``most_common()`` order.  Always returns ``None``.
    """
    cache = PickleCache()
    sentences = cache.get('./neko.txt.mecab.pickle')
    base_forms = []
    for sentence in sentences:
        for morpheme in sentence:
            base_forms.append(morpheme['base'])
    frequencies = Counter(base_forms)
    for word, freq in frequencies.most_common():
        # Single-argument print keeps the "word count" output format.
        print('%s %s' % (word, freq))
    return None
def solve(self):
    """Show a histogram of base-form occurrence counts.

    Counts the ``'base'`` value of every morpheme in the object returned
    by ``PickleCache().get('./neko.txt.mecab.pickle')`` and plots the
    counts with ``plt.hist`` using 100 bins over the range 0..10000, then
    displays the figure.  Always returns ``None``.
    """
    pc = PickleCache()
    morphologies = pc.get('./neko.txt.mecab.pickle')
    count = Counter(m['base'] for sentence in morphologies for m in sentence)
    # Only the counts are plotted, so the words themselves are not needed.
    plt.hist(list(count.values()), bins=100, range=(0, 10000))
    # NOTE(review): the title says "Bocchan" but the input file is
    # neko.txt -- confirm which text this plot is meant to describe.
    plt.title('Histogram of Term Frequency in Bocchan')
    plt.xlabel('Term Frequency')
    # Fixed the misspelled axis label ('Freqency').
    plt.ylabel('Frequency')
    plt.show()
    return None
def solve(self):
    """Print every run of two or more consecutive nouns, joined together.

    Within each sentence of the object returned by
    ``PickleCache().get('./neko.txt.mecab.pickle')``, surfaces of adjacent
    morphemes whose ``'pos'`` is '名詞' (noun) are collected; a run of at
    least two is printed as a single concatenated string.  Runs never span
    sentence boundaries.  Always returns ``None``.
    """
    pc = PickleCache()
    morphologies = pc.get('./neko.txt.mecab.pickle')
    for sentence in morphologies:
        noun_phrase = []
        for m in sentence:
            if m['pos'] == '名詞':
                noun_phrase.append(m['surface'])
                continue
            # A non-noun ends the current run.
            if len(noun_phrase) >= 2:
                print(''.join(noun_phrase))
            noun_phrase = []
        # A run that reaches the end of the sentence has no following
        # non-noun to trigger the flush above, so emit it here.
        if len(noun_phrase) >= 2:
            print(''.join(noun_phrase))
    return None
def solve(self):
    """Show a bar chart of the (up to) ten most frequent base forms.

    Counts the ``'base'`` value of every morpheme in the object returned
    by ``PickleCache().get('./neko.txt.mecab.pickle')``, takes
    ``most_common(10)`` and draws one bar per entry, labelling each tick
    with the UTF-8-decoded word.  Always returns ``None``.
    """
    pc = PickleCache()
    morphologies = pc.get('./neko.txt.mecab.pickle')
    count = Counter(m['base'] for sentence in morphologies for m in sentence)
    top10 = count.most_common(10)
    # Size the x positions by what was actually returned: most_common(10)
    # yields fewer entries when there are fewer than ten distinct terms,
    # and a fixed arange(10) would then mismatch the bar heights.
    ind = np.arange(len(top10))
    width = 0.35
    plt.bar(ind, [freq for _, freq in top10], width)
    # NOTE(review): the title says "Bocchan" but the input file is
    # neko.txt -- confirm which text this plot is meant to describe.
    plt.title('Top 10 of Term Frequency in Bocchan')
    plt.xlabel('Terms')
    plt.ylabel('Frequency')
    # Words are decoded from UTF-8 before being used as tick labels.
    plt.xticks(ind + width / 2, [word.decode('utf-8') for word, _ in top10])
    plt.show()
    return None
def solve(self):
    """Show a log-log scatter plot of frequency rank against frequency.

    Counts the ``'base'`` value of every morpheme in the object returned
    by ``PickleCache().get('./neko.txt.mecab.pickle')``, orders the counts
    with ``most_common()`` and plots rank (starting at 1) on the x axis
    against the count on the y axis, both on logarithmic scales.  Always
    returns ``None``.
    """
    cache = PickleCache()
    sentences = cache.get('./neko.txt.mecab.pickle')
    base_forms = []
    for sentence in sentences:
        for morpheme in sentence:
            base_forms.append(morpheme['base'])
    ranked = Counter(base_forms).most_common()

    # Ranks are 1-based so they can be drawn on a logarithmic axis.
    ranks = np.arange(1, len(ranked) + 1)
    frequencies = [entry[1] for entry in ranked]

    plt.scatter(ranks, frequencies, s=10)
    plt.xscale('log')
    plt.yscale('log')
    plt.title('Relationship between Rank and Value of Term Frequency')
    plt.xlabel('Rank (Logarithmic)')
    plt.ylabel('Term Frequency (Logarithmic)')
    plt.show()
    return None