Example #1
0
 def solve(self):
     '''
     Print the surface form of every verb in the cached morpheme data.

     The data is fetched through PickleCache from
     './neko.txt.mecab.pickle' and walked as a sequence of sentences,
     each a sequence of morpheme dicts read through their 'pos' and
     'surface' keys.  (Presumably MeCab output for neko.txt, judging by
     the file name -- confirm against the code that builds the cache.)

     Returns None; one surface form is written to standard output per
     line, in document order.
     '''
     cache = PickleCache()
     sentences = cache.get('./neko.txt.mecab.pickle')
     # '動詞' is the part-of-speech tag for verbs.  The generator is lazy,
     # so morphemes are still examined and printed one at a time.
     verb_surfaces = (
         token['surface']
         for tokens in sentences
         for token in tokens
         if token['pos'] == '動詞'
     )
     for surface in verb_surfaces:
         # A parenthesised single argument works both as the Python 2
         # print statement and as the Python 3 print function.
         print(surface)

     return None
Example #2
0
 def solve(self):
     '''
     Print the surface form of every sa-hen (verbal) noun.

     A morpheme is selected when its 'pos' is '名詞' (noun) and its
     'pos1' sub-category is 'サ変接続'.  The data comes from PickleCache
     under './neko.txt.mecab.pickle' and is walked as sentences of
     morpheme dicts.

     Returns None; one surface form is written to standard output per
     line.
     '''
     cache = PickleCache()
     sentences = cache.get('./neko.txt.mecab.pickle')
     for tokens in sentences:
         for token in tokens:
             # Guard clauses: 'pos1' is only looked at for nouns.
             if token['pos'] != '名詞':
                 continue
             if token['pos1'] != 'サ変接続':
                 continue
             # Single-argument parenthesised print is valid on both
             # Python 2 and Python 3.
             print(token['surface'])

     return None
Example #3
0
 def solve(self):
     '''
     Print every "noun + の + noun" triple found inside a sentence.

     For each morpheme whose surface form is 'の' and which has a
     neighbour on both sides within the same sentence, the three surface
     forms are concatenated and printed when both neighbours carry the
     part-of-speech tag '名詞' (noun).  The data comes from PickleCache
     under './neko.txt.mecab.pickle'.

     Returns None; one phrase is written to standard output per line.
     '''
     cache = PickleCache()
     sentences = cache.get('./neko.txt.mecab.pickle')
     for tokens in sentences:
         final_index = len(tokens) - 1
         for idx, token in enumerate(tokens):
             if token['surface'] != 'の':
                 continue
             # The particle needs a morpheme on each side, so it can be
             # neither the first nor the last element of the sentence.
             if idx <= 0 or idx >= final_index:
                 continue
             before = tokens[idx - 1]
             after = tokens[idx + 1]
             if before['pos'] == '名詞' and after['pos'] == '名詞':
                 print(before['surface'] + token['surface'] + after['surface'])

     return None
Example #4
0
 def solve(self):
     '''
     Print each base form with its occurrence count, most frequent first.

     Base forms are read from the 'base' key of every morpheme in the
     data fetched through PickleCache ('./neko.txt.mecab.pickle') and
     tallied with collections.Counter.

     Returns None; each output line is "<base form> <count>".
     '''
     cache = PickleCache()
     sentences = cache.get('./neko.txt.mecab.pickle')
     tally = Counter()
     for tokens in sentences:
         tally.update(token['base'] for token in tokens)
     for base, occurrences in tally.most_common():
         # %-formatting yields the same "word count" line as the
         # two-argument print statement while staying valid as a
         # function call.
         print('%s %s' % (base, occurrences))

     return None
Example #5
0
 def solve(self):
     '''
     Show a histogram of term frequencies.

     Base forms ('base' key of every morpheme in the data fetched
     through PickleCache from './neko.txt.mecab.pickle') are counted
     with collections.Counter; the counts are then binned into 100 bins
     over the range 0..10000 and displayed with matplotlib.

     Returns None; the only effect is the plot window opened by
     plt.show().
     '''
     pc = PickleCache()
     morphologies = pc.get('./neko.txt.mecab.pickle')
     count = Counter(
         m['base'] for sentence in morphologies for m in sentence
     )
     # Only the frequencies are plotted, so the keys are not iterated.
     # list() keeps the argument a plain sequence on Python 3 as well,
     # where values() is a view.
     frequencies = list(count.values())
     plt.hist(frequencies, bins=100, range=(0, 10000))
     # NOTE(review): the title names 'Bocchan' although the data file is
     # neko.txt -- confirm which text is intended.
     plt.title('Histogram of Term Frequency in Bocchan')
     plt.xlabel('Term Frequency')
     # The y-axis label was misspelled as 'Freqency'.
     plt.ylabel('Frequency')
     plt.show()

     return None
Example #6
0
 def solve(self):
     '''
     Print every run of two or more consecutive nouns, joined together.

     Within each sentence of the data fetched through PickleCache from
     './neko.txt.mecab.pickle', consecutive morphemes whose 'pos' is
     '名詞' (noun) are collected; whenever a run of length >= 2 ends,
     the concatenation of its surface forms is printed.  Runs never span
     sentence boundaries.

     Returns None; one noun sequence is written to standard output per
     line.
     '''
     pc = PickleCache()
     morphologies = pc.get('./neko.txt.mecab.pickle')
     for sentence in morphologies:
         noun_phrase = []
         for m in sentence:
             if m['pos'] == '名詞':
                 noun_phrase.append(m['surface'])
                 continue
             # A non-noun terminates the current run.
             if len(noun_phrase) >= 2:
                 print(''.join(noun_phrase))
             noun_phrase = []
         # Flush a run that reaches the end of the sentence; without
         # this, a sentence ending in nouns silently loses its last run.
         if len(noun_phrase) >= 2:
             print(''.join(noun_phrase))

     return None
Example #7
0
    def solve(self):
        '''
        Print every run of two or more consecutive nouns, joined together.

        Within each sentence of the data fetched through PickleCache from
        './neko.txt.mecab.pickle', consecutive morphemes whose 'pos' is
        '名詞' (noun) are collected; whenever a run of length >= 2 ends,
        the concatenation of its surface forms is printed.  Runs never
        span sentence boundaries.

        Returns None; one noun sequence is written to standard output per
        line.
        '''
        pc = PickleCache()
        morphologies = pc.get('./neko.txt.mecab.pickle')
        for sentence in morphologies:
            noun_phrase = []
            for m in sentence:
                if m['pos'] == '名詞':
                    noun_phrase.append(m['surface'])
                    continue
                # A non-noun terminates the current run.
                if len(noun_phrase) >= 2:
                    print(''.join(noun_phrase))
                noun_phrase = []
            # Flush a run that reaches the end of the sentence; without
            # this, a sentence ending in nouns silently loses its last
            # run.
            if len(noun_phrase) >= 2:
                print(''.join(noun_phrase))

        return None
Example #8
0
 def solve(self):
     '''
     Show a bar chart of the (up to) ten most frequent base forms.

     Base forms ('base' key of every morpheme in the data fetched
     through PickleCache from './neko.txt.mecab.pickle') are counted
     with collections.Counter and the most common entries are drawn
     with matplotlib, one bar per term, labelled with the term itself.

     Returns None; the only effect is the plot window opened by
     plt.show().
     '''
     pc = PickleCache()
     morphologies = pc.get('./neko.txt.mecab.pickle')
     count = Counter(
         m['base'] for sentence in morphologies for m in sentence
     )
     top10 = count.most_common(10)
     # most_common(10) may return fewer than ten pairs; size the bar
     # positions from the actual result so plt.bar never receives
     # mismatched lengths.
     ind = np.arange(len(top10))
     width = 0.35
     plt.bar(ind, [f for w, f in top10], width)
     # NOTE(review): the title names 'Bocchan' although the data file is
     # neko.txt -- confirm which text is intended.
     plt.title('Top 10 of Term Frequency in Bocchan')
     plt.xlabel('Terms')
     plt.ylabel('Frequency')
     # decode() implies the base forms are UTF-8 byte strings (Python 2
     # str) -- TODO confirm before running under Python 3.
     plt.xticks(ind + width / 2, [w.decode('utf-8') for w, f in top10])
     plt.show()

     return None
Example #9
0
    def solve(self):
        '''
        Show a log-log scatter plot of term frequency against rank.

        Base forms ('base' key of every morpheme in the data fetched
        through PickleCache from './neko.txt.mecab.pickle') are counted
        with collections.Counter and ordered by most_common(); the most
        frequent term gets rank 1.  Both axes use a logarithmic scale.

        Returns None; the only effect is the plot window opened by
        plt.show().
        '''
        cache = PickleCache()
        sentences = cache.get('./neko.txt.mecab.pickle')
        tally = Counter(
            token['base'] for tokens in sentences for token in tokens
        )
        ranked = tally.most_common()
        frequencies = [occurrences for _, occurrences in ranked]
        # Ranks are 1-based: 1, 2, ..., len(ranked).
        ranks = np.arange(1, len(ranked) + 1)
        plt.scatter(ranks, frequencies, s=10)
        plt.xscale('log')
        plt.yscale('log')
        plt.title('Relationship between Rank and Value of Term Frequency')
        plt.xlabel('Rank (Logarithmic)')
        plt.ylabel('Term Frequency (Logarithmic)')
        plt.show()

        return None
Example #10
0
 def solve(self):
     '''
     Show a log-log scatter plot of term frequency against rank.

     Base forms ('base' key of every morpheme in the data fetched
     through PickleCache from './neko.txt.mecab.pickle') are counted
     with collections.Counter and ordered by most_common(); the most
     frequent term gets rank 1.  Both axes use a logarithmic scale.

     Returns None; the only effect is the plot window opened by
     plt.show().
     '''
     cache = PickleCache()
     sentences = cache.get('./neko.txt.mecab.pickle')
     base_forms = []
     for tokens in sentences:
         base_forms.extend(token['base'] for token in tokens)
     ranked = Counter(base_forms).most_common()
     # Ranks are 1-based: 1, 2, ..., len(ranked).
     ranks = np.arange(1, len(ranked) + 1)
     frequencies = [entry[1] for entry in ranked]
     plt.scatter(ranks, frequencies, s=10)
     plt.xscale('log')
     plt.yscale('log')
     plt.title('Relationship between Rank and Value of Term Frequency')
     plt.xlabel('Rank (Logarithmic)')
     plt.ylabel('Term Frequency (Logarithmic)')
     plt.show()

     return None