コード例 #1
0
def read_text(start_day=0,
              per_iter=10,
              iteration=5,
              jp=False,
              writer=None):
    """Search tweets window-by-window and print each tweet's text.

    Args:
        start_day: offset in days from today for the first search window.
        per_iter: width of each search window, in days.
        iteration: loop bound; ``range(1, iteration)`` windows are fetched.
        jp: use the Japanese date formatter ``tp.dfdj`` when True.
        writer: destination writer; a fresh ``AbstractDBWriter`` is created
            per call when omitted.
    """
    # A signature default of ``AbstractDBWriter()`` is evaluated once at
    # definition time and shared across all calls (mutable-default pitfall);
    # create it lazily instead.
    if writer is None:
        writer = AbstractDBWriter()
    day = datetime.datetime.today() + datetime.timedelta(start_day)
    api = tw.Api(oauth)
    rt = "%20-rt"  # URL-encoded " -rt": exclude retweets from results
    day_fn = tp.dfdj if jp else tp.dfd
    for i in range(1, iteration):
        try:
            q = day_fn(day, per_iter) + rt + "%20" + tp.event_jp
            for j in range(1, 5):
                # ``result_type`` matches the other search calls in this
                # file; the original passed ``response_type``, which the
                # search API does not recognize.
                r = api.search(q=q,
                               rpp="100",
                               page=str(j),
                               lang="ja",
                               result_type="recent")
                for k in r['results']:
                    print(k['text'])
            day = day + datetime.timedelta(per_iter)
        except Exception:
            # best-effort: back off briefly, then move to the next window
            time.sleep(3)
    writer.close()
コード例 #2
0
def main():
    """Stream the public sample timeline and archive Japanese tweets.

    Usage: ``<script> <username> <password>``

    Each tweet whose author language is 'ja' is augmented with a
    ``parse_text`` token list, appended as one JSON line to a file named
    after the start timestamp, and its tokens are echoed to stdout.
    """
    u, p = sys.argv[1:3]
    parser = MeCabParser.Parser()
    api = tw.Api(tw.BasicAuth(u, p))
    # Context manager guarantees the log file is closed even when the
    # stream raises or the process is interrupted.
    with open(str(time.time()), 'w') as f:
        for t in api.sample_stream():
            try:
                if t['user']['lang'] == 'ja':
                    t['parse_text'] = parser.parse(normalize(t['text']))
                    f.write(json.dumps(t) + '\n')
                    f.flush()
                    print(','.join(t['parse_text']))
            except Exception:
                # malformed stream entries (missing keys, parse failures)
                # are deliberately skipped
                pass
コード例 #3
0
def read_slow(start=0, n=10, jp=False, filename="hoge"):
    """Poll the search API forever, appending raw results to a file.

    Args:
        start: offset in days from today for the search window.
        n: window width in days.
        jp: use the Japanese date formatter ``tp.dfdj`` when True.
        filename: destination passed to ``FileWriter``.
    """
    api = tw.Api(tw.OAuth(ck, cs, tk, ts))
    rt = "%20-rt"  # URL-encoded " -rt": exclude retweets
    day_fn = tp.dfdj if jp else tp.dfd
    writer = FileWriter(filename)
    # since_id must persist across iterations; the original reset it to
    # '0' inside the loop, so every poll re-fetched the same tweets.
    since_id = '0'
    while 1:
        day = datetime.datetime.today() + datetime.timedelta(start)
        res = api.search(q=day_fn(day, n) + "%20" + tp.event_jp + rt,
                         rpp="100",
                         since_id=since_id,
                         result_type="recent")
        # 'mac_id_str' in the original looks like a typo for the search
        # API's 'max_id_str' field — TODO confirm against the tw wrapper.
        since_id = res['max_id_str']
        for r in res['results']:
            writer.write(json.dumps(r))
        time.sleep(10)
コード例 #4
0
def read(start_day=0, per_iter=10, iteration=5, writer=None):
    """Search progressively wider day-windows, tokenize and store tweets.

    Args:
        start_day: offset in days from today for the starting point.
        per_iter: base window width; iteration ``i`` spans ``i * per_iter``
            days.
        iteration: loop bound; ``range(1, iteration)`` windows are fetched.
        writer: destination writer; a fresh ``AbstractDBWriter`` is created
            per call when omitted.
    """
    # Avoid the shared mutable default ``writer=AbstractDBWriter()``:
    # a signature default is evaluated once and reused across calls.
    if writer is None:
        writer = AbstractDBWriter()
    day = datetime.datetime.today() + datetime.timedelta(start_day)
    api = tw.Api(tw.OAuth(ck, cs, tk, ts))
    rt = "%20-rt"  # URL-encoded " -rt": exclude retweets
    for i in range(1, iteration):
        try:
            q = tp.days_from_day_jp(day, i * per_iter) + rt
            for j in range(1, 15):
                r = api.search(q=q, rpp="100", page=str(j), lang="ja")
                for k in r['results']:
                    # NOTE(review): ``parser`` is a free name here —
                    # presumably a module-level MeCabParser.Parser; verify.
                    tokens = parser.parse(k['text'])
                    for token in tokens:
                        print(token)
                        writer.write(token)
            day = day + datetime.timedelta(i * per_iter)
        except Exception as e:
            # The original ``except e:`` would raise NameError the moment
            # any exception occurred; bind the exception properly.
            print(e)
            time.sleep(10)
    writer.close()
コード例 #5
0
#!/usr/bin/env python
#coding: utf8

import sys
from tinytwitter import tinytwitter as tw
import MeCabParser
import datetime
from config import *
import json
from pldautils import PLDAFormatter, get_topic_from_server
import MeCabParser


# Module-level Twitter client and backup sink shared by this script.
api = tw.Api(oauth)  # `oauth` is star-imported from `config` above
backup = open("backup.txt", "w")  # NOTE(review): never closed explicitly

class Parser(MeCabParser.Parser):
    def parse(self, s):
        """Extract only proper nouns from *s*, skipping numerals
        ("名詞,数") and dependent nouns ("非自立")."""
        tokens = []
        node = self.node(s)
        while node:
            word = node.surface
            feature = node.feature
            is_noun = word != "" and "名詞" in feature
            if is_noun and "名詞,数" not in feature and "非自立" not in feature:
                tokens.append(word)
            node = node.next
        return tokens