Exemple #1
0
    def __init__(self):
        """Build a synthetic, time-ordered list of tweet items.

        Two arrival-time sequences are simulated, one with
        ``poisson.simulate(period=600)`` and one with ``hpk.simulate(180)``
        (the second is shifted by 420). Every arrival is turned into a
        ``stream.RawTweetItem`` with user id 0 and a document produced by
        ``self.get_doc``: ``self.g_topics`` for the first sequence and
        ``self.b_topics`` for the second. The merged items are stored in
        ``self.stream_all`` sorted by ``item.datetime()``, and
        ``self.index`` starts at 0.
        """
        # Only the arrival sequences are used; the returned rates are ignored.
        first_times, _ = poisson.simulate(period=600)
        first_times = np.array(first_times)

        second_times, _ = hpk.simulate(180)
        second_times = np.array(second_times) + 420

        # Must run before get_doc() is called below.
        # NOTE(review): presumably this sets g_topics / b_topics -- confirm.
        self.create_parameters()

        # Base timestamp; each simulated value is scaled by 60 and added to it
        # (looks like minutes -> seconds on a Unix epoch -- confirm).
        base_time = 1425847547.

        first_items = [
            stream.RawTweetItem(base_time + 60 * offset, 0,
                                self.get_doc(self.g_topics))
            for offset in first_times
        ]

        second_items = [
            stream.RawTweetItem(base_time + 60 * offset, 0,
                                self.get_doc(self.b_topics))
            for offset in second_times
        ]

        # Merge both sources into a single chronologically ordered stream.
        self.stream_all = sorted(first_items + second_items,
                                 key=lambda entry: entry.datetime())

        self.index = 0
    def next(self):
        """Read the next tweet record from ``self.f`` and return it as an item.

        Lines are consumed until a separator line (one starting with
        ``-------------``) is reached. The ``content: ``, ``userId: `` and
        ``publishedTimeGmt: `` lines seen before the separator supply the
        text, user and timestamp of the returned ``stream.RawTweetItem``.

        Returns:
            A ``stream.RawTweetItem`` for the record, or
            ``stream.End_Of_Stream`` when the input is exhausted, when a
            record's adjusted timestamp is later than ``self.end``, or when
            a line cannot be parsed.
        """
        # Function-scope import: literal_eval replaces eval() below.
        import ast

        _t = None
        _user = None
        _tweet = None

        while True:
            try:
                # Built-in next() works on both Python 2 and Python 3
                # iterators, unlike the Python-2-only ``.next()`` method.
                line = next(self.f)

                if line.startswith('content: '):
                    # Slice the prefix off rather than splitting on it, so a
                    # tweet that itself contains 'content: ' is not truncated.
                    _tweet = line[len('content: '):]

                if line.startswith('userId: '):
                    # SECURITY: this used to be eval() on file contents.
                    # literal_eval accepts the same literals but cannot
                    # execute arbitrary expressions.
                    _user = ast.literal_eval(line[len('userId: '):].strip())

                if line.startswith('publishedTimeGmt: '):
                    raw_time = line[len('publishedTimeGmt: '):].strip()
                    # Value is divided by 1000 (presumably ms -> s; confirm).
                    _t = ast.literal_eval(raw_time) / 1000
                    _t += 8 * 60 * 60  # 8 hours

                    if _t > self.end:
                        return stream.End_Of_Stream

                if line.startswith('-------------'):
                    # Separator line: the record is complete.
                    return stream.RawTweetItem(_t, _user, _tweet)
            except Exception:
                # Best-effort behaviour kept from the original: exhaustion of
                # the input (StopIteration) or any malformed line ends the
                # stream. Unlike a bare ``except:``, this no longer swallows
                # KeyboardInterrupt / SystemExit.
                return stream.End_Of_Stream
Exemple #3
0
def source2tweet(source):
    """Convert a source record into a ``stream.RawTweetItem``.

    ``source`` is indexed by the keys ``"created_at"``, ``"uid"`` and
    ``"text"``. ``"created_at"`` must be a string in the format
    ``'%Y-%m-%d %H:%M'``. The whole record is attached to the item via
    ``item.attach(source)`` before it is returned.

    Raises:
        KeyError: if one of the three keys is missing.
        ValueError: if ``"created_at"`` does not match the expected format.
    """
    created = datetime.strptime(source["created_at"], '%Y-%m-%d %H:%M')
    user_id, text = source["uid"], source["text"]

    tweet_item = stream.RawTweetItem(created, user_id, text)
    # Keep the full original record reachable from the item.
    tweet_item.attach(source)
    return tweet_item
    def next(self):
        """Return the next row of ``weibo_timelines`` as a tweet item.

        Rows are fetched from ``self.cursor``. When the current result set is
        exhausted, the cursor and connection are closed, ``self.dy_start`` is
        advanced by one day, and a new query for that day is issued. This
        repeats until a row is found or ``self.dy_start`` reaches
        ``self.dy_end``.

        Returns:
            A ``stream.RawTweetItem`` with the row attached as a dict, or
            ``stream.End_Of_Stream`` once ``self.dy_start`` is no longer
            before ``self.dy_end``.
        """
        row = self.cursor.fetchone()

        # Loop rather than retry once: a day with no rows used to leave
        # ``row`` as None and crash on the indexing below.
        while row is None:
            self.cursor.close()
            self.connection.close()

            self.dy_start = self.dy_start + td(days=1)
            if self.dy_start >= self.dy_end:
                return stream.End_Of_Stream

            self.connection = MySQLdb.connect(host='?',
                                              user='******',
                                              db='?',
                                              charset='utf8')
            self.cursor = self.connection.cursor()

            # Half-open window [day, day + 1) for the new current day.
            _time0 = self.dy_start.strftime("%Y-%m-%d")
            _time1 = (self.dy_start + td(days=1)).strftime("%Y-%m-%d")

            # The interpolated values come from strftime(), not from
            # external input.
            sql_str = ('select * from weibo_timelines'
                       ' where created_at >= "%s" and created_at < "%s"'
                       ' order by created_at' % (_time0, _time1))
            print(sql_str)

            self.cursor.execute(sql_str)
            row = self.cursor.fetchone()

        # self.id_map translates a column name into its index within the row.
        fields = (
            "mid", "uid", "retweet_num", "comment_num", "favourite_num",
            "created_at", "from", "text", "entity", "source_mid",
            "source_uid", "mentions", "check_in", "check_in_url",
            "is_deleted", "timestamp",
        )
        _obj = {name: row[self.id_map[name]] for name in fields}

        _t = datetime.strptime(_obj["created_at"], '%Y-%m-%d %H:%M')
        _user = _obj["uid"]
        _tweet = _obj["text"]

        item = stream.RawTweetItem(_t, _user, _tweet)
        item.attach(_obj)
        return item
Exemple #5
0
    def load(self, start, end):
        """Load all rows between ``start`` and ``end`` into ``self.deq``.

        ``self.range(start, end)`` is called until it returns something other
        than None, sleeping 300 seconds between attempts. Each row is
        converted to a ``stream.RawTweetItem`` with the row's fields attached
        as a dict, and appended to ``self.deq``. The load rate (items in
        ``self.deq`` per minute of ``self.delta``) is then reported through
        ``sys_monitor.report_rate`` and ``self.start`` is moved to ``end``.

        Args:
            start: lower bound passed to ``self.range``.
            end: upper bound passed to ``self.range``; also becomes the new
                ``self.start``.
        """
        fields = (
            "mid", "uid", "retweet_num", "comment_num", "favourite_num",
            "created_at", "from", "text", "entity", "source_mid",
            "source_uid", "mentions", "check_in", "check_in_url",
            "is_deleted", "timestamp",
        )

        res = self.range(start, end)

        # Retry indefinitely: None signals a failed range query.
        while res is None:
            print('Search range failure. Sleeping 300 seconds.')
            time.sleep(300)
            res = self.range(start, end)

        for row in res:
            # self.id_map translates a column name into its index in the row.
            _obj = {name: row[self.id_map[name]] for name in fields}

            _t = datetime.strptime(_obj["created_at"], '%Y-%m-%d %H:%M')
            _user = _obj["uid"]
            _tweet = _obj["text"]

            item = stream.RawTweetItem(_t, _user, _tweet)
            item.attach(_obj)

            self.deq.append(item)

        # Single formatted argument: prints the same text on Python 2 and 3.
        print('LOADING FINISHED. %s %s %s' % (len(self.deq), start, end))

        # total_seconds() covers the whole duration. ``.seconds`` drops whole
        # days, so a delta of exactly N days evaluated to 0 and caused a
        # ZeroDivisionError below.
        # NOTE(review): assumes self.delta is a datetime.timedelta -- confirm.
        dt = self.delta.total_seconds() / 60.0
        sys_monitor.report_rate(len(self.deq) / dt, end)

        self.start = end
    def __init__(self):
        """Load every tweet record from ``./tweets.txt`` into ``self.deq``.

        The file is scanned line by line. ``content: ``, ``userId: `` and
        ``publishedTimeGmt: `` lines update the current text, user and
        timestamp. Each separator line (starting with ``-------------``)
        appends a ``stream.RawTweetItem`` built from the current values.
        Values are not reset between records, so a record missing a field
        reuses the previous record's value.
        """
        # Function-scope import: literal_eval replaces eval() below.
        import ast

        self.deq = deque([])

        _t = None
        _user = None
        _tweet = None

        # The context manager closes the file even if parsing raises; the
        # handle was previously never closed.
        with open('./tweets.txt', 'r') as f:
            for line in f:
                if line.startswith('content: '):
                    # Slice the prefix off rather than splitting on it, so a
                    # tweet that itself contains 'content: ' is not truncated.
                    _tweet = line[len('content: '):]

                if line.startswith('userId: '):
                    # SECURITY: this used to be eval() on file contents.
                    # literal_eval accepts the same literals but cannot
                    # execute arbitrary expressions.
                    _user = ast.literal_eval(line[len('userId: '):].strip())

                if line.startswith('publishedTimeGmt: '):
                    raw_time = line[len('publishedTimeGmt: '):].strip()
                    # Value is divided by 1000 (presumably ms -> s; confirm).
                    _t = ast.literal_eval(raw_time) / 1000
                    _t += 8 * 60 * 60  # 8 hours

                if line.startswith('-------------'):
                    # Separator line: the record is complete.
                    item = stream.RawTweetItem(_t, _user, _tweet)
                    self.deq.append(item)

        print('LOADING FINISHED.')