Ejemplo n.º 1
0
 def get_event_tweets(self, tweets):
     """Attach event information to ordinary tweets.

     Accepts either a list of tweets or a single tweet; anything that is
     not a list is wrapped in a one-element list.  Tweets whose text is
     rejected by ``self.rule_base.is_event`` are skipped.  Every remaining
     tweet has its ``"event_info"`` entry created or refreshed in place
     and is then yielded.

     Args:
         tweets: A list of tweets, or one tweet.  Each tweet must support
             ``tweet["text"]`` and item assignment.

     Yields:
         The same tweet objects, mutated so that ``tweet["event_info"]``
         holds ``"body"``, ``"tokens"`` and ``"topic"`` (plus ``"date"``
         and ``"place"`` when the entry had to be created).
     """
     # isinstance() also accepts list subclasses; an exact type comparison
     # would wrap such a list as if it were one single tweet.
     if not isinstance(tweets, list):
         tweets = [tweets]
     for tweet in tweets:
         text = tweet["text"]
         if not self.rule_base.is_event(text):
             continue
         tokens, body = self.tokenizer.tokenize(text)
         bag_of_words = self.plda_formatter.format(body)
         is_new = "event_info" not in tweet
         if is_new:
             info = {
                 "date": "",  # date extraction is disabled: self._date(tweet)
                 "place": self._place(tokens),
             }
         else:
             # Existing entries keep their "date" and "place" untouched.
             info = tweet["event_info"]
         info["body"] = body
         # A list comprehension gives a concrete list on Python 2 and 3
         # alike; map() is a lazy one-shot iterator on Python 3.
         info["tokens"] = [
             {"word": node.surface, "feature": node.feature} for node in tokens
         ]
         info["topic"] = pldautils.get_topic_from_server_p(bag_of_words)
         if is_new:
             # Attach only once fully built, so a failure above never
             # leaves a half-filled entry on the tweet.
             tweet["event_info"] = info
         yield tweet
Ejemplo n.º 2
0
 def get_event_tweets(self, tweets):
     """Add event information to plain tweets and yield the event tweets.

     A single tweet may be passed instead of a list; any non-list argument
     is wrapped in a one-element list.  Only tweets whose ``"text"`` passes
     ``self.rule_base.is_event`` are processed and yielded; the others are
     silently skipped.

     Args:
         tweets: A list of tweets, or one tweet.  Each tweet must support
             ``tweet["text"]`` and item assignment.

     Yields:
         Each accepted tweet, mutated in place: ``tweet["event_info"]`` is
         created (with ``"date"``, ``"place"``, ``"body"``, ``"tokens"``,
         ``"topic"``) or, when already present, has its ``"body"``,
         ``"tokens"`` and ``"topic"`` refreshed.
     """
     # Use isinstance() so list subclasses are iterated rather than being
     # mistaken for a single tweet.
     if not isinstance(tweets, list):
         tweets = [tweets]
     for tweet in tweets:
         text = tweet['text']
         if not self.rule_base.is_event(text):
             continue
         tokens, body = self.tokenizer.tokenize(text)
         bag_of_words = self.plda_formatter.format(body)
         if "event_info" not in tweet:
             # Date extraction is currently disabled (was self._date(tweet)).
             info = {"date": "", "place": self._place(tokens)}
         else:
             # "date" and "place" of an existing entry are left as they are.
             info = tweet["event_info"]
         info["body"] = body
         # Build a real list: map() would be a lazy iterator on Python 3,
         # which cannot be re-read or serialised.
         info["tokens"] = [{
             "word": node.surface,
             "feature": node.feature
         } for node in tokens]
         info["topic"] = pldautils.get_topic_from_server_p(bag_of_words)
         # For a new entry this attaches the finished dict; for an existing
         # one it stores the same object back.
         tweet["event_info"] = info
         yield tweet
Ejemplo n.º 3
0
            dd = date_split(d)
            event_dates.append({"y": today.year, "m": int(dd[0]), "d": int(dd[1])})
        return event_dates


if __name__ == "__main__":
    # Command-line driver.
    #   argv[1]: path of the input file, read line by line
    #   argv[2]: mode
    #       "1" - decode every line as JSON and collect the results
    #       "2" - decode every line as JSON and concatenate the decoded
    #             sequences into one list
    #       "3" - decode every line, attach a "topic" entry computed from
    #             its "text", and print the result
    #       anything else - do nothing
    import config
    import json
    import sys

    fty = EventTweetFactory(config.wordfile)
    tokenizer = TweetTokenizer()
    # The context manager closes the input file even when decoding fails.
    with open(sys.argv[1], "r") as f:
        mode = sys.argv[2]
        if mode == "1":
            recent = [json.loads(s) for s in f]
        elif mode == "2":
            # Plain loop: extend() is called for its side effect only.
            recent = []
            for line in f:
                recent.extend(json.loads(line))
        elif mode == "3":
            formatter = pldautils.PLDAFormatter(config.wordfile)
            for line in f:
                status = json.loads(line)
                tokens = tokenizer.parse(status["text"])
                status["topic"] = pldautils.get_topic_from_server_p(formatter.format(tokens))
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print(status)
    # Disabled follow-up step, kept for reference:
    #        recent = json.load(f)
    #    for tweet in fty.get_event_tweets(recent):
    #        print json.dumps(tweet)
Ejemplo n.º 4
0
                "m": int(dd[0]),
                "d": int(dd[1])
            })
        return event_dates


if __name__ == "__main__":
    # Script entry point: <script> <input-file> <mode>.
    # Mode "1" collects one decoded JSON value per input line, mode "2"
    # flattens the decoded per-line sequences into a single list, and
    # mode "3" prints every decoded line with a "topic" entry added.
    # Any other mode reads nothing.
    import config
    import json
    import sys
    fty = EventTweetFactory(config.wordfile)
    tokenizer = TweetTokenizer()
    # "with" guarantees the input file is closed on every exit path.
    with open(sys.argv[1], "r") as f:
        mode = sys.argv[2]
        if mode == "1":
            recent = [json.loads(s) for s in f]
        elif mode == "2":
            recent = []
            # An explicit loop replaces a list comprehension that was used
            # only for the extend() side effect.
            for line in f:
                recent.extend(json.loads(line))
        elif mode == "3":
            formatter = pldautils.PLDAFormatter(config.wordfile)
            for line in f:
                status = json.loads(line)
                tokens = tokenizer.parse(status["text"])
                status["topic"] = pldautils.get_topic_from_server_p(
                    formatter.format(tokens))
                # print(x) with one argument is valid and equivalent on
                # both Python 2 and Python 3.
                print(status)
    # Disabled follow-up step, kept for reference:
    #        recent = json.load(f)
    #    for tweet in fty.get_event_tweets(recent):
    #        print json.dumps(tweet)
Ejemplo n.º 5
0
# Ad-hoc experiment: build a topic model from the profile descriptions in a
# user log, ask the recommender for event tweets, and print their words.

# Disabled alternative input, kept for reference:
#words = json.loads(open("backup", "r").read())
#fy = UserModelFactory.UserModelFactory(wordfile)
#bow = fy.formatter.format(words)

fty = EventTweetFactory(wordfile)
#recent = json.loads(open("recenteventtweets.json").read())

fy = UserModelFactory.UserModelFactory(wordfile)

# Gather the "users" entries of every JSON line in the log.  The context
# manager closes the file; previously the handle was rebound while open.
log = []
with open("log_murasaki8823.json", "r") as f:
    for line in f:
        log.extend(json.loads(line)["users"])

# Parse each user's description and pool the results.
profs = []
for user in log:
    profs.extend(fy.parser.parse(user["description"]))

bow = fy.formatter.format(profs)
ret = pldautils.get_topic_from_server_p(bow)
mizu = {"topic": ret}

with open("eventtweetmodels_mixed2.json") as f:
    eventtweetmodels = [json.loads(line) for line in f]
recommender = EventTweetRecommender.EventTweetRecommender(eventtweetmodels)
#tweets = recommender.calc(json.load("mizumodel.json"), 30)
tweets = recommender.calc(mizu, 30)
tweets2 = recommender.calc(mizu, 30, "pro")
tokens = [t["event_info"]["tokens"] for t in tweets]
for token_list in tokens:
    # Single-argument print(...) is equivalent on Python 2 and Python 3.
    print(' '.join(entry["word"] for entry in token_list))

i = 0
# Dump the text of every stored event tweet model.
with open("eventtweetmodels_mixed.json", "r") as f:
    for line in f:
        print(json.loads(line)["text"])
Ejemplo n.º 6
0
# Scratch script: derive a topic model from user profile descriptions,
# request recommended event tweets for it, and print the tweets' words.

# Disabled alternative input, kept for reference:
# words = json.loads(open("backup", "r").read())
# fy = UserModelFactory.UserModelFactory(wordfile)
# bow = fy.formatter.format(words)

fty = EventTweetFactory(wordfile)
# recent = json.loads(open("recenteventtweets.json").read())

fy = UserModelFactory.UserModelFactory(wordfile)

# Each log line is a JSON document; pool its "users" entries.  Using
# "with" closes the file, which was previously left open when "f" was
# rebound further down.
log = []
with open("log_murasaki8823.json", "r") as f:
    for line in f:
        log.extend(json.loads(line)["users"])

# Explicit loop instead of a list comprehension run only for its
# extend() side effect.
profs = []
for user in log:
    profs.extend(fy.parser.parse(user["description"]))

bow = fy.formatter.format(profs)
ret = pldautils.get_topic_from_server_p(bow)
mizu = {"topic": ret}

with open("eventtweetmodels_mixed2.json") as f:
    eventtweetmodels = [json.loads(line) for line in f]
recommender = EventTweetRecommender.EventTweetRecommender(eventtweetmodels)
# tweets = recommender.calc(json.load("mizumodel.json"), 30)
tweets = recommender.calc(mizu, 30)
tweets2 = recommender.calc(mizu, 30, "pro")
tokens = [t["event_info"]["tokens"] for t in tweets]
for token_list in tokens:
    # print(x) with one argument works identically on Python 2 and 3.
    print(" ".join(entry["word"] for entry in token_list))

i = 0
# Print the text of every stored event tweet model.
with open("eventtweetmodels_mixed.json", "r") as f:
    for line in f:
        print(json.loads(line)["text"])