def get_event_tweets(self, tweets):
    """Annotate ordinary tweets with event information and yield them.

    Accepts either a single tweet dict or a list of tweet dicts.  Tweets
    whose text the rule base does not classify as an event are skipped.
    Matching tweets get an "event_info" mapping (or have an existing one
    refreshed) carrying the tokenized body, per-token word/feature pairs
    and the PLDA topic distribution fetched from the topic server.
    """
    # Tolerate a bare tweet dict as well as a list of tweets.
    if not isinstance(tweets, list):
        tweets = [tweets]
    for tweet in tweets:
        if not self.rule_base.is_event(tweet["text"]):
            continue
        tokens, body = self.tokenizer.tokenize(tweet["text"])
        bag_of_words = self.plda_formatter.format(body)
        # Computed once and shared by both branches below (the original
        # duplicated this work in each branch).
        token_dicts = map(lambda n: {"word": n.surface, "feature": n.feature}, tokens)
        topic = pldautils.get_topic_from_server_p(bag_of_words)
        if "event_info" not in tweet:
            tweet["event_info"] = {
                "date": "",  # self._date(tweet),
                "place": self._place(tokens),
                "body": body,
                "tokens": token_dicts,
                "topic": topic,
            }
        else:
            # Refresh only the tokenization-derived fields; keep the
            # previously stored date and place.
            info = tweet["event_info"]
            info["body"] = body
            info["tokens"] = token_dicts
            info["topic"] = topic
        yield tweet
def get_event_tweets(self, tweets):
    """Yield event tweets from *tweets*, each annotated with "event_info".

    *tweets* may be a single tweet dict or a list of them.  Tweets the
    rule base rejects are skipped; the rest get (or have refreshed) an
    "event_info" mapping with the tokenized body, per-token details and
    the PLDA topic distribution.
    """
    if not isinstance(tweets, list):  # tolerate a single tweet dict
        tweets = [tweets]
    for tweet in tweets:
        text = tweet['text']
        if not self.rule_base.is_event(text):
            continue
        tokens, body = self.tokenizer.tokenize(text)
        bag_of_words = self.plda_formatter.format(body)
        # Shared by both branches below (was computed twice originally).
        token_info = map(lambda n: {"word": n.surface, "feature": n.feature}, tokens)
        topic = pldautils.get_topic_from_server_p(bag_of_words)
        if "event_info" in tweet:
            # Refresh tokenization-derived fields; date/place are kept.
            info = tweet["event_info"]
            info["body"] = body
            info["tokens"] = token_info
            info["topic"] = topic
        else:
            tweet['event_info'] = {
                "date": "",  # self._date(tweet),
                "place": self._place(tokens),
                "body": body,
                "tokens": token_info,
                "topic": topic,
            }
        yield tweet
dd = date_split(d) event_dates.append({"y": today.year, "m": int(dd[0]), "d": int(dd[1])}) return event_dates if __name__ == "__main__": import config import json import sys fty = EventTweetFactory(config.wordfile) tokenizer = TweetTokenizer() f = open(sys.argv[1], "r") if sys.argv[2] == "1": recent = [json.loads(s) for s in f] elif sys.argv[2] == "2": recent = [] [recent.extend(json.loads(t)) for t in f] elif sys.argv[2] == "3": formatter = pldautils.PLDAFormatter(config.wordfile) for s in f: status = json.loads(s) tokens = tokenizer.parse(status["text"]) status["topic"] = pldautils.get_topic_from_server_p(formatter.format(tokens)) print status else: pass # recent = json.load(f) # for tweet in fty.get_event_tweets(recent): # print json.dumps(tweet)
# NOTE(review): this chunk begins mid-expression — the enclosing
# append(...) call and its function header are outside this view, so the
# leading indentation here is reconstructed; confirm against the original.
            "m": int(dd[0]),
            "d": int(dd[1])
        })
    return event_dates


# Command-line driver.  argv[1] is a file of JSON tweets; argv[2] selects
# the input layout: "1" = one JSON object per line, "2" = one JSON array
# per line (flattened into one list), "3" = tokenize each status and
# print it with its PLDA topic.  Any other value is a no-op.
if __name__ == "__main__":
    import config
    import json
    import sys
    fty = EventTweetFactory(config.wordfile)
    tokenizer = TweetTokenizer()
    f = open(sys.argv[1], "r")
    if sys.argv[2] == "1":
        recent = [json.loads(s) for s in f]
    elif sys.argv[2] == "2":
        recent = []
        # Side-effect-only list comprehension: extends `recent` per line.
        [recent.extend(json.loads(t)) for t in f]
    elif sys.argv[2] == "3":
        formatter = pldautils.PLDAFormatter(config.wordfile)
        for s in f:
            status = json.loads(s)
            tokens = tokenizer.parse(status["text"])
            status["topic"] = pldautils.get_topic_from_server_p(
                formatter.format(tokens))
            print status  # Python 2 print statement
    else:
        pass
    # recent = json.load(f)
    # for tweet in fty.get_event_tweets(recent):
    #     print json.dumps(tweet)
#words = json.loads(open("backup", "r").read()) #fy = UserModelFactory.UserModelFactory(wordfile) #bow = fy.formatter.format(words) fty = EventTweetFactory(wordfile) #recent = json.loads(open("recenteventtweets.json").read()) fy = UserModelFactory.UserModelFactory(wordfile) f = open("log_murasaki8823.json", "r") log = [] [log.extend(json.loads(a)["users"]) for a in f] profs = [] [profs.extend(fy.parser.parse(l["description"])) for l in log] bow = fy.formatter.format(profs) ret = pldautils.get_topic_from_server_p(bow) mizu = {"topic": ret} f = open("eventtweetmodels_mixed2.json") eventtweetmodels = [json.loads(l) for l in f] recommender = EventTweetRecommender.EventTweetRecommender(eventtweetmodels) #tweets = recommender.calc(json.load("mizumodel.json"), 30) tweets = recommender.calc(mizu, 30) tweets2 = recommender.calc(mizu, 30, "pro") tokens = [t["event_info"]["tokens"] for t in tweets] for t in tokens: print ' '.join(tt["word"] for tt in t) f.close() f = open("eventtweetmodels_mixed.json", "r") i = 0 for l in f: print json.loads(l)["text"]
# words = json.loads(open("backup", "r").read()) # fy = UserModelFactory.UserModelFactory(wordfile) # bow = fy.formatter.format(words) fty = EventTweetFactory(wordfile) # recent = json.loads(open("recenteventtweets.json").read()) fy = UserModelFactory.UserModelFactory(wordfile) f = open("log_murasaki8823.json", "r") log = [] [log.extend(json.loads(a)["users"]) for a in f] profs = [] [profs.extend(fy.parser.parse(l["description"])) for l in log] bow = fy.formatter.format(profs) ret = pldautils.get_topic_from_server_p(bow) mizu = {"topic": ret} f = open("eventtweetmodels_mixed2.json") eventtweetmodels = [json.loads(l) for l in f] recommender = EventTweetRecommender.EventTweetRecommender(eventtweetmodels) # tweets = recommender.calc(json.load("mizumodel.json"), 30) tweets = recommender.calc(mizu, 30) tweets2 = recommender.calc(mizu, 30, "pro") tokens = [t["event_info"]["tokens"] for t in tweets] for t in tokens: print " ".join(tt["word"] for tt in t) f.close() f = open("eventtweetmodels_mixed.json", "r") i = 0 for l in f: print json.loads(l)["text"]