-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
99 lines (86 loc) · 2.61 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# -*- coding: utf8 -*-
import numpy as np, sys, os, cPickle as pickle, random, codecs
from datetime import datetime
def read_message(fn):
    """Parse an exported chat log into ``(time, author, body)`` tuples.

    The first eight lines of the file are skipped.  After that, lines are
    grouped into records separated by empty lines.  The first line of a
    record is its header: whitespace-separated fields of which the first two
    are the date and the time (``%Y-%m-%d`` and ``%H:%M:%S``) and the last
    one is the sender's nickname.  The remaining lines of the record are
    stripped and joined with single spaces to form the body.

    Records whose header has fewer than three fields, or whose nickname is
    not one of the known aliases below, are dropped.

    :param fn: path of the raw chat log.
    :return: list of ``(datetime, 'g' or 'b', body)`` tuples in file order.
    :raises ValueError: if the date/time fields of a kept record do not match
        the expected format.
    """
    # Nicknames under which each of the two participants appears in the log.
    girl = ('我女人', '525143374', 'cc', 'lulu', '你说呢?!', '丫头')
    boy = ('歪厄姆', 'u.f.o')
    skipped_lines = 8

    raw_messages = []
    message = []
    with open(fn, 'r') as f:
        for i, line in enumerate(f):
            if i < skipped_lines:
                continue
            # Remove only the line terminator for the emptiness check so
            # that CRLF-terminated files are split into records as well.
            if line.rstrip('\r\n'):
                message.append(line.strip())
            elif message:
                raw_messages.append(message)
                message = []
    # Flush the last record: a file that does not end with an empty line
    # would otherwise lose its final message.
    if message:
        raw_messages.append(message)

    messages = []
    for record in raw_messages:
        terms = record[0].split()
        if len(terms) < 3:
            continue
        nickname = terms[-1]
        if nickname in girl:
            author = 'g'
        elif nickname in boy:
            author = 'b'
        else:
            continue
        time = datetime.strptime('-'.join(terms[:2]), '%Y-%m-%d-%H:%M:%S')
        messages.append((time, author, ' '.join(record[1:])))
    return messages
def stat_day(messages):
    """Plot how many messages were sent on each calendar day.

    The counts are passed to pylab's ``plot_date`` with the ``'go'`` format,
    one point per day that has at least one message, and then ``show()`` is
    called.

    :param messages: iterable of ``(time, author, body)`` tuples where
        ``time`` is a ``datetime``.
    """
    import collections
    from pylab import plot_date, show

    # Key on the real calendar day.  Formatting the day with a two-digit
    # year and parsing it back put every point in the first century
    # (2012 became year 12).
    per_day = collections.defaultdict(int)
    for time, _author, _body in messages:
        per_day[datetime(time.year, time.month, time.day)] += 1

    dates = sorted(per_day)
    nums = [per_day[day] for day in dates]
    plot_date(dates, nums, 'go')
    show()
def load_message(fn_raw, fn_msg):
    """Return the parsed messages, using ``fn_msg`` as an on-disk cache.

    If ``fn_msg`` exists it is unpickled and returned without looking at
    ``fn_raw``.  Otherwise ``fn_raw`` is parsed with ``read_message`` and the
    result is pickled to ``fn_msg`` before being returned.

    :param fn_raw: path of the raw chat log.
    :param fn_msg: path of the pickle cache.
    :return: whatever the cache holds, or the list built by ``read_message``.
    """
    if os.path.exists(fn_msg):
        # Pickle data is binary; text mode can corrupt it and fails outright
        # on Python 3.
        # NOTE(review): unpickling can execute arbitrary code, so fn_msg must
        # only ever point at a cache written by this script.
        with open(fn_msg, 'rb') as f:
            return pickle.load(f)

    messages = read_message(fn_raw)
    with open(fn_msg, 'wb') as f:
        pickle.dump(messages, f)
    return messages
def make_word(messages, word, max_length):
    """Pick a random message whose body contains ``word``.

    Messages are visited in random order; the first one whose body is no
    longer than ``max_length`` and contains ``word`` wins.

    :param messages: iterable of ``(time, author, body)`` tuples.  ``body``
        may be text or a UTF-8 encoded byte string.
    :param word: text to look for.
    :param max_length: upper bound on ``len(body)``, measured on the body as
        stored (i.e. in bytes for a byte string).
    :return: ``(time, author, body, idx)`` where ``idx`` is the position of
        ``word`` in the decoded body, or ``(None, None, None, None)`` when no
        message qualifies.
    """
    # Shuffle a private copy so the caller's list keeps its order.
    candidates = list(messages)
    random.shuffle(candidates)
    for time, author, body in candidates:
        if len(body) > max_length:
            continue
        try:
            text = body.decode('utf-8') if isinstance(body, bytes) else body
            idx = text.index(word)
        except ValueError:
            # index() raises ValueError when the word is absent; a body that
            # is not valid UTF-8 raises UnicodeDecodeError, a subclass of it.
            continue
        return time, author, body, idx
    return None, None, None, None
def _as_text(value):
    """Return ``value`` as text, decoding it from UTF-8 if it is bytes."""
    return value.decode('utf-8') if isinstance(value, bytes) else value


def write_sentences(fn, max_length = 50):
    """Write an HTML page that spells a fixed sentence with chat messages.

    For every character of the sentence a random message containing it is
    chosen with ``make_word`` and emitted with that character wrapped in
    ``<b>``.  A character for which no message is found is emitted on its
    own.  Lines are separated by ``<br/>`` and the page is written to ``fn``
    encoded as GBK.  Nothing is written if ``load_message`` returns ``None``.

    NOTE(review): message text is inserted without HTML escaping.

    :param fn: path of the HTML file to create.
    :param max_length: longest message body accepted, passed to ``make_word``.
    :raises UnicodeEncodeError: if the page contains a character that GBK
        cannot encode.
    """
    messages = load_message('var/raw.txt', 'var/msg.db')
    if messages is None:
        return

    sentence = _as_text('泪如雨下在你的发,冲化了最美的年华。')
    sentences = []
    for word in sentence:
        time, _author, body, idx = make_word(messages, word, max_length)
        if time is None:
            sentences.append(word)
            continue
        text = _as_text(body)
        # Cut around the reported position.  Splitting on the character and
        # keeping two pieces lost the rest of the message whenever the
        # character occurred more than once.
        end = idx + len(word)
        sentences.append(''.join([text[:idx], '<b>', word, '</b>', text[end:]]))

    content = '\n'.join(['<html>', '<br/>\n'.join(sentences), '</html>'])
    with codecs.open(fn, 'w', 'gbk') as f:
        f.write(content)
# Script entry point: generate the page into out.html.  The call is not
# wrapped in a __main__ guard, so it also runs when this file is imported.
write_sentences('out.html')