-
Notifications
You must be signed in to change notification settings - Fork 0
/
RedisToMongo.py
42 lines (39 loc) · 1.16 KB
/
RedisToMongo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import redis
import pymongo as pm
import thulac
import os
import Public
# Migration script: copies the 'CONFIG_ES' hash and every hash stored under
# an 'http://...' key from Redis into the MongoDB database 'text'.
#
# Redis connection settings (host, port, db) are read from ./config/db.txt
# through Public.GetPara.
dbConfig = Public.GetPara(os.path.join('.', 'config', 'db.txt'))
conn = redis.StrictRedis(host=dbConfig['host'], port=dbConfig['port'],
                         db=int(dbConfig['db']), decode_responses=True)

# NOTE(review): the MongoDB credentials are hard-coded in this URI; consider
# moving them into the config file next to the Redis settings.
mc = pm.MongoClient('mongodb://gongcq:gcq@localhost:27017/text')
db = mc['text']

# Segmenter used to tokenise the 'content' field of section records.
tl = thulac.thulac(user_dict=os.path.join('.', 'dict', 'dict'),
                   filt=False, seg_only=True)

# Copy the configuration hash first, keyed by its Redis key name.
# NOTE(review): Collection.save() is deprecated since PyMongo 3.0 and removed
# in 4.0; on a newer driver use
# replace_one({'_id': doc['_id']}, doc, upsert=True) instead.
config = conn.hgetall('CONFIG_ES')
config['_id'] = 'CONFIG_ES'
db['CONFIG'].save(config)

keys = conn.keys()
count = 0
for key in keys:
    # Only keys that are a URL with something after the 'http://' prefix
    # are migrated; everything else is ignored.
    if len(key) <= 7 or not key.startswith('http://'):
        continue

    try:
        item = conn.hgetall(key)
    except (redis.RedisError, UnicodeDecodeError) as e:
        # Skip keys that cannot be read. Falling through here would either
        # raise NameError (first iteration) or store the previous key's
        # data under this key's _id.
        print('skipping %s: %s' % (key, e))
        continue

    item['_id'] = key
    # A comma as the second- or third-to-last character of the key marks a
    # section record; every other URL key is stored as a news record.
    if key[-2] == ',' or key[-3] == ',':
        # Keep the first element of each entry returned by the segmenter
        # (presumably the word itself - confirm against the thulac docs).
        item['parse'] = [c[0] for c in tl.cut(item['content'])]
        item['type'] = 'section'
    else:
        item['type'] = 'news'

    # The target collection has the same name as the record type.
    db[item['type']].save(item)
    count += 1
    print(count)