class Friendmaps_to_list:
    """Merge the friend maps stored row-by-row in a table into one map per user.

    Each row's ``v`` column is expected to hold the text of a dictionary
    literal shaped like ``{user_id: {friend_id: count}}``.  The rows are read
    page by page, merged in memory, and can then be written to a target table
    with one row per user.
    """

    # Number of rows fetched by each paged SELECT.
    _PAGE_SIZE = 50

    def __init__(self):
        self.mc = MysqlClient()

    def query_map_to_dict(self, table_name):
        """Read every ``v`` value of *table_name* and merge them into one dict.

        Args:
            table_name: name of the table to read; interpolated into the SQL
                text as-is, so it must come from a trusted source.

        Returns:
            A dict ``{user_id: {friend_id: total_count}}`` where the counts of
            the same (user, friend) pair found in different rows are summed.
        """
        # Local import so the block does not depend on a file-level import.
        import ast

        sys.stdout.write(u'开始查询表%s。\n' % table_name)
        dict_all = {}
        table_rows = self.query_table_rows(table_name)
        sqlstart = 0  # offset of the next page; advanced by _PAGE_SIZE
        # Strict comparison: no trailing empty query when the row count is 0
        # or an exact multiple of the page size.
        while sqlstart < table_rows:
            sql_select = "select v from %s limit %d,%d;" % (
                table_name, sqlstart, self._PAGE_SIZE)
            for r in self.mc.query(sql_select) or ():
                print(len(r[0]))
                # NOTE(review): this used to be eval(), which would execute
                # arbitrary expressions stored in the table.  literal_eval
                # accepts the same dict/str/int literals and nothing else.
                dict_loc = ast.literal_eval(r[0])
                self._merge_relations(dict_all, dict_loc)
            sqlstart += self._PAGE_SIZE
        sys.stdout.write(u'返回结果字典。\n')
        sys.stdout.write(str(len(dict_all)))
        return dict_all

    @staticmethod
    def _merge_relations(dict_all, dict_loc):
        """Add the per-user relation counts of *dict_loc* into *dict_all* in place."""
        for k in dict_loc:
            v = dict_loc[k]
            if k not in dict_all:
                dict_all[k] = v
                continue
            # Merge this user's relation dict into the one already collected.
            merged = dict_all[k]
            for k1 in v:
                # A friend seen for the first time keeps its real count
                # (previously it was reset to 1).
                merged[k1] = merged.get(k1, 0) + v[k1]

    def query_table_rows(self, table_name):
        """Return the row count of *table_name* and report it on stdout."""
        sql_rows = "select count(*) from %s;" % (table_name)
        rs = self.mc.query(sql_rows)
        table_rows = rs[0][0]
        sys.stdout.write("Table %s has %d rows." % (table_name, table_rows))
        return table_rows

    @staticmethod
    def _escape_sql_literal(text):
        """Escape *text* for use inside a single-quoted MySQL string literal."""
        # Backslashes first, otherwise the ones added for the quotes would be
        # doubled as well.
        return text.replace("\\", "\\\\").replace("'", "\\'")

    def insert_to_list(self, dict_all, table_name):
        """Insert one ``(user_id, dict_relation)`` row per entry of *dict_all*.

        Args:
            dict_all: mapping ``{user_id: {friend_id: count}}``, normally the
                result of :meth:`query_map_to_dict`.
            table_name: target table; interpolated into the SQL text as-is.
        """
        # A user's relation dict is only complete once every source row has
        # been read, so inserting can only start after the full query.
        # The drawback is memory: dict_all can become too large.
        # Alternative idea: for every source row, walk each personal dict,
        # look the user up in the target table and merge into the existing
        # row (possibly not doable in MySQL alone?).
        # TODO: memory error on the consumption table
        sys.stdout.write(u'开始插入到表%s。\n' % table_name)
        # Iterate over the keys directly: avoids building a second full list
        # of items for an already very large dictionary.
        for k in dict_all:
            user_id = self._escape_sql_literal("%s" % (k,))
            v = self._escape_sql_literal(str(dict_all[k]))
            sql_insert = "insert into %s(user_id, dict_relation) values('%s', '%s'); " % (
                table_name, user_id, v)
            self.mc.query(sql_insert)
        sys.stdout.write(u'表%s插入完成。\n' % table_name)
class FriendMap:
    """Build per-place "friend maps" from the rows of a table.

    Two people get a friend point for each other whenever they show up at the
    same place within ``maxRange`` seconds of one another.  The counts of one
    place are kept in ``self.pr.localDataSet['nameDict']`` and written to the
    ``<first three chars of tableName>_friendmap`` table when the place
    changes or the data ends.
    """

    def __init__(self, tableName, idItem, fixedItem, orderBy, maxRange):
        """Store the configuration, open the database client and prepare storage.

        Args:
            tableName: name of the source table.
            idItem, fixedItem, orderBy: kept together as ``self.searchItem``.
            maxRange: width of the time window, in seconds.
        """
        self.tableName = tableName
        self.searchItem = (idItem, fixedItem, orderBy)
        self.maxRange = maxRange
        self.mc = MysqlClient()
        self.prepare_environment()

    def prepare_environment(self):
        """Count the work, create the progress recorder and the needed tables."""
        print('Calculating the total work...')
        self.totalNum = self.mc.query('select count(*) from %s' % self.tableName)[0][0]
        self.restructTableName = 'restructed_%s' % self.tableName
        self.pr = ProcessRecorder(
            processName=self.tableName,
            localDataSet={'nameDict': {}, 'currentPlace': '', 'idListFindingFriends': []},
            total=self.totalNum,
            warningMessage='Calculating friend map of %s' % self.tableName)
        try:
            self.mc.restruct_table(
                self.tableName,
                [('node_des', ''), ('ac_datetime', '')],
                self.restructTableName)
        except Exception:
            # A failure here is treated as "the restructured table already
            # exists".  Only Exception is caught so that Ctrl-C still aborts.
            print('Storage has been restructed to %s' % self.restructTableName)
        self.mc.query(
            'create table if not exists %s (k text, v longtext)' % self._friendmap_table())

    def _friendmap_table(self):
        """Return the name of the table that receives the per-place friend maps."""
        return '%s_friendmap' % self.tableName[:3]

    def _add_friend_point(self, personA, personB):
        """Increase by one the number of times personA was seen near personB."""
        counts = self.pr.localDataSet['nameDict'].setdefault(personA, {})
        counts[personB] = counts.get(personB, 0) + 1

    def _flush_current_place(self):
        """Write the friend map of the current place, if there is one to write."""
        state = self.pr.localDataSet
        if state['currentPlace'] == '' or not state['nameDict']:
            return
        self.mc.insert_data(
            self._friendmap_table(),
            items=[state['currentPlace'],
                   MySQLdb.escape_string(json.dumps(state['nameDict']))])

    def calculate(self):
        """Walk the restructured table and accumulate the friend points.

        Each record is read as ``(person, time, place)`` (indices 0, 1, 2).
        Reading starts at offset ``self.pr.count``.  Any error or interruption
        is reported and the progress is stored through the recorder instead
        of being raised to the caller.
        """
        finished = False
        try:
            window_span = datetime.timedelta(0, self.maxRange)
            dataSource = self.mc.data_source(
                'select * from %s limit %s,100' % (self.restructTableName, self.pr.count))
            while True:
                data = dataSource()
                if data is None:
                    finished = True
                    break
                self.pr.add()
                state = self.pr.localDataSet
                if state['currentPlace'] != data[2]:
                    # New place: store the finished map and start a fresh one.
                    self._flush_current_place()
                    state['currentPlace'] = data[2]
                    state['nameDict'] = {}
                    state['idListFindingFriends'] = []
                # Keep only the records still inside the time window.  The
                # list is rebuilt instead of deleting from it while it is
                # being iterated, which used to skip entries.
                # NOTE(review): abs() makes the test independent of the sort
                # direction of the time column; with ascending times the old
                # `iff[1] - data[1]` was never positive, so nothing expired.
                # Confirm the ordering produced by restruct_table.
                recent = [iff for iff in state['idListFindingFriends']
                          if abs(iff[1] - data[1]) <= window_span]
                for iff in recent:
                    self._add_friend_point(data[0], iff[0])
                    self._add_friend_point(iff[0], data[0])
                recent.append(data)
                state['idListFindingFriends'] = recent
        except BaseException:
            # Catch-all on purpose (same as the previous bare except): also on
            # an interruption, store the current progress.
            print('\nProcess stopped when processing %s' % self.pr.localDataSet['currentPlace'])
            traceback.print_exc()
            self.pr.store_process()
        # An explicit flag replaces the old `data is None` test, which failed
        # with an unbound `data` when opening the data source raised.
        if finished:
            self._flush_current_place()
            print('\nProcessing Finished')