class IpdownCommand(BaseCommand):
    """Export one day's users and, per user, the phones sharing their registration IP.

    For a given date the command writes ``<commands dir>/phone_ip/<date>.txt``
    (one phone per line) and, for every user whose registration IP is shared
    with other users, ``<commands dir>/phone_ip/<date>/<phone>.txt`` holding a
    JSON dump of what ``ipHandler`` returned for that user.
    """

    limit = 200  # page size used for every paginated query
    dir_path = os.path.dirname(os.path.abspath(__file__))  # absolute path of this module's directory
    delimiter = os.sep  # path separator
    phone_dir = 'phone_ip'
    # Placeholders; replaced with real objects in __init__.
    fileUtil = ''
    oUserExtend = ''
    oUser = ''

    def __init__(self):
        # NOTE(review): BaseCommand.__init__ is not invoked here (it was not in
        # the original either) -- confirm that is intentional.
        self.fileUtil = fileUtil()
        # User-extension table accessor.
        self.oUserExtend = YiUserExtend()
        # User table accessor.
        self.oUser = YiUser()

    def runData(self, date_time):
        """Export the users found for ``date_time`` and their same-IP phones.

        Args:
            date_time: day to export, formatted ``YYYY-MM-DD``.

        Returns:
            None. Progress and totals are printed.

        Raises:
            ValueError: if ``date_time`` does not match ``%Y-%m-%d``.
        """
        start_time = datetime.strptime(date_time + " 00:00:00", '%Y-%m-%d %H:%M:%S')
        end_time = datetime.strptime(date_time + " 23:59:59", '%Y-%m-%d %H:%M:%S')
        print("开始时间:%s - 结束时间:%s" % (start_time, end_time))

        total = self.oUser.getMobileCount(start_time, end_time)
        if not total:
            print("\"%s\" - \"%s\" 间时区间暂无数据!" % (start_time, end_time))
            return None

        limit = self.limit
        pages = math.ceil(total / limit)

        # The timer covers the export itself, not the count query above.
        start = time.time()

        save_path = "%s%s%s%s" % (self.dir_path, self.delimiter, self.phone_dir, self.delimiter)
        self.fileUtil.mkdir(save_path)
        # File that receives the exported phone numbers.
        filename = "%s%s%s" % (save_path, date_time, '.txt')
        # Directory that receives one file per phone.
        ssdb_path = "%s%s%s" % (save_path, date_time, self.delimiter)
        self.fileUtil.mkdir(ssdb_path)

        # Start from a clean phone file on every run.
        if os.path.exists(filename):
            os.remove(filename)

        num = 0
        for cur_page in range(pages):
            offset = cur_page * limit
            mobile_data = self.oUser.getMobileUser(start_time, end_time, offset, limit)
            page_phones = []
            for c_phone in mobile_data:
                num += 1
                phone = c_phone.mobile.strip()
                page_phones.append(str(phone) + "\n")
                user_id = str(c_phone.user_id).strip()

                related_phones = self.ipHandler(user_id)
                if not related_phones:
                    continue

                ip_filename = "%s%s%s" % (ssdb_path, phone, '.txt')
                if os.path.exists(ip_filename):
                    os.remove(ip_filename)
                self.fileUtil.writePhoneText(ip_filename, json.dumps(related_phones))
                num += len(related_phones)

            # One write per page of users.
            self.fileUtil.writeText(filename, "".join(page_phones))

        print("%s 条" % str(num))
        end = time.time()
        print("耗时:" + str(end - start))
        print("done!")

    def ipHandler(self, user_id):
        """Collect data for the other users registered from ``user_id``'s IP.

        Args:
            user_id: id of the user whose registration IP is looked up.

        Returns:
            False when ``user_id`` is empty, the user has no extension record,
            or the record has no registration IP. Otherwise a list (possibly
            empty) built from ``getMobileByUserId`` results and single
            ``mobile`` values.
            NOTE(review): the element type returned by ``getMobileByUserId``
            is not visible here; the caller JSON-encodes the list, so it is
            presumably plain strings -- confirm.
        """
        if not user_id:
            return False

        ip_user_info = self.oUserExtend.getUserIp(user_id)
        if not ip_user_info:
            return False

        ip_str = ip_user_info.reg_ip.strip()
        if not ip_str:
            return False

        ip_count = self.oUserExtend.getIpCount(ip_str)
        if not ip_count:
            return []

        # Keep the queried id in its own name: it must stay constant across
        # pages so the user is excluded from every page, not only the first.
        own_id = str(user_id)
        limit = self.limit
        pages = math.ceil(ip_count / limit)
        user_phone = []

        for cur_page in range(pages):
            offset = cur_page * limit
            ip_rows = self.oUserExtend.getIpData(ip_str, offset, limit)
            if not ip_rows:
                continue

            # Drop every occurrence of the queried user, not just the first.
            other_ids = tuple(
                uid for uid in (str(row.user_id) for row in ip_rows) if uid != own_id
            )
            if not other_ids:
                continue

            if len(other_ids) > 1:
                # Several ids: one batched lookup.
                user_phone.extend(self.oUser.getMobileByUserId(other_ids))
                continue

            # Exactly one id: single-row lookup.
            user_info = self.oUser.getByUserId(other_ids[0])
            try:
                if user_info.mobile:
                    user_phone.append(user_info.mobile)
            except AttributeError as err:
                # Lookup returned nothing usable; report and move on.
                print(err)

        return user_phone
def runData(self, start_time, end_time):
    """Create graph edges from each user's phone to the phones in its SSDB record.

    Users found between ``start_time`` and ``end_time`` are paged through; for
    each phone the ``phoneArr`` list stored in SSDB is looked up in the graph
    in batches of ``self.ssdb_limit`` and an edge is created from the user's
    vertex to every found vertex that ``getEdgeData`` does not already report.

    Args:
        start_time: first day, formatted ``YYYY-MM-DD``.
        end_time: last day, formatted ``YYYY-MM-DD``.

    Returns:
        None when there are no users or the run completes; False when the
        graph database cannot be connected to or opened.

    Raises:
        ValueError: if either date does not match ``%Y-%m-%d``.
    """
    start_time = datetime.strptime(start_time + " 00:00:00", '%Y-%m-%d %H:%M:%S')
    end_time = datetime.strptime(end_time + " 23:59:59", '%Y-%m-%d %H:%M:%S')
    print("开始时间:%s - 结束时间:%s" % (start_time, end_time))

    oUser = YiUser()
    total = oUser.getMobileCount(start_time, end_time)
    if not total:
        print("\"%s\" - \"%s\" 时间区间暂无数据!" % (start_time, end_time))
        return None

    limit = self.limit
    pages = math.ceil(total / limit)

    client = self.Oorientdb.connectDb()
    if not client:
        return False
    if not self.Oorientdb.openDatabas(client):
        return False

    # Schema setup: vertex class, its property and index, and the edge class.
    self.Oorientdb.createClass(client, self.class_name)
    self.Oorientdb.createProperty(client, self.class_name, self.property)
    self.Oorientdb.createIndex(client, self.class_name, self.property)
    self.Oorientdb.createEdge(client, self.edge_name)

    num = 0
    insert_num = 0
    ssdb_limit = self.ssdb_limit
    start = time.time()

    for cur_page in range(pages):
        offset = cur_page * limit
        mobile_data = oUser.getMobileUser(start_time, end_time, offset, limit)
        for user_info in mobile_data:
            phone = user_info.mobile.strip()

            # SSDB value is JSON carrying the phone list under "phoneArr".
            phone_detail = self.ssdb_resources.get(phone)
            contacts = set()
            if phone_detail is not None:
                contacts = set(json.loads(phone_detail).get('phoneArr') or [])
            # The user's own number is never a target.
            contacts.discard(phone)
            message_list = list(contacts)
            num += len(message_list)

            for ssdb_cur_page in range(0, len(message_list), ssdb_limit):
                ssdb_offset = ssdb_cur_page + ssdb_limit
                print("%s---%d---%d" % (phone, ssdb_cur_page, ssdb_offset))
                page_message_list = message_list[ssdb_cur_page:ssdb_offset]
                # Query the user's own phone too so its RID comes back in the
                # same batch.
                page_message_list.append(phone)

                get_mobile_data = self.Oorientdb.getMobileData(
                    client, self.class_name, page_message_list)
                source_rid = ''
                candidate_rids = []
                for record in get_mobile_data:
                    if record.phone == phone:
                        source_rid = record._OrientRecord__rid
                        continue
                    candidate_rids.append(record._OrientRecord__rid)

                if not source_rid or not candidate_rids:
                    continue

                # RIDs already reported for the source; skip the source itself.
                get_edge_data = self.Oorientdb.getEdgeData(client, self.edge_name, source_rid)
                linked_rids = {
                    edge._OrientRecord__rid
                    for edge in get_edge_data
                    if edge._OrientRecord__rid != source_rid
                }

                # Compare whole RIDs. Diffing the comma-joined strings would
                # compare single characters instead of record ids.
                target_rid = [
                    rid for rid in dict.fromkeys(candidate_rids) if rid not in linked_rids
                ]
                if not target_rid:
                    continue

                insert_num += len(target_rid)
                # NOTE(review): the targets are passed as a list of RIDs, the
                # same type the original passed -- confirm createEdgeMore
                # accepts a list rather than a comma-joined string.
                self.Oorientdb.createEdgeMore(client, self.edge_name, source_rid, target_rid)

    end = time.time()
    print("耗时:" + str(end - start))
    print("扫描:%s 条" % str(num))
    print("创建:%s 条" % str(insert_num))
    print("done!")
def runData(self, start_time=None, end_time=None):
    """Import users' phones and their SSDB phone lists into the graph database.

    For every user found in the time window a vertex is created for the
    user's phone, the phone list stored in SSDB under that phone is inserted
    in batches of ``self.ssdb_limit``, and edges are created from the user's
    vertex to the vertices returned by ``getMobileData`` for each batch.

    Args:
        start_time: window start; defaults to yesterday at ``00:00:00``.
        end_time: window end; defaults to yesterday at ``23:59:59``.

    Returns:
        None. The number of edge batches created and the elapsed time are
        printed.
    """
    start = time.time()

    # Default window is the whole of yesterday.
    last_day = datetime.now() + timedelta(days=-1)
    if start_time is None:
        start_time = last_day.strftime('%Y-%m-%d 00:00:00')
    if end_time is None:
        end_time = last_day.strftime('%Y-%m-%d 23:59:59')

    ssdb = SsdbObject(False)
    ssdb_resources = ssdb.ssdbConnection()

    oUser = YiUser()

    client = self.connectDb()
    self.openData(client)

    # Schema setup: vertex class, edge class, index on "phone".
    class_name = self.class_name
    dege_name = self.dege_name
    self.createClass(client, class_name)
    self.createClass(client, dege_name, False)
    self.createIndex(client, class_name, "phone")

    total = oUser.getMobileCount(start_time, end_time)
    limit = self.limit
    # An empty/None count means no pages, not a crash in math.ceil.
    pages = math.ceil(total / limit) if total else 0
    ssdb_limit = self.ssdb_limit
    num = 0

    for cur_page in range(pages):
        offset = cur_page * limit
        mobile_data = oUser.getMobileUser(start_time, end_time, offset, limit)
        for data in mobile_data:
            phone = data.mobile.strip()
            # Skip blank numbers before anything is written to the graph.
            if not phone:
                continue

            # NOTE(review): the result of getData was never used; the call is
            # kept in case it has side effects -- confirm and drop if not.
            self.getData(client, class_name, phone)

            phone_info = self.createVertex(client, class_name, phone)
            source = phone_info[0].rid

            message_str = ssdb_resources.get(phone)
            contacts = set()
            if message_str is not None:
                contacts = set(json.loads(message_str))
            # The user's own number is never a target.
            contacts.discard(phone)
            message_list = list(contacts)

            for ssdb_cur_page in range(0, len(message_list), ssdb_limit):
                ssdb_offset = ssdb_cur_page + ssdb_limit
                print("%s---%d---%d" % (phone, ssdb_cur_page, ssdb_offset))
                page_message_list = message_list[ssdb_cur_page:ssdb_offset]

                self.insertValues(client, class_name, page_message_list)

                # Read the vertices back to learn their RIDs.
                get_mobile = self.getMobileData(client, class_name, page_message_list)
                target_list = ",".join(
                    target._OrientRecord__rid for target in get_mobile)
                if not target_list:
                    continue

                self.createEdgeMore(client, dege_name, source, target_list)
                num += 1
        # No early return here: every user and every page is processed, so
        # the summary below is always reached.

    print(num)
    end = time.time()
    print("耗时" + str(end - start))
    print("done!")
def runData(self, date_time):
    """Import users' phones and their SSDB phone lists into the graph database.

    Same flow as the windowed import, except that the window is read from
    ``self.start_time`` / ``self.end_time`` and targets are passed through
    ``self.filterEdge`` before edges are created.

    Args:
        date_time: value handed to ``self.checkData`` for validation.
            NOTE(review): ``self.start_time`` / ``self.end_time`` are
            presumably set by ``checkData`` -- confirm.

    Returns:
        None. The number of created edges and the elapsed time are printed.
    """
    start = time.time()

    # Kept as an equality test against False so a None result from checkData
    # is still treated as success, exactly as before.
    if self.checkData(date_time) == False:  # noqa: E712
        print("时间格式错误!")
        return None

    ssdb = SsdbObject(False)
    ssdb_resources = ssdb.ssdbConnection()

    oUser = YiUser()

    client = self.connectDb()
    self.openData(client)

    # Schema setup: vertex class, edge class, index on "phone".
    class_name = self.class_name
    dege_name = self.dege_name
    self.createClass(client, class_name)
    self.createClass(client, dege_name, False)
    self.createIndex(client, class_name, "phone")

    total = oUser.getMobileCount(self.start_time, self.end_time)
    limit = self.limit
    # An empty/None count means no pages, not a crash in math.ceil.
    pages = math.ceil(total / limit) if total else 0
    ssdb_limit = self.ssdb_limit
    num = 0

    for cur_page in range(pages):
        offset = cur_page * limit
        mobile_data = oUser.getMobileUser(self.start_time, self.end_time, offset, limit)
        for data in mobile_data:
            phone = data.mobile.strip()
            # Skip blank numbers before anything is written to the graph.
            if not phone:
                continue

            # NOTE(review): the result of getData was never used; the call is
            # kept in case it has side effects -- confirm and drop if not.
            self.getData(client, class_name, phone)

            phone_info = self.createVertex(client, class_name, phone)
            source = phone_info[0].rid

            message_str = ssdb_resources.get(phone)
            contacts = set()
            if message_str is not None:
                contacts = set(json.loads(message_str))
            # The user's own number is never a target.
            contacts.discard(phone)
            message_list = list(contacts)

            for ssdb_cur_page in range(0, len(message_list), ssdb_limit):
                ssdb_offset = ssdb_cur_page + ssdb_limit
                print("%s---%d---%d" % (phone, ssdb_cur_page, ssdb_offset))
                page_message_list = message_list[ssdb_cur_page:ssdb_offset]

                self.insertValues(client, class_name, page_message_list)

                # Read the vertices back to learn their RIDs.
                get_mobile = self.getMobileData(client, class_name, page_message_list)
                if not get_mobile:
                    continue

                # Drop targets that filterEdge rejects.
                target_list = self.filterEdge(client, dege_name, source, get_mobile)
                if not target_list:
                    continue

                edge_bool = self.createEdgeMore(client, dege_name, source, target_list)
                # Guard against a falsy result so counting cannot raise.
                num += len(edge_bool) if edge_bool else 0

    print("共创建 %d 条边缘" % num)
    end = time.time()
    print("耗时" + str(end - start))
    print("done!")
class MaillistCommand(BaseCommand):
    """Dump users' phones and their raw SSDB values to files under ./commands/mobile/."""

    def __init__(self):
        config = OrientdbConfig().getConfig()
        self.host = config['http_host']
        self.port = config['http_port']
        self.username = config['user']
        self.passwd = config['password']
        self.db_name = 'graph_data'
        self.limit = 500  # page size for the user query
        self.oUser = YiUser()

    def runData(self, start_time=None, end_time=None):
        """Export phones found in the time window and their SSDB payloads.

        Writes ``./commands/mobile/<start_time>.txt`` with one phone per line
        and, for each phone that has a value in SSDB,
        ``./commands/mobile/<start_time>/<phone>.json`` with that raw value.

        Args:
            start_time: window start; defaults to yesterday at ``00:00:00``.
            end_time: window end; defaults to yesterday at ``23:59:59``.

        Returns:
            None. The number of exported phones and the elapsed time are
            printed.
        """
        # Default window is the whole of yesterday.
        last_day = datetime.now() + timedelta(days=-1)
        if start_time is None:
            start_time = last_day.strftime('%Y-%m-%d 00:00:00')
        if end_time is None:
            end_time = last_day.strftime('%Y-%m-%d 23:59:59')

        ssdb = SsdbObject(False)
        ssdb_resources = ssdb.ssdbConnection()

        start = time.time()

        total = self.oUser.getMobileCount(start_time, end_time)
        limit = self.limit
        # An empty/None count means no pages, not a crash in math.ceil.
        pages = math.ceil(total / limit) if total else 0
        num = 0

        path_name = "./commands/mobile/"
        self.mkdir(path_name)
        mobile_filename = path_name + start_time + ".txt"
        ssdb_path_name = path_name + start_time + "/"
        # The per-run payload directory is created lazily, and only once.
        ssdb_dir_ready = False

        # Context managers guarantee both files are closed even on error.
        with open(mobile_filename, "w+") as fo:
            for cur_page in range(pages):
                offset = cur_page * limit
                mobile_data = self.oUser.getMobileUser(start_time, end_time, offset, limit)
                page_phones = []
                for mobile in mobile_data:
                    phone = mobile.mobile.strip()
                    print("user_mobile=%s" % phone)
                    if not phone:
                        continue

                    ssdb_message_str = ssdb_resources.get(phone)
                    if ssdb_message_str:
                        if not ssdb_dir_ready:
                            self.mkdir(ssdb_path_name)
                            ssdb_dir_ready = True
                        ssdb_filename = ssdb_path_name + phone + ".json"
                        # Opened in binary mode, as the original did.
                        with open(ssdb_filename, "wb+") as fo_ssdb:
                            fo_ssdb.write(ssdb_message_str)

                    page_phones.append(phone + "\n")
                    num += 1

                # One write per page; each phone appears exactly once.
                fo.write("".join(page_phones))

        print(num)
        end = time.time()
        print("耗时" + str(end - start))
        print("done!")

    def mkdir(self, path):
        """Create ``path`` (including parents) if it does not exist yet.

        Args:
            path: directory path; surrounding whitespace and trailing
                backslashes are removed first.

        Returns:
            True if the directory was created by this call, False if it
            already existed.
        """
        path = path.strip().rstrip("\\")
        if os.path.exists(path):
            print(path + ' 目录已存在')
            return False
        # exist_ok tolerates the directory appearing after the check above.
        os.makedirs(path, exist_ok=True)
        print(path + ' 创建成功')
        return True
class ColonymoreCommand(BaseCommand):
    """Command holding connection settings for OrientDB, SSDB and ClickHouse.

    NOTE(review): hosts and credentials are hard-coded below. They are kept
    as-is to preserve behaviour, but should be moved to configuration or the
    environment.
    """

    # User table accessor; replaced with a real object in __init__.
    oUser = ''

    def __init__(self):
        self.oUser = YiUser()

    def runData(self, start_time, end_time):
        """Parse the date range and run the user query.

        Args:
            start_time: first day, formatted ``YYYY-MM-DD``.
            end_time: last day, formatted ``YYYY-MM-DD``.

        Raises:
            ValueError: if either date does not match ``%Y-%m-%d``.
        """
        start_time = datetime.strptime(start_time + " 00:00:00", '%Y-%m-%d %H:%M:%S')
        end_time = datetime.strptime(end_time + " 23:59:59", '%Y-%m-%d %H:%M:%S')
        self.userOperation(start_time, end_time)

    def userOperation(self, start_time, end_time):
        """Print the number of users found between ``start_time`` and ``end_time``."""
        total = self.oUser.getMobileCount(start_time, end_time)
        print(total)

    def orientdbConfig(self):
        """Return the settings for the clustered graph database."""
        orientdb_config = {
            "http_host": "47.96.99.175",
            "http_port": "2424",
            "user": "******",
            "password": "******",
            "db_name": "test",  # database name
            "class_name": "class_phone",  # vertex class name
            "ip_edge_name": "ip_relation_ship",  # IP edge class name
            "specifications_edge_name": "specifications_relation_ship",  # call-detail edge class name
            "maillist_edge_name": "maillist_relation_ship",  # contact-list edge class name
        }
        return orientdb_config

    def maillistSsdbConfig(self):
        """Return the SSDB settings for the contact-list store."""
        ip_data = {
            "master": "47.93.121.71:8888",
            # NOTE(review): the original literal listed this same key twice
            # (values 4 and 6); only the last one ever took effect. If two
            # distinct slaves were intended, the second host must be supplied.
            "slave": {
                "47.93.121.71:8888": 6,
            },
        }
        return ip_data

    def specificationsConfig(self):
        """Return the SSDB settings for the call-detail store."""
        ip_data = {
            "master": "47.93.121.71:8888",
            # NOTE(review): the original literal listed this same key twice
            # (values 4 and 6); only the last one ever took effect. If two
            # distinct slaves were intended, the second host must be supplied.
            "slave": {
                "47.93.121.71:8888": 6,
            },
        }
        return ip_data

    def clickhouseConfig(self):
        """Return the ClickHouse connection settings."""
        click_config = {
            "http": "47.93.121.71",
            "port": "9001",
            "user": "******",
            "pwd": "6lYaUiFi",
            "database": "",
        }
        return click_config

    def clickhouseConnection(self):
        """Connect to ClickHouse and print the result of ``show tables``."""
        config = self.clickhouseConfig()
        client = Client(config["http"], config["port"], config["database"],
                        config["user"], config["pwd"])
        tables = client.execute("show tables")
        print(tables)
def runData(self, start_time, end_time):
    """Insert phone vertices for users and the phones in their SSDB records.

    Users found between ``start_time`` and ``end_time`` are paged through; for
    each one the ``phoneArr`` list stored in SSDB plus the user's own phone
    is processed in batches of ``self.ssdb_limit``, and every phone that
    ``getBatchData`` does not report is inserted.

    Args:
        start_time: first day, formatted ``YYYY-MM-DD``.
        end_time: last day, formatted ``YYYY-MM-DD``.

    Returns:
        None when there are no users or the run completes; False when the
        graph database cannot be connected to or opened.

    Raises:
        ValueError: if either date does not match ``%Y-%m-%d``.
    """
    start_time = datetime.strptime(start_time + " 00:00:00", '%Y-%m-%d %H:%M:%S')
    end_time = datetime.strptime(end_time + " 23:59:59", '%Y-%m-%d %H:%M:%S')
    print("开始时间:%s - 结束时间:%s" % (start_time, end_time))

    oUser = YiUser()
    total = oUser.getMobileCount(start_time, end_time)
    if not total:
        print("\"%s\" - \"%s\" 时间区间暂无数据!" % (start_time, end_time))
        return None

    limit = self.limit
    pages = math.ceil(total / limit)

    client = self.Oorientdb.connectDb()
    if not client:
        return False
    if not self.Oorientdb.openDatabas(client):
        return False

    # Schema setup: vertex class, its property and index.
    self.Oorientdb.createClass(client, self.class_name)
    self.Oorientdb.createProperty(client, self.class_name, self.property)
    self.Oorientdb.createIndex(client, self.class_name, self.property)

    num = 0
    insert_num = 0
    ssdb_limit = self.ssdb_limit
    start = time.time()

    for cur_page in range(pages):
        offset = cur_page * limit
        mobile_data = oUser.getMobileUser(start_time, end_time, offset, limit)
        for user_info in mobile_data:
            phone = user_info.mobile.strip()

            # SSDB value is JSON carrying the phone list under "phoneArr".
            phone_detail = self.ssdb_resources.get(phone)
            contacts = set()
            if phone_detail is not None:
                contacts = set(json.loads(phone_detail).get('phoneArr') or [])
            # Make sure the user's own phone is present exactly once.
            contacts.discard(phone)
            message_list = list(contacts)
            message_list.append(phone)
            num += len(message_list)

            for ssdb_cur_page in range(0, len(message_list), ssdb_limit):
                ssdb_offset = ssdb_cur_page + ssdb_limit
                print("%s---%d---%d" % (phone, ssdb_cur_page, ssdb_offset))
                page_message_list = message_list[ssdb_cur_page:ssdb_offset]

                # NOTE(review): this assumes getBatchData yields the phone
                # strings it found; if it yields record objects the difference
                # below removes nothing -- confirm.
                get_batch_data = self.Oorientdb.getBatchData(
                    client, self.class_name, page_message_list)

                # Own name for the batch result, so the page being iterated
                # above is not rebound.
                new_phones = list(set(page_message_list).difference(get_batch_data or []))
                if not new_phones:
                    continue

                # VALUES tuple list for the batch insert statement.
                phone_tuple = ",".join("('" + mobile + "')" for mobile in new_phones)

                insert_batch_data = self.Oorientdb.insertBatchData(
                    client, self.class_name, phone_tuple)
                if not insert_batch_data:
                    continue
                insert_num += len(new_phones)

    end = time.time()
    print("耗时:" + str(end - start))
    print("扫描:%s 条" % str(num))
    print("插入:%s 条" % str(insert_num))
    print("done!")
def runData(self, date_time):
    """Export one day's user phones and their SSDB values to files.

    Writes ``<dir_path>/<phone_dir>/<date_time>.txt`` with the phones (one per
    line, appended page by page through ``self.writeText``) and, for each
    phone that has a value in SSDB,
    ``<dir_path>/<phone_dir>/<date_time>/<phone>.txt`` through
    ``self.writeDetailText``.

    Args:
        date_time: day to export, formatted ``YYYY-MM-DD``.

    Returns:
        None. Progress, totals and elapsed time are printed.
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    start_time = datetime.strptime(date_time + " 00:00:00", fmt)
    end_time = datetime.strptime(date_time + " 23:59:59", fmt)
    print("开始时间:%s - 结束时间:%s" % (start_time, end_time))

    oUser = YiUser()
    total = oUser.getMobileCount(start_time, end_time)
    if not total:
        print("\"%s\" - \"%s\" 间时区间暂无数据!" % (start_time, end_time))
        return None

    page_size = self.limit
    page_count = math.ceil(total / page_size)

    # The timer starts after the count query.
    start = time.time()

    sep = self.delimiter
    save_path = "%s%s%s%s" % (self.dir_path, sep, self.phone_dir, sep)
    self.mkdir(save_path)
    filename = "%s%s%s" % (save_path, date_time, '.txt')
    ssdb_path = "%s%s%s" % (save_path, date_time, sep)
    self.mkdir(ssdb_path)

    # Start from a clean phone file on every run.
    if os.path.exists(filename):
        os.remove(filename)

    num = 0
    page_index = 0
    while page_index < page_count:
        offset = page_index * page_size
        page_index += 1

        rows = oUser.getMobileUser(start_time, end_time, offset, page_size)
        lines = []
        for row in rows:
            num += 1
            phone = row.mobile.strip()
            lines.append(str(phone) + "\n")

            ssdb_filename = "%s%s%s" % (ssdb_path, phone, '.txt')
            phone_detail = self.ssdb_resources.get(phone)
            if not phone_detail:
                continue
            print(type(phone_detail))

            if os.path.exists(ssdb_filename):
                os.remove(ssdb_filename)

            if self.writeDetailText(ssdb_filename, phone_detail):
                print("保存 \"%s\" 详单成功" % phone)
            else:
                print("保存 \"%s\" 详单失败" % phone)

        user_phone = "".join(lines)
        if not user_phone:
            continue

        if self.writeText(filename, user_phone):
            print("%s用户手机号保存成功" % user_phone)
        else:
            print("%s用户手机号保存失败" % user_phone)

    print("%s 条" % str(num))
    elapsed = time.time() - start
    print("耗时:" + str(elapsed))
    print("done!")
class ColonylistCommand(BaseCommand):
    """Import users' phones and their SSDB phone lists into OrientDB.

    Vertex RIDs are cached in SSDB under ``<phone> + key_suffix`` so that
    repeated runs can skip the graph lookup.

    NOTE(review): the host and credentials in ``orientdb_config`` are
    hard-coded; they should come from configuration.
    """

    # Placeholders; replaced with real objects in __init__.
    orientdb_client = ''
    oUser = ''
    ssdb_resources = ""
    # Absolute path of this module's directory.
    dir_path = os.path.dirname(os.path.abspath(__file__))
    # Path separator.
    delimiter = os.sep
    phone_dir = 'mobile'
    # Suffix appended to a phone to form its RID cache key in SSDB.
    key_suffix = '_orientdb_10'
    # Graph database name.
    db_name = 'test'
    # Maximum number of phones processed per graph batch.
    ssdb_limit = 500
    # Page size for the user query.
    limit = 200
    # Vertex class name.
    class_name = "class_phone_test"
    # Edge class name.
    edge_name = "relation_ship_test"
    orientdb_config = {
        "http_host": "47.96.99.175",
        "http_port": "2424",
        "user": "******",
        "password": "******",
        "database": "test",
    }

    def __init__(self):
        self.oUser = YiUser()
        self.orientdb_client = self.connectDb()
        ssdb = SsdbObject(False)
        self.ssdb_resources = ssdb.ssdbConnection()

    @staticmethod
    def _to_text(value):
        """Return ``value`` decoded as UTF-8 if it is bytes, else unchanged."""
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return value

    def runData(self, start_time, end_time):
        """Import every user found between ``start_time`` and ``end_time``.

        Returns:
            False when there are no users or the graph setup fails; None after
            a completed run. The processed count and elapsed time are printed.
        """
        total = self.oUser.getMobileCount(start_time, end_time)
        if not total:
            print("没有用户信息")
            return False

        if not self.orientdbOperation():
            print("图数据库操作失败")
            return False

        # Use the configured page size.
        limit = self.limit
        pages = math.ceil(total / limit)
        num = 0
        start = time.time()

        for cur_page in range(pages):
            offset = cur_page * limit
            mobile_data = self.oUser.getMobileUser(start_time, end_time, offset, limit)
            for user_info in mobile_data:
                phone = user_info.mobile.strip()
                num += self.importOperation(phone)

        end = time.time()
        print(str(num) + " 条数据耗时:" + str(end - start))

    def importOperation(self, phone):
        """Ensure ``phone`` has a vertex and link it to the phones in its SSDB list.

        Args:
            phone: the user's phone number.

        Returns:
            Number of phones handled: the SSDB list size plus one for the
            user's own phone.
        """
        key = phone + self.key_suffix
        phone_rid = self.ssdb_resources.get(key)
        if not phone_rid:
            try:
                p_rid = self.insertOne(phone)
                self.ssdb_resources.set(key, p_rid)
            except pyorient.exceptions.PyOrientORecordDuplicatedException as f:
                # Vertex already exists: look its RID up and cache that.
                print(f)
                vertext_info = self.getVertext(phone)
                v_rid = "#" + vertext_info[0].rid._OrientRecordLink__link
                self.ssdb_resources.set(key, v_rid)
            # Re-read so the value has the same type as a cache hit.
            phone_rid = self.ssdb_resources.get(key)

        # Decode once, up front. Decoding inside the batch loop would fail on
        # the second batch because the value is already text by then.
        phone_rid = self._to_text(phone_rid)

        message_str = self.ssdb_resources.get(phone)
        contacts = set()
        if message_str is not None:
            contacts = set(json.loads(message_str))
        # The user's own number is never a target.
        contacts.discard(phone)
        message_list = list(contacts)
        num = len(message_list) + 1

        ssdb_limit = self.ssdb_limit
        for ssdb_cur_page in range(0, len(message_list), ssdb_limit):
            ssdb_offset = ssdb_cur_page + ssdb_limit
            print("%s---%d---%d" % (phone, ssdb_cur_page, ssdb_offset))
            page_message_list = message_list[ssdb_cur_page:ssdb_offset]

            try:
                self.insertValues(page_message_list)
            except pyorient.exceptions.PyOrientORecordDuplicatedException as f:
                print(f)

            # Collect the cached RIDs of this batch's phones.
            target_rids = []
            for target_phone in page_message_list:
                t_rid = self.ssdb_resources.get(target_phone + self.key_suffix)
                if t_rid:
                    target_rids.append(self._to_text(t_rid))
            if not target_rids:
                # Nothing to link; avoid issuing CREATE EDGE with no targets.
                continue

            try:
                self.createEdgeMore(phone_rid, ",".join(target_rids))
            except pyorient.exceptions.PyOrientCommandException as f:
                print(f)

        return num

    def orientdbOperation(self):
        """Open the database and create the vertex class, property, edge class and index.

        Schema, command and index errors raised by the create steps are
        printed and otherwise ignored.

        Returns:
            False if the database does not exist or cannot be opened,
            True otherwise.
        """
        client = self.orientdb_client

        # 1. The database must already exist.
        if not client.db_exists(self.db_name, pyorient.STORAGE_TYPE_PLOCAL):
            print("\"%s\" 数据库不存在" % self.db_name)
            return False

        # 2. Open it.
        try:
            client.db_open(self.db_name,
                           self.orientdb_config['user'],
                           self.orientdb_config['password'])
        except pyorient.exceptions.PyOrientDatabaseException as e:
            print(e)
            return False

        # 3. Vertex class.
        try:
            client.command("CREATE CLASS %s extends %s" % (self.class_name, "V"))
        except pyorient.exceptions.PyOrientSchemaException as p:
            print("创建类:" + "=" * 50)
            print(p)

        # 4. Vertex property.
        try:
            client.command("CREATE PROPERTY %s.phone STRING" % self.class_name)
        except pyorient.exceptions.PyOrientCommandException as p:
            print("创建类属性:" + "=" * 50)
            print(p)

        # 5. Edge class.
        try:
            client.command("CREATE CLASS %s extends %s" % (self.edge_name, "E"))
        except pyorient.exceptions.PyOrientSchemaException as f:
            print("创建边缘:" + "=" * 50)
            print(f)

        # 6. Unique index on phone.
        try:
            index_sql = "create index %s ON %s(%s) UNIQUE " % (
                self.class_name + "_phone", self.class_name, "phone")
            print(client.command(index_sql))
        except pyorient.exceptions.PyOrientIndexException as f:
            print("创建索引:" + "=" * 50)
            print(f)

        print("创建数据库相关结束" + "=" * 40)
        return True

    def connectDb(self):
        """Connect to the OrientDB server and return the client."""
        client = pyorient.OrientDB(self.orientdb_config['http_host'],
                                   int(self.orientdb_config['http_port']))
        client.connect(self.orientdb_config['user'], self.orientdb_config['password'])
        return client

    def insertOne(self, phone):
        """Insert one phone vertex and return its RID."""
        # Quoted so the value is stored as text (keeps leading zeros),
        # matching the quoting used by insertValues.
        insert_str = "insert into %s (phone) values('%s')" % (self.class_name, phone)
        insert_info = self.orientdb_client.command(insert_str)
        return insert_info[0]._OrientRecord__rid

    def insertValues(self, message_list):
        """Insert vertices for the phones in ``message_list`` that are not known yet.

        Phones with a cached RID are skipped, then phones already present in
        the graph are filtered out by ``getMobileAll``. RIDs of the inserted
        records are cached.

        Returns:
            True if an insert was issued, False if there was nothing to insert.
        """
        uncached = [
            mobile for mobile in message_list
            if not self.ssdb_resources.get(mobile + self.key_suffix)
        ]
        if not uncached:
            return False

        diff_mobile = self.getMobileAll(uncached)
        if not diff_mobile:
            return False

        values = ",".join("('" + mobile + "')" for mobile in diff_mobile)
        insert_str = "insert into %s (phone) values %s" % (self.class_name, values)
        insert_data = self.orientdb_client.command(insert_str)

        for insert_object in insert_data:
            key = insert_object.phone + self.key_suffix
            if not self.ssdb_resources.get(key):
                self.ssdb_resources.set(key, insert_object._OrientRecord__rid)
        return True

    def createEdgeMore(self, source, target):
        """Create edges from ``source`` to every RID in the comma-separated ``target``."""
        edget_sql = "CREATE EDGE %s from %s TO [%s] " % (self.edge_name, source, target)
        return self.orientdb_client.command(edget_sql)

    def getVertext(self, phone):
        """Return the query result holding the RID of the vertex for ``phone``."""
        return self.orientdb_client.command(
            'select @rid from %s where phone="%s" ' % (self.class_name, phone))

    def getMobileAll(self, v_values):
        """Return the phones from ``v_values`` that have no vertex yet.

        RIDs of the phones that do exist are cached as a side effect.
        """
        # Build the list literal explicitly; no reliance on the repr of a
        # Python list.
        in_list = "[" + ", ".join("'%s'" % value for value in v_values) + "]"
        get_sql = "select from %s where phone in %s" % (self.class_name, in_list)
        get_data = self.orientdb_client.command(get_sql)

        existing = []
        for mobile in get_data:
            self.ssdb_resources.set(mobile.phone + self.key_suffix,
                                    mobile._OrientRecord__rid)
            existing.append(mobile.phone)

        # In v_values but not already in the graph.
        return list(set(v_values).difference(existing))