def read_position_excel(config):
    """Read the position workbook and return its header row and data rows.

    Only the first worksheet of the workbook is read.

    Args:
        config: Object exposing ``get_filename(key)``; the value returned for
            ``"position_file"`` is appended to ``curPath.mainPath()`` to build
            the workbook path.

    Returns:
        tuple: ``(table, new_data)``. ``table`` is the list of cell values of
        the first row (the column labels). ``new_data`` maps the first cell
        value of every later row to the full list of that row's cell values,
        restricted to rows that have exactly as many cells as the header.
    """
    position_path = curPath.mainPath() + config.get_filename("position_file")
    workbook = load_workbook(position_path)
    # ``sheetnames`` and ``workbook[name]`` replace the deprecated
    # ``get_sheet_names()`` / ``get_sheet_by_name()`` accessors.
    booksheet = workbook[workbook.sheetnames[0]]

    table = []  # column labels taken from the first row
    data = {}  # first cell of a row -> all cell values of that row
    for row_index, row in enumerate(booksheet.rows):
        line = [cell.value for cell in row]
        if row_index == 0:
            table = table + line
        else:
            # A later row with the same first cell overwrites the earlier one.
            data[line[0]] = line

    # Drop rows whose length differs from the header length.
    new_data = {
        key: line for key, line in data.items() if len(line) == len(table)
    }
    return table, new_data
# Path (relative to curPath.mainPath()) of the saved column-label array for
# every table that process() is allowed to query.
_PROCESS_TABLE_FILES = {
    "thieves": "/thieves_dq/thieves.npy",
    "position": "/position_zw/position.npy",
    "fraudsters": "/fraudsters_zp/fraudsters.npy",
    "drug": "/drug/drug.npy",
    "traffic": "/traffic/traffic.npy",
    "rape": "/violence/rape.npy",
    "rob": "/violence/rob.npy",
    "damage": "/violence/damage.npy",
    "intentkill": "/violence/intentkill.npy",
}


def process(type, code):
    """Look up one record by id and return it as a ``{column: value}`` dict.

    Both arguments are validated before any SQL is built, because they are
    interpolated into the statement text: ``type`` must be one of the known
    table names and ``code`` must be an integer (or its string form).

    Args:
        type: Table name; one of the keys of ``_PROCESS_TABLE_FILES``.
        code: Record id.

    Returns:
        dict:
            * ``{"err": "参数错误!"}`` when ``type`` is unknown or ``code`` is
              not an integer;
            * ``{"status": "id is not found!"}`` when the query returns no row;
            * ``{}`` when the query returns more than one row;
            * otherwise the row, keyed by ``"id"``, ``"data_type"`` and the
              labels loaded from the table's ``.npy`` file.

    Raises:
        AssertionError: If the number of saved labels does not equal the
            number of row values minus two.
    """
    if not isinstance(type, str) or type not in _PROCESS_TABLE_FILES:
        return {"err": "参数错误!"}
    try:
        # Going through str() rejects floats, booleans and None instead of
        # silently truncating or coercing them.
        record_id = int(str(code))
    except ValueError:
        return {"err": "参数错误!"}

    con = db.DB()
    # Safe to interpolate: ``type`` is whitelisted and ``record_id`` is an int.
    sql = "select * from {} where id={}".format(type, record_id)
    data = con.select(sql)

    res = {}
    if len(data) == 1:
        adata = data[0]
        # NOTE(review): allow_pickle=True executes pickled content; these
        # files are expected to be produced locally by the import scripts.
        table = np.load(
            curPath.mainPath() + _PROCESS_TABLE_FILES[type], allow_pickle=True)
        # The row carries two leading columns (id, data_type) that are not
        # part of the saved label array.
        assert len(table) == len(adata) - 2
        table = np.hstack((np.array(["id", "data_type"]), table))
        for i, label in enumerate(table):
            res[label] = adata[i]
        return res
    if len(data) == 0:
        res["status"] = "id is not found!"
        return res
    return res
def read_thieves_excel(config):
    """Read the thieves workbook and merge its worksheets column-wise.

    Worksheets are processed in order. The first row of each sheet extends
    the label list, and every later row extends the record that shares its
    first cell value. Processing stops after the sheet at index 3.

    Args:
        config: Object exposing ``get_filename(key)``; the value returned for
            ``"thieves_file"`` is appended to ``curPath.mainPath()`` to build
            the workbook path.

    Returns:
        tuple: ``(table, new_data)``. ``table`` is the merged label list: the
        full header of the first sheet followed by the headers of the later
        sheets without their first column. ``new_data`` maps a row's first
        cell value to its merged values, restricted to records whose length
        equals ``len(table)``.
    """
    thieves_path = curPath.mainPath() + config.get_filename("thieves_file")
    workbook = load_workbook(thieves_path)

    table = []  # merged column labels
    data = {}  # first cell of a row -> merged values across sheets
    # ``sheetnames`` and ``workbook[name]`` replace the deprecated
    # ``get_sheet_names()`` / ``get_sheet_by_name()`` accessors.
    for sheet_index, sheet_name in enumerate(workbook.sheetnames):
        booksheet = workbook[sheet_name]
        for row_index, row in enumerate(booksheet.rows):
            line = [cell.value for cell in row]
            if row_index == 0:
                # Later sheets repeat the key column; skip it in the labels.
                table = table + (line if sheet_index == 0 else line[1:])
            elif line[0] not in data:
                # First sighting of this key: keep the whole row.
                data[line[0]] = line
            else:
                data[line[0]] = data[line[0]] + line[1:]
        if sheet_index == 3:
            # Sheets after the fourth one are ignored.
            break

    # Drop records that do not have a value for every label.
    new_data = {
        key: line for key, line in data.items() if len(line) == len(table)
    }
    return table, new_data
def read_fraudsters_excel(config):
    """Read the fraudsters workbook and merge its worksheets column-wise.

    The workbook is walked twice: first to collect the full label list from
    the header row of every sheet, then to build one record per key (first
    cell of a row). A record created from a row is padded with ``-1`` so that
    it is as long as the full label list.

    Args:
        config: Object exposing ``get_filename(key)``; the value returned for
            ``"fraudsters_file"`` is appended to ``curPath.mainPath()`` to
            build the workbook path.

    Returns:
        tuple: ``(table, data)``. ``table`` is the full header of the first
        sheet followed by the headers of the later sheets without their
        first five columns. ``data`` maps a row's first cell value to its
        merged list of values.
    """
    workbook_path = curPath.mainPath() + config.get_filename("fraudsters_file")
    workbook = load_workbook(workbook_path)
    # ``sheetnames`` and ``workbook[name]`` replace the deprecated
    # ``get_sheet_names()`` / ``get_sheet_by_name()`` accessors.
    sheet_names = workbook.sheetnames

    # Pass 1: collect the labels from the first row of every sheet.
    table = []
    for sheet_index, sheet_name in enumerate(sheet_names):
        for row in workbook[sheet_name].rows:
            header = [cell.value for cell in row]
            # Sheets after the first repeat five leading columns.
            table = table + (header if sheet_index == 0 else header[5:])
            break  # only the first row holds labels

    # Pass 2: merge the data rows of every sheet.
    data = {}
    for sheet_index, sheet_name in enumerate(sheet_names):
        for row_index, row in enumerate(workbook[sheet_name].rows):
            if row_index == 0:
                continue  # header row, handled in pass 1
            line = [cell.value for cell in row]
            key = line[0]
            if key not in data:
                # Pad with -1 for the columns this sheet does not provide.
                padding = [-1] * (len(table) - len(line))
                if sheet_index == 0:
                    data[key] = line + padding
                else:
                    data[key] = line[:5] + padding + line[5:]
            else:
                # NOTE(review): 17 is hard-coded; presumably the column count
                # of the first sheet - confirm against the workbook.
                data[key] = data[key][:17] + line[5:]
    return table, data
data_json.append(adata_json) name_set.add(name) if table == "intentkill": qid_start = 8000 # 故意杀人 for j in range(1000): adata = data[j] id = adata[0] name = adata[2] age = adata[4] type = adata[7] note = name + "," + str(age) + "岁," + type + "," + kill_note() # print(desc) sql_updata = "update {} set note=\"{}\" where id={};".format( table, note, int(id)) # print(sql_updata) con.updata(sql_updata) adata_json = {} adata_json["qid"] = qid_start + int(id) adata_json["type"] = table adata_json["note"] = note data_json.append(adata_json) name_set.add(name) sava_to_json.save_json(data_json, curPath.mainPath() + "/bert/data/data.json") save_to_txt( name_set, curPath.mainPath() + "/bert/stopwords/姓名年龄岁.txt") #bert里面进行用的时候,去掉这些姓名等词语,所以提前保存停止词
# 5. Build the CREATE TABLE statement: one backticked column per label in
#    ``table`` except the last label, which becomes the trailing text column.
traffic_table_name = config.get_tablename("traffic_name")
sql_createTb = (
    "create table {} (id int primary key auto_increment,data_type int(1) ,`{}`char(20) not null default '',"
    + "`{}` char(20) not null default ''," * (len(table) - 2)
    + "{} text(1000))CHARSET=utf8;"
)
sql_createTb = sql_createTb.format(traffic_table_name, *table)
print(table)
print(sql_createTb)
con = db.DB()
# presumably creates the table when it does not exist yet - see db.DB
con.chech_table_exit(traffic_table_name, sql_createTb)

# 6. Insert one row per record (data_type is written as 0).
# NOTE(review): values are formatted straight into the SQL text, so a value
# containing a single quote breaks the statement; switch to parameterized
# queries if db.DB supports them.
data_arr = []
for key in data:
    adata = data[key]
    insert_template = (
        "insert into {} values(default,0,"
        + "'{}'," * len(adata)
        + "'{}');"
    )
    try:
        flag_text = p.join(flag_map[key])
    except Exception:
        # No usable labels for this record: store the placeholder. Catching
        # Exception rather than everything lets Ctrl-C / SystemExit through.
        flag_text = "无"
    sql_insert = insert_template.format(traffic_table_name, *adata, flag_text)
    print(sql_insert)
    con.insert(sql_insert)

# 7. Save the label array; the web interface loads it to name the columns.
np.save(curPath.mainPath() + "/traffic/traffic.npy", table)
# Save the static_map produced in steps 3.5 and 4.
sava_to_json.save_json(
    static_map, curPath.mainPath() + "/temp_file/traffic_static_map")
"适用人群": "暂时没有", "单元": ["虚拟单元1(00017)", "虚拟单元2(00018)"]}, "虚拟疗法(20)": { "age": "暂时没有", "edu": "暂时没有", "适用人群": "暂时没有", "单元": ["虚拟单元1(00017)", "虚拟单元2(00018)"]}, "虚拟疗法(21)": { "age": "暂时没有", "edu": "暂时没有", "适用人群": "暂时没有", "单元": ["虚拟单元1(00017)", "虚拟单元2(00018)"]}, "虚拟疗法(22)": { "age": "暂时没有", "edu": "暂时没有", "适用人群": "暂时没有", "单元": ["虚拟单元1(00017)", "虚拟单元2(00018)"]}, "虚拟疗法(23)": { "age": "暂时没有", "edu": "暂时没有", "适用人群": "暂时没有", "单元": ["虚拟单元1(00017)", "虚拟单元2(00018)"]}, "虚拟疗法(24)": { "age": "暂时没有", "edu": "暂时没有", "适用人群": "暂时没有", "单元": ["虚拟单元1(00017)", "虚拟单元2(00018)"]}, } s.save_json(flag_method,curPath.mainPath()+"/temp_file/flag_method.json") s.save_json(method_plan,curPath.mainPath()+"/temp_file/method_plan.json")
flag_map = {} for key in data: dim = data[key].tolist()[-n:] #后面的维度项目总分 table2 = table.tolist()[-n - 1:-1] #后面的维度项目标签 flag_map[key] = [] for i in range(len(dim)): if dim[i] > arr[i]: flag_map[key].append(table2[i]) return flag_map if __name__ == '__main__': # 生成1k条虚拟数据 N = 1100 # 1.读取统计文件position_static_map.json static_map = sava_to_json.load_json(curPath.mainPath() + "/temp_file/thieves_static_map") # print(len(static_map)) # 2.根据文件生成基本数据,根据文件的均值和标准差按照正太分布生成随机的问卷分数 data = {} for i in range(N): #编号 id = 20000 + i sex = random.choice(range(2)) #1表示男,0表示女 name = random_name(sex) start_id = str(id) + name data[start_id] = [] data[start_id].append(start_id) #性别 sex data[start_id].append(sex)
flag_map = {} for key in data: dim = data[key].tolist()[-n:] #后面的维度项目总分 table2 = table.tolist()[-n - 1:-1] #后面的维度项目标签 flag_map[key] = [] for i in range(len(dim)): if float(dim[i]) > float(arr[i]): flag_map[key].append(table2[i]) return flag_map if __name__ == '__main__': # 生成1k条虚拟数据 N = 1100 # 1.读取统计文件position_static_map.json static_map = sava_to_json.load_json(curPath.mainPath() + "/temp_file/position_static_map") print(len(static_map)) # 2.根据文件生成基本数据,根据文件的均值和标准差按照正太分布生成随机的问卷分数 data = {} for i in range(N): my_id = static_map["自编号"][-1] + 1 + i name = random_name() prison_room = random.choice(static_map["监狱"]) prison_area = random.choice(static_map["监区"]) r_id = 4500000000 + i birth = randomtimes('1956-01-01-0-0', '1985-12-30-0-0') birth2 = birth.strftime("%Y-%m-%d %H:%M:%S") survey_data = randomtimes( '2019-01-01-0-0', '2020-12-30-0-0').strftime("%Y-%m-%d %H:%M:%S")
import copy import time import random import datetime import position_zw.position as p import position_zw.random_generate as pr import violence.read_excel as v if __name__ == '__main__': N = 1100 # N=1 tablename = ["rape", "rob", "damage", "intentkill"] for j in range(len(tablename)): data = {} static_map = sava_to_json.load_json( curPath.mainPath() + "/temp_file/{}_static_map".format(tablename[j])) for i in range(N): name = pr.random_name() data[name] = [] data[name].append(name) birth = pr.randomtimes('1956-01-01-0-0', '1985-12-30-0-0') age = 2021 - birth.year birth2 = birth.strftime("%Y-%m-%d") data[name].append(birth2) data[name].append(age) data[name].append(random.choice(static_map["受教育程度"])) data[name].append(random.choice(static_map["刑期"])) data[name].append(random.choice(static_map["组别"])) # 添加问卷量表数据 for key in static_map:
txt[m] = method_plan[m] else: txt["status"] = "flag is not found!" return web.json_response(txt) async def handle_greeting(self, request): type = request.match_info.get('type') code = request.match_info.get('code') assert type in table txt = process(type, code) # print(txt) return web.json_response(txt) handler = Handler() app = web.Application() table = [ "thieves", "position", "fraudsters", "drug", "traffic", "rape", "rob", "damage", "intentkill" ] flag_method = s.load_json(curPath.mainPath() + "/temp_file/flag_method.json") print(flag_method.keys()) method_plan = s.load_json(curPath.mainPath() + "/temp_file/method_plan.json") app.add_routes([ web.get('/prisoners', handler.handle_intro), web.get('/prisoners/flag={flag}', handler.handle_plan), web.get('/prisoners/type={type}&code={code}', handler.handle_greeting) ]) # print(config) app['config'] = config web.run_app(app)
# 5. Build the CREATE TABLE statement: one backticked column per label in
#    ``table`` except the last label, which becomes the trailing text column.
thieves_table_name = config.get_tablename("thieves_name")
sql_createTb = (
    "create table {} (id int primary key auto_increment,data_type int(1) ,`{}`char(10) not null default '',"
    + "`{}` char(8) not null default ''," * (len(table) - 2)
    + "{} text(1000))CHARSET=utf8;"
)
sql_createTb = sql_createTb.format(thieves_table_name, *table)
con = db.DB()
# presumably creates the table when it does not exist yet - see db.DB
con.chech_table_exit(thieves_table_name, sql_createTb)

# 6. Insert one row per record (data_type is written as 0).
# NOTE(review): values are formatted straight into the SQL text, so a value
# containing a single quote breaks the statement; switch to parameterized
# queries if db.DB supports them.
data_arr = []
for key in data:
    adata = data[key]
    insert_template = (
        "insert into {} values(default,0,"
        + "'{}'," * len(adata)
        + "'{}');"
    )
    try:
        flag_text = join(flag_map[key])
    except Exception:
        # No usable labels for this record: store the placeholder. Catching
        # Exception rather than everything lets Ctrl-C / SystemExit through.
        flag_text = "无"
    sql_insert = insert_template.format(thieves_table_name, *adata, flag_text)
    print(sql_insert)
    con.insert(sql_insert)

# 7. Save the label array; the web interface loads it to name the columns.
np.save(curPath.mainPath() + "/thieves_dq/thieves.npy", table)
# Save the static_map produced in steps 3.5 and 4.
sava_to_json.save_json(
    static_map, curPath.mainPath() + "/temp_file/thieves_static_map")
# Append the label column name, then compute the per-record labels.
table = np.hstack((table, np.array(["标签"])))
flag_map, static_map = cul_flag(data, table, n, 0.27, static_map)

# 5. Build the CREATE TABLE statement: one backticked column per label in
#    ``table`` except the last label, which becomes the trailing char column.
fraudsters_table_name = config.get_tablename("fraudsters_name")
sql_createTb = (
    "create table {} (id int primary key auto_increment,data_type int(1) ,`{}`char(10) not null default '',"
    + "`{}` char(10) not null default ''," * (len(table) - 2)
    + "{} char(255) not null default '')CHARSET=utf8;"
)
sql_createTb = sql_createTb.format(fraudsters_table_name, *table)
con = db.DB()
# presumably creates the table when it does not exist yet - see db.DB
con.chech_table_exit(fraudsters_table_name, sql_createTb)

# 6. Insert one row per record (data_type is written as 0).
# NOTE(review): values are formatted straight into the SQL text, so a value
# containing a single quote breaks the statement; switch to parameterized
# queries if db.DB supports them.
data_arr = []
for key in data:
    adata = data[key]
    insert_template = (
        "insert into {} values(default,0,"
        + "'{}'," * len(adata)
        + "'{}');"
    )
    try:
        flag_text = join(flag_map[key])
    except Exception:
        # No usable labels for this record: store the placeholder. Catching
        # Exception rather than everything lets Ctrl-C / SystemExit through.
        flag_text = "无"
    sql_insert = insert_template.format(
        fraudsters_table_name, *adata, flag_text)
    print(sql_insert)
    con.insert(sql_insert)

# 7. Save the label array; the web interface loads it to name the columns.
np.save(curPath.mainPath() + "/fraudsters_zp/fraudsters.npy", table)
sava_to_json.save_json(
    static_map, curPath.mainPath() + "/temp_file/fraudsters_static_map")
len(table) - 2) + "{} text(1000))CHARSET=utf8;" sql_createTb = sql_createTb.format(position_table_name, *table) # print(table) print(sql_createTb) con = db.DB() con.chech_table_exit(position_table_name, sql_createTb) #6.数据写入 data_arr = [] # print(len(table)) # print(flag_map) for key in data: adata = data[key] sql_insert = "insert into {} values(default,0," + "'{}'," * ( len(adata)) + "'{}');" try: sql_insert = sql_insert.format(position_table_name, *adata, join(flag_map[key])) except: sql_insert = sql_insert.format(position_table_name, *adata, "无") print(sql_insert) # con.insert(sql_insert) # print(data) #{key(编号),value:数据} # print(len(table)) #144条 # 7.保存table用于提供web接口 np.save(curPath.mainPath() + "/position_zw/position.npy", table) # 3.5+4步骤中的数据static_map进行保存 sava_to_json.save_json( static_map, curPath.mainPath() + "/temp_file/position_static_map")
sum = 0 for j in items[i]: sum += int(float(adata[j])) inner_arr.append(sum) data[int(adata[0])] = np.hstack( (data[int(adata[0])], np.array(inner_arr))) #处理inner_arr 进行评估是否含有这个标签 return data if __name__ == '__main__': # 生成1k条虚拟数据 N = 1100 # 1.读取统计文件drug_static_map.json static_map = sava_to_json.load_json(curPath.mainPath() + "/temp_file/drug_static_map") # 2.根据文件生成基本数据,根据文件的均值和标准差按照正太分布生成随机的问卷分数 data = {} base_id = int(max(static_map["ID完整"])) + 1 for i in range(N): my_id = base_id + i data[my_id] = [my_id] data[my_id].append(random.choice(static_map["年龄"])) data[my_id].append(random.choice(static_map["文化程度"])) data[my_id].append(random.choice(static_map["罪名"])) for key in static_map: if len(static_map[key]) == 4: data[my_id].append(pr.random_normal(static_map, key))
import random import datetime import position_zw.position as p import position_zw.random_generate as pr """ 这部分根据统计信息trafic_static_map完成数据的随机生成,并写入数据库,虚拟数据的data_type=1 1.读取统计文件trafic_static_map.json 2.根据文件生成基本数据,根据文件的均值和标准差按照正太分布生成随机的问卷分数 3.计算总分,按照打标签static_map中最后一项阈值,完成打标签 4.构造sql,写入数据库 """ if __name__ == '__main__': # 生成1k条虚拟数据 N = 1100 # 1.读取统计文件drug_static_map.json static_map = sava_to_json.load_json(curPath.mainPath() + "/temp_file/traffic_static_map") print(len(static_map)) # 2.根据文件生成基本数据,根据文件的均值和标准差按照正太分布生成随机的问卷分数 data = {} base_id = 1 for i in range(N): name = pr.random_name() data[name] = [name] for key in static_map: if len(static_map[key]) == 4: data[name].append(pr.random_normal(static_map, key)) # 3.计算总分, 按照打标签static_map中最后一项阈值, 完成打标签 n = 0
import thieves_dq.random_generate as th_random import position_zw.position as p import position_zw.random_generate as pr """ 这部分根据统计信息thieves_static_map完成数据的随机生成,并写入数据库,虚拟数据的data_type=1 1.读取统计文件thieves_static_map.json, -1并未参与到统计中 2.根据文件生成基本数据,根据文件的均值和标准差按照正太分布生成随机的问卷分数 注意的是两个属性只能生成一种属性,所以可以使用一个随机数决定创造力还是马式 3.计算总分,按照打标签static_map中最后一项阈值,完成打标签 4.构造sql,写入数据库 """ if __name__ == '__main__': # 生成1k条虚拟数据 N = 1100 # 1.读取统计文件position_static_map.json static_map = sava_to_json.load_json(curPath.mainPath() + "/temp_file/fraudsters_static_map") print(len(static_map)) # 2.根据文件生成基本数据,根据文件的均值和标准差按照正太分布生成随机的问卷分数 data = {} base_id = max(static_map["罪犯编号"]) + 1 for i in range(N): id = base_id + i data[id] = [] data[id].append(id) #"罪犯编号" data[id].append(random.choice(static_map["队别"])) data[id].append(pr.random_name()) data[id].append(random.choice(static_map["受教育"])) data[id].append(random.choice(static_map["年龄"])) flag = random.choice(range(2))
print(sql_createTb)
con = db.DB()
# presumably creates the table when it does not exist yet - see db.DB
con.chech_table_exit(position_table_name, sql_createTb)

# 6. Insert one row per record (data_type is written as 0).
# NOTE(review): values are formatted straight into the SQL text, so a value
# containing a single quote breaks the statement; switch to parameterized
# queries if db.DB supports them.
data_arr = []
for key in n_data:
    adata = n_data[key]
    insert_template = (
        "insert into {} values(default,0,"
        + "'{}'," * len(adata)
        + "'{}');"
    )
    try:
        flag_text = p.join(flag_map[key])
    except Exception:
        # No usable labels for this record: store the placeholder. Catching
        # Exception rather than everything lets Ctrl-C / SystemExit through.
        flag_text = "无"
    sql_insert = insert_template.format(position_table_name, *adata, flag_text)
    print(sql_insert)
    con.insert(sql_insert)

# 7. Save the label array for the current table name; the web interface
#    loads it to name the columns.
np.save(curPath.mainPath() + "/violence/{}.npy".format(tablename[i]), table)
# Save the static_map produced in steps 3.5 and 4 for the current table name.
sava_to_json.save_json(
    static_map,
    curPath.mainPath() + "/temp_file/{}_static_map".format(tablename[i]))
# 5. Build the CREATE TABLE statement: one backticked column per label in
#    ``table`` except the last label, which becomes the trailing text column.
drug_table_name = config.get_tablename("drug_name")
sql_createTb = (
    "create table {} (id int primary key auto_increment,data_type int(1) ,`{}`char(20) not null default '',"
    + "`{}` char(40) not null default ''," * (len(table) - 2)
    + "{} text(1000))CHARSET=utf8;"
)
sql_createTb = sql_createTb.format(drug_table_name, *table)
print(table)
print(sql_createTb)
con = db.DB()
# presumably creates the table when it does not exist yet - see db.DB
con.chech_table_exit(drug_table_name, sql_createTb)

# 6. Insert one row per record (data_type is written as 0).
# NOTE(review): values are formatted straight into the SQL text, so a value
# containing a single quote breaks the statement; switch to parameterized
# queries if db.DB supports them.
data_arr = []
for key in data:
    adata = data[key]
    insert_template = (
        "insert into {} values(default,0,"
        + "'{}'," * len(adata)
        + "'{}');"
    )
    try:
        flag_text = p.join(flag_map[key])
    except Exception:
        # No usable labels for this record: store the placeholder. Catching
        # Exception rather than everything lets Ctrl-C / SystemExit through.
        flag_text = "无"
    sql_insert = insert_template.format(drug_table_name, *adata, flag_text)
    print(sql_insert)
    con.insert(sql_insert)

# 7. Save the label array; the web interface loads it to name the columns.
np.save(curPath.mainPath() + "/drug/drug.npy", table)
# Save the static_map produced in steps 3.5 and 4.
sava_to_json.save_json(static_map,
                       curPath.mainPath() + "/temp_file/drug_static_map")