def getDataDrugPoor(path, tt):
    """
    Build the yearly fee totals of the 20 drugs with the highest 2019 fees.

    Reads the parquet data at ``path``, keeps the rows whose PersonalType is
    '17' and whose DrugName is not '0', maps the drug names through
    ``CD.changeNameUDF`` and sums FeeSum per drug for 2017, 2018 and 2019.

    @param path: location of the parquet data to load
    @param tt: currently not read; the CompRatio_Type condition it was meant
        for is disabled (see the note in the body)
    @return: list of dicts ``{'drugName': name, 'drugFee': [fee2017, fee2018,
        fee2019]}``, ordered by 2019 fee (highest first), at most 20 entries
    """
    data = spark.read.format('parquet').load(path)

    # Original intent (translated): process class-A drugs and obtain their
    # yearly cost and yearly usage count.
    data01 = data.select('PersonalType', 'DrugName', 'DT', 'Count', 'FeeSum',
                         'AllowedComp', 'CompRatio', 'CompRatio_Type')

    # Disabled extra condition: & (data01.CompRatio_Type == '{}'.format(tt))
    data01 = data01.where((data01.PersonalType == '17') & (data01.DrugName != '0')) \
        .withColumn("Count", data01.Count.cast(IntegerType())) \
        .withColumn("FeeSum", data01.FeeSum.cast(IntegerType())) \
        .withColumn("DrugName", CD.changeNameUDF(data01.DrugName))
    data01 = data01.drop('PersonalType', 'CompRatio_Type', 'CompRatio', 'AllowedComp')

    # One row per drug, one fee column per year; years without data become 0.
    data01_Fee = data01.drop("Count") \
        .groupby("DrugName") \
        .pivot("DT", ['2017', '2018', '2019']) \
        .agg(F.sum('FeeSum')) \
        .fillna(0)
    data01_Fee = data01_Fee.orderBy(data01_Fee['2019'].desc())

    # The yearly fees get their own name so the ``tt`` parameter is no longer
    # overwritten inside the loop.
    data_fee = []
    for row in data01_Fee.head(20):
        yearly_fees = [row['2017'], row['2018'], row['2019']]
        data_fee.append({'drugName': row['DrugName'], 'drugFee': yearly_fees})

    # Previously the list was built and then dropped; hand it to the caller.
    return data_fee
def getGrowthRate(path):
    """
    Load disease records and run calGrowthRate on the two PersonalType groups.

    The records with PersonalType "17" are passed to ``calGrowthRate`` with
    flag 0, all remaining records with flag 1.

    @param path: location of the parquet data to load
    @return: None
    """
    selected = spark.read.format('parquet').load(path).select(
        "PersonalType", "DT", "DiseaseName")

    # NOTE(review): the '0' filter is written against the column handle taken
    # before DiseaseName is rewritten by changeNameUDF -- confirm whether the
    # raw or the converted name is supposed to be tested.
    converted_name = CD.changeNameUDF(selected.DiseaseName)
    has_disease = selected.DiseaseName != '0'
    year_as_int = selected.DT.cast(IntegerType())
    prepared = selected.withColumn("DiseaseName", converted_name) \
        .where(has_disease) \
        .withColumn("DT", year_as_int)

    data_poor = prepared.where(prepared.PersonalType == "17").drop("PersonalType")
    data_rich = prepared.where(prepared.PersonalType != "17").drop("PersonalType")

    # Flag 0 marks the PersonalType "17" group, flag 1 everybody else.
    for flag, group in enumerate((data_poor, data_rich)):
        calGrowthRate(group, flag)
def startCategorizer():
    """Ask for a manuscript file name and print its predicted category.

    The two features returned by ``CreateData.createData`` are fed to the
    model stored in ``model_svm.pkl``; the predicted label, minus one, is used
    as an index into ``['emergent reader', 'other']``.
    """
    result_list = ['emergent reader', 'other']

    # ``raw_input`` only exists on Python 2; fall back to ``input`` on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    book_filename = read_line('manuscript file name: ')

    freq, avg_dis = CreateData.createData(book_filename)
    X = zeros((1, 2))
    X[0] = [freq, avg_dis]

    # classify unknown data with our model
    clf = joblib.load('model_svm.pkl')
    # predict() expects a 2-D (n_samples, n_features) array, so pass the whole
    # one-row matrix rather than the 1-D row X[0].
    result = clf.predict(X)

    category = result_list[int(result[0]) - 1]
    # A single-argument print works on Python 2 and 3; joining with a space
    # reproduces the output of the former ``print a, b`` statement exactly.
    print(' '.join(('the book category is: ', category)))
def ShowCoords(img_file, coord_file):
    """Show an image with a green cross on every coordinate whose flag is 1.

    The image is read from ``photos_used/`` and the coordinates are loaded by
    ``CreateData.LoadCoords`` from ``coords/``. Each coordinate entry is read
    as ``(x, y, flag)``.
    """
    IMG_DCT = "photos_used/"
    img = Image.open(IMG_DCT + img_file)
    draw = ImageDraw.Draw(img)

    COORDS_DCT = "coords/"
    for c in CreateData.LoadCoords(coord_file, COORDS_DCT):
        if c[2] != 1:
            continue
        x, y = c[0], c[1]
        # The two diagonals of the cross, each 4 px out from the centre.
        strokes = (
            [(x - 4, y - 4), (x + 4, y + 4)],
            [(x - 4, y + 4), (x + 4, y - 4)],
        )
        for stroke in strokes:
            draw.line(stroke, fill="green", width=2)

    del draw
    Image._show(img)
def getDiseaseNums(df, year, sex, personalType):
    """
    Count disease occurrences per ten-year age bracket and save each bracket.

    Rows of ``df`` whose Sex equals ``sex`` are grouped into the brackets
    0-9, 10-19, ..., 80-89 and "90 and above". For every bracket the number of
    rows per disease name is counted, the counts are sorted from most to least
    frequent, and the bracket record is printed and passed to ``saveMongodb``.

    @param df: DataFrame to process; its DiseaseName, Age and Sex columns are read
    @param year: year, stored as "DT" in every saved record
    @param sex: value compared (as a string) with the Sex column and stored as "Sex"
    @param personalType: stored as "PersonalType" in every saved record
    @return: None
    """
    temp = df.withColumn("DiseaseName", CD.changeNameUDF(df.DiseaseName)) \
        .where(df.DiseaseName != '0')
    data_man = temp.select("DiseaseName", "Age") \
        .where(temp.Sex == "{}".format(sex))
    rows = data_man.rdd.collect()

    age_labels = ["0-9", "10-19", "20-29", "30-39", "40-49",
                  "50-59", "60-69", "70-79", "80-89", "90以上"]
    buckets = [
        {"age": label, "DiseaseList": [], "DiseaseNumList": {},
         "DT": year, "Sex": sex, "PersonalType": personalType}
        for label in age_labels
    ]
    last_bucket = len(buckets) - 1

    for row in rows:
        # Everything from age 90 upwards shares the last bracket.
        bucket = buckets[min(int(row['Age'] / 10), last_bucket)]
        disease_name = str(row['DiseaseName'])
        counts = bucket['DiseaseNumList']
        # The counts dict has exactly the names held in DiseaseList, so the
        # membership test can use the dict (O(1)) instead of scanning the list.
        if disease_name in counts:
            counts[disease_name] += 1
        else:
            bucket['DiseaseList'].append(disease_name)
            counts[disease_name] = 1

    for bucket in buckets:
        # Stored as a list of (name, count) pairs, most frequent first.
        bucket['DiseaseNumList'] = sorted(bucket['DiseaseNumList'].items(),
                                          key=lambda item: item[1], reverse=True)
        print(bucket)
        saveMongodb(bucket)
def calDiseaseNums(df, year, sex, personalType):
    """
    Sum SelfPay per disease within ten-year age brackets and save each bracket.

    SelfPay and Age are cast to integers and missing values are replaced by 0.
    Rows whose Sex equals ``sex`` are grouped into the brackets 0-9, 10-19,
    ..., 80-89 and "90 and above". For every bracket the SelfPay total per
    disease name is accumulated, the totals are sorted from highest to lowest,
    and the bracket record is printed and passed to ``saveMongodb``.

    @param df: DataFrame to process; its DiseaseName, Age, Sex and SelfPay
        columns are read
    @param year: year, stored as "DT" in every saved record
    @param sex: value compared (as a string) with the Sex column and stored as "Sex"
    @param personalType: stored as "PersonalType" in every saved record
    @return: None
    """
    # Columns named in the original note: TotalFee RealComp SelfPay
    temp = df.withColumn("SelfPay", df.SelfPay.cast(IntegerType())) \
        .withColumn("Age", df.Age.cast(IntegerType())) \
        .withColumn("DiseaseName", CD.changeNameUDF(df.DiseaseName)) \
        .where(df.DiseaseName != '0') \
        .fillna(0)
    data_man = temp.select("DiseaseName", "Age", "SelfPay") \
        .where(temp.Sex == "{}".format(sex)) \
        .drop("Sex")
    rows = data_man.rdd.collect()

    age_labels = ["0-9", "10-19", "20-29", "30-39", "40-49",
                  "50-59", "60-69", "70-79", "80-89", "90以上"]
    buckets = [
        {"age": label, "DiseaseList": [], "DiseaseFeeList": {},
         "DT": year, "Sex": sex, "PersonalType": personalType}
        for label in age_labels
    ]
    last_bucket = len(buckets) - 1

    for row in rows:
        print(row)
        # Everything from age 90 upwards shares the last bracket.
        bucket = buckets[min(int(row['Age'] / 10), last_bucket)]
        disease_name = str(row['DiseaseName'])
        fees = bucket['DiseaseFeeList']
        # The fee dict has exactly the names held in DiseaseList, so the
        # membership test can use the dict (O(1)) instead of scanning the list.
        if disease_name not in fees:
            bucket['DiseaseList'].append(disease_name)
            fees[disease_name] = 0
        fees[disease_name] += int(row['SelfPay'])

    for bucket in buckets:
        # Stored as a list of (name, total) pairs, highest total first.
        bucket['DiseaseFeeList'] = sorted(bucket['DiseaseFeeList'].items(),
                                          key=lambda item: item[1], reverse=True)
    for bucket in buckets:
        print(bucket)
        saveMongodb(bucket)
import csv
import CreateData
from Bechdel import Bechdel

# Build the movie objects that will be put through the Bechdel test.
data = CreateData.CreateData()
movie_list = data.create_movie()
params = []

# Append one row per movie: name, the three individual test results and the
# overall result.
with open("bechdel_test_data", 'a') as csvFile:
    writer = csv.writer(csvFile)
    for movie in movie_list:
        tester = Bechdel(movie)
        tester.run_bechdel_test()
        # Start a fresh row for every movie; reusing one ever-growing list
        # would repeat the values of all previous movies in each row.
        params = [
            movie.movie_name,
            tester.test1,
            tester.test2,
            tester.test3,
            tester.overall,
        ]
        writer.writerow(params)
# The ``with`` block closes the file; no explicit close() is needed.
def openImage(coord_dct):
    """Let the user pick an image and mark points on it with the mouse.

    A Tk window with a scrollable canvas shows the chosen image. Coordinates
    already stored for the image (looked up through ``CreateData.LoadCoords``)
    are drawn as blue crosses when their flag is 1. Mouse button 1 adds a
    positive mark, button 2 a negative mark, and button 3 undoes the most
    recent mark. The function blocks until the window is closed.

    The module-level counter ``colonies`` is increased for every positive mark
    and printed at the end; it must exist before the first click.

    @param coord_dct: directory handed to ``CreateData.LoadCoords``
    @return: tuple ``(coords, coord_file)`` where ``coords`` is the list of
        ``(x, y, flag)`` marks made in this session (flag 1 = positive,
        0 = negative) and ``coord_file`` is the coordinate file name derived
        from the image name
    """
    def event2canvas(e, c):
        # Translate window-relative event coordinates into canvas coordinates.
        return (c.canvasx(e.x), c.canvasy(e.y))

    root = Tk()

    # setting up a tkinter canvas with scrollbars
    frame = Frame(root, bd=2, relief=SUNKEN)
    frame.grid_rowconfigure(0, weight=1)
    frame.grid_columnconfigure(0, weight=1)
    xscroll = Scrollbar(frame, orient=HORIZONTAL)
    xscroll.grid(row=1, column=0, sticky=E + W)
    yscroll = Scrollbar(frame)
    yscroll.grid(row=0, column=1, sticky=N + S)
    canvas = Canvas(frame, bd=0, xscrollcommand=xscroll.set, yscrollcommand=yscroll.set)
    canvas.grid(row=0, column=0, sticky=N + S + E + W)
    xscroll.config(command=canvas.xview)
    yscroll.config(command=canvas.yview)
    frame.pack(fill=BOTH, expand=1)

    # adding the image
    File = askopenfilename(parent=root, initialdir="M:/", title='Choose an image.')
    print("opening %s" % File)
    img = PhotoImage(file=File)
    img2 = Image.open(File)
    arr = np.array(img2)
    coords = []
    print(arr.shape)
    canvas.create_image(0, 0, image=img, anchor="nw")
    canvas.config(scrollregion=canvas.bbox(ALL))

    mark_size = 3
    mark_width = 2

    def draw_cross(x, y, color):
        # Draw an X-shaped marker centred on (x, y).
        canvas.create_line(x - mark_size, y - mark_size, x + mark_size, y + mark_size,
                           fill=color, width=mark_width)
        canvas.create_line(x - mark_size, y + mark_size, x + mark_size, y - mark_size,
                           fill=color, width=mark_width)

    basename = os.path.basename(File)
    coord_file = basename.replace("PICT", "coords").replace("png", "csv")
    try:
        coord_list = CreateData.LoadCoords(coord_file, coord_dct)
        for c in coord_list:
            if c[2] == 1:
                draw_cross(c[0], c[1], "blue")
    except Exception:
        # Showing earlier marks is best effort: carry on without them when
        # they cannot be loaded. Unlike a bare ``except`` this still lets
        # KeyboardInterrupt and SystemExit through.
        pass

    # function to be called when mouse is clicked
    def printcoordsPos(event):
        global colonies
        colonies += 1
        cx, cy = event2canvas(event, canvas)
        draw_cross(cx, cy, "#32b33b")
        coords.append((cx, cy, 1))

    def printcoordsNeg(event):
        cx, cy = event2canvas(event, canvas)
        draw_cross(cx, cy, "#ed9121")
        coords.append((cx, cy, 0))

    def Undo(Event=None):
        global colonies
        if not coords:
            # Nothing left to undo.
            return
        x, y, flag = coords.pop()
        # Cover the undone mark with a filled dot.
        canvas.create_oval(x - 2, y - 2, x + 2, y + 2, outline="#9400D3", fill="#9400D3")
        # Only positive marks were counted, so only they are subtracted again.
        if flag == 1:
            colonies -= 1

    # mouseclick event
    canvas.bind("<ButtonPress-1>", printcoordsPos)
    canvas.bind("<ButtonPress-3>", Undo)
    canvas.bind_all('<ButtonPress-2>', printcoordsNeg)

    root.mainloop()
    print(colonies)
    return coords, coord_file
def drug_nums_to_mysql(path, year, choice, num): """ 将历年药品每月用量及总和导入Mysql @param path: 文件路径 @param year: 年份 @param choice: 药品类型 @param num: 展示数量 @return: null """ data = spark.read.format('parquet').load(path) data = data.select('PersonalType', 'RegisterDate', 'DT', 'DrugName', 'Count', 'CompRatio_Type') \ .where("CompRatio_Type = '" + choice + "'") \ .where("DT = {}".format(year)) \ .drop('DT', 'CompRatio_Type') # 处理RegisterDate,提取月份 data = data.withColumn('RegisterDate', data.RegisterDate.substr(6, 2)) \ .withColumnRenamed('RegisterDate', 'Month') data = data.withColumn('Month', data.Month.cast(IntegerType())) \ .withColumn('Count', data.Count.cast(IntegerType())) \ .withColumn('DrugName', CD.changeNameUDF(data.DrugName)) \ .where('DrugName != "0"') data_poor = data.where('PersonalType = 17').drop('PersonalType') data_not_poor = data.where('PersonalType != 17').drop('PersonalType') df_not_poor = get_data_temp(data_not_poor) df_poor = get_data_temp(data_poor) df_poor = df_poor.orderBy(df_poor['Sum'].desc()) all_drug_nums_poor = [['药名', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']] all_drug_nums_not_poor = [['药名', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']] conn = get_conn() cur = conn.cursor() t = 0 df_poor = df_poor.collect() for i in df_poor: print(str(t)) if t == int(num): break t += 1 temp_poor = [] temp_not_poor = [] try: j = df_not_poor.select('*').where(df_not_poor.DrugName == str(i['DrugName'])).collect()[0] print(i) temp_poor.append(i['DrugName']) temp_not_poor.append(i['DrugName']) # 0 --> 建档立卡 # 1 --> 非建档立卡 cur.execute( "INSERT INTO drugNumList VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", (i["DrugName"], i[1], i[2], i[3], i[4], i[5], i[6], i[7], i[8], i[9], i[10], i[11], i[12], year, 0, choice, i['Sum'])) cur.execute( "INSERT INTO drugNumList VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", (i["DrugName"], j[1], j[2], j[3], j[4], j[5], j[6], j[7], j[8], 
j[9], j[10], j[11], j[12], year, 1, choice, j['Sum'])) except Exception as e: print(e) temp_poor.append(i['DrugName']) temp_not_poor.append(i['DrugName']) cur.execute( "INSERT INTO drugNumList VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", (i["DrugName"], i[1], i[2], i[3], i[4], i[5], i[6], i[7], i[8], i[9], i[10], i[11], i[12], year, 0, choice, i['Sum'])) cur.execute( "INSERT INTO drugNumList VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", (i["DrugName"], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, year, 1, choice, 0)) conn.commit() # all_nums = { # 'poor': all_drug_nums_poor, # 'not_poor': all_drug_nums_not_poor # } # cur.close() # conn.close() '''