예제 #1
0
def getDataDrugPoor(path, tt):
    """Gather per-year fee totals for the 20 costliest drugs of PersonalType '17'.

    Loads the parquet data at ``path``, keeps rows whose PersonalType is '17'
    and whose DrugName is not '0', maps drug names through
    ``CD.changeNameUDF`` and sums ``FeeSum`` per drug for the years 2017,
    2018 and 2019. The 20 drugs with the largest 2019 total are collected as
    ``{'drugName': name, 'drugFee': [fee_2017, fee_2018, fee_2019]}`` entries.

    NOTE(review): the visible code builds the list but does not return it.

    @param path: location of the parquet data
    @param tt: not read by the visible code (the CompRatio_Type filter that
        used it is commented out)
    """
    source = spark.read.format('parquet').load(path)
    # Class-A drugs: obtain their yearly cost and yearly usage count.
    picked = source.select('PersonalType', 'DrugName', 'DT', 'Count', 'FeeSum',
                           'AllowedComp', 'CompRatio', 'CompRatio_Type')
    # & (picked.CompRatio_Type == '{}'.format(tt))

    # All column objects below come from `picked`, i.e. the frame as it was
    # before filtering and before the drug name was rewritten.
    wanted_rows = (picked.PersonalType == '17') & (picked.DrugName != '0')
    typed = picked.where(wanted_rows)
    typed = typed.withColumn("Count", picked.Count.cast(IntegerType()))
    typed = typed.withColumn("FeeSum", picked.FeeSum.cast(IntegerType()))
    typed = typed.withColumn("DrugName", CD.changeNameUDF(picked.DrugName))
    slim = typed.drop('PersonalType', 'CompRatio_Type', 'CompRatio',
                      'AllowedComp')

    # One row per drug, one fee column per year; years without data become 0.
    per_drug = slim.drop("Count").groupby("DrugName")
    fee_by_year = per_drug.pivot("DT", ['2017', '2018', '2019']) \
        .agg(F.sum('FeeSum')) \
        .fillna(0)
    ranked = fee_by_year.orderBy(fee_by_year['2019'].desc())

    data_fee = [
        {
            'drugName': row['DrugName'],
            'drugFee': [row['2017'], row['2018'], row['2019']],
        }
        for row in ranked.head(20)
    ]
예제 #2
0
def getGrowthRate(path):
    """Split disease records by PersonalType and hand each part to calGrowthRate.

    Reads PersonalType, DT and DiseaseName from the parquet data at ``path``,
    maps disease names through ``CD.changeNameUDF``, filters on
    ``DiseaseName != '0'`` and casts DT to integer. Rows with PersonalType
    "17" go to ``calGrowthRate(..., 0)``, all other rows to
    ``calGrowthRate(..., 1)``; the PersonalType column is dropped from both.

    @param path: location of the parquet data
    """
    base = spark.read.format('parquet').load(path) \
        .select("PersonalType", "DT", "DiseaseName")

    # Column objects are taken from `base` at every step, as in the chained
    # form: the filter and the cast refer to the frame before renaming.
    renamed = base.withColumn("DiseaseName", CD.changeNameUDF(base.DiseaseName))
    named = renamed.where(base.DiseaseName != '0')
    prepared = named.withColumn("DT", base.DT.cast(IntegerType()))

    type_17 = prepared.where(prepared.PersonalType == "17").drop("PersonalType")
    others = prepared.where(prepared.PersonalType != "17").drop("PersonalType")

    for subset, flag in ((type_17, 0), (others, 1)):
        calGrowthRate(subset, flag)
def startCategorizer():
    """Ask for a manuscript file and print the category predicted for it.

    The two values returned by ``CreateData.createData`` are placed in a
    single-sample feature matrix, classified with the model stored in
    'model_svm.pkl', and the predicted label is mapped onto ``result_list``
    (label 1 selects the first entry, label 2 the second).
    """
    result_list = ['emergent reader', 'other']
    # raw_input: this snippet targets Python 2.
    book_filename = raw_input('manuscript file name: ')
    freq, avg_dis = CreateData.createData(book_filename)

    # One sample with two features -> shape (1, 2).
    X = zeros((1, 2))
    X[0] = [freq, avg_dis]

    # Classify the unknown sample with the stored model. predict() expects a
    # 2-D (n_samples, n_features) array, so pass the whole matrix rather than
    # its 1-D row X[0] (deprecated, then rejected, by newer scikit-learn).
    clf = joblib.load('model_svm.pkl')
    result = clf.predict(X)

    # Same text as `print 'the book category is: ', value`, written so the
    # line parses under both Python 2 and Python 3.
    print('the book category is:  %s' % result_list[(int(result[0])) - 1])
예제 #4
0
def ShowCoords(img_file, coord_file):
    """Display an image with a green X drawn on every coordinate labelled 1.

    @param img_file: image file name, looked up under "photos_used/"
    @param coord_file: coordinate file name, loaded through
        ``CreateData.LoadCoords`` from "coords/"
    """
    IMG_DCT = "photos_used/"
    COORDS_DCT = "coords/"
    half = 4  # half the side length of each X mark

    img = Image.open(IMG_DCT + img_file)
    draw = ImageDraw.Draw(img)
    coord_list = CreateData.LoadCoords(coord_file, COORDS_DCT)
    for c in coord_list:
        # Entries are read as (x, y, label); only label 1 is drawn.
        if c[2] != 1:
            continue
        x, y = c[0], c[1]
        draw.line([(x - half, y - half), (x + half, y + half)], fill="green", width=2)
        draw.line([(x - half, y + half), (x + half, y - half)], fill="green", width=2)
    del draw

    # Public API; Image._show is a private helper that show() delegates to.
    img.show()
예제 #5
0
def getDiseaseNums(df, year, sex, personalType):
    """
    Count disease occurrences per ten-year age band for one sex and store them.

    One record is built per age band ("0-9" ... "90以上"). Each record holds
    the disease names in first-seen order ("DiseaseList"), the
    (name, count) pairs sorted by descending count ("DiseaseNumList") and
    the given year, sex and personalType. Every record is printed and passed
    to saveMongodb.

    @param df: DataFrame to process; its DiseaseName, Age and Sex columns are read
    @param year: year, stored under "DT"
    @param sex: value the Sex column must equal; stored under "Sex"
    @param personalType: stored under "PersonalType"
    @return: None
    """
    # NOTE(review): the filter uses the column object of the input frame
    # (df.DiseaseName), not the renamed column -- confirm this is intended.
    temp = df.withColumn("DiseaseName", CD.changeNameUDF(df.DiseaseName)) \
        .where(df.DiseaseName != '0')
    data_man = temp.select("DiseaseName", "Age") \
        .where(temp.Sex == "{}".format(sex))
    rows = data_man.rdd.collect()

    age_labels = ["0-9", "10-19", "20-29", "30-39", "40-49",
                  "50-59", "60-69", "70-79", "80-89", "90以上"]
    buckets = [
        {"age": label, "DiseaseList": [], "DiseaseNumList": {}, "DT": year, "Sex": sex, "PersonalType": personalType}
        for label in age_labels
    ]
    last_band = len(buckets) - 1

    for row in rows:
        # Ages of 90 and above all fall into the last band.
        band = min(int(row['Age'] / 10), last_band)
        name = str(row['DiseaseName'])
        counts = buckets[band]['DiseaseNumList']
        # The dict has exactly the names held in DiseaseList, so it answers
        # the "seen before?" question without scanning the list.
        if name not in counts:
            buckets[band]['DiseaseList'].append(name)
            counts[name] = 0
        counts[name] += 1

    # Replace each count dict by its (name, count) pairs, largest count first.
    for bucket in buckets:
        bucket['DiseaseNumList'] = sorted(bucket['DiseaseNumList'].items(), key=lambda d: d[1], reverse=True)

    for bucket in buckets:
        print(bucket)
        saveMongodb(bucket)
예제 #6
0
def calDiseaseNums(df, year, sex, personalType):
    """
    Sum SelfPay per disease and ten-year age band for one sex and store it.

    One record is built per age band ("0-9" ... "90以上"). Each record holds
    the disease names in first-seen order ("DiseaseList"), the
    (name, total SelfPay) pairs sorted by descending total
    ("DiseaseFeeList") and the given year, sex and personalType. Every
    record is printed and passed to saveMongodb.

    @param df: DataFrame to process; its SelfPay, Age, DiseaseName and Sex
        columns are read
    @param year: year, stored under "DT"
    @param sex: value the Sex column must equal; stored under "Sex"
    @param personalType: stored under "PersonalType"
    @return: None
    """
    # SelfPay and Age are cast to integers; fillna(0) then replaces nulls.
    temp = df.withColumn("SelfPay", df.SelfPay.cast(IntegerType())) \
        .withColumn("Age", df.Age.cast(IntegerType())) \
        .withColumn("DiseaseName", CD.changeNameUDF(df.DiseaseName)) \
        .where(df.DiseaseName != '0') \
        .fillna(0)

    data_man = temp.select("DiseaseName", "Age", "SelfPay") \
        .where(temp.Sex == "{}".format(sex)) \
        .drop("Sex")
    rows = data_man.rdd.collect()

    age_labels = ["0-9", "10-19", "20-29", "30-39", "40-49",
                  "50-59", "60-69", "70-79", "80-89", "90以上"]
    buckets = [
        {"age": label, "DiseaseList": [], "DiseaseFeeList": {}, "DT": year, "Sex": sex, "PersonalType": personalType}
        for label in age_labels
    ]
    last_band = len(buckets) - 1

    for row in rows:
        print(row)
        # Ages of 90 and above all fall into the last band.
        band = min(int(row['Age'] / 10), last_band)
        name = str(row['DiseaseName'])
        fees = buckets[band]['DiseaseFeeList']
        # The dict has exactly the names held in DiseaseList, so it answers
        # the "seen before?" question without scanning the list.
        if name not in fees:
            buckets[band]['DiseaseList'].append(name)
            fees[name] = 0
        fees[name] += int(row['SelfPay'])

    # Replace each fee dict by its (name, total) pairs, largest total first.
    for bucket in buckets:
        bucket['DiseaseFeeList'] = sorted(bucket['DiseaseFeeList'].items(), key=lambda d: d[1], reverse=True)

    for bucket in buckets:
        print(bucket)
        saveMongodb(bucket)
예제 #7
0
import csv

import CreateData
from Bechdel import Bechdel

data = CreateData.CreateData()
movie_list = data.create_movie()
params = []

# Append one CSV row per movie: its name, the three individual test results
# and the overall result. The `with` block closes the file on exit.
with open("bechdel_test_data", 'a') as csvFile:
    writer = csv.writer(csvFile)
    for movie in movie_list:
        tester = Bechdel(movie)
        tester.run_bechdel_test()
        # Build a fresh row for every movie; appending to one shared list
        # made each row repeat the fields of all movies written before it.
        params = [
            movie.movie_name,
            tester.test1,
            tester.test2,
            tester.test3,
            tester.overall,
        ]
        writer.writerow(params)


예제 #8
0
def openImage(coord_dct):
    """Show a user-chosen image and record the points clicked on it.

    A scrollable Tk canvas displays the image picked in a file dialog. Marks
    labelled 1 from a previously saved coordinate file (if it can be loaded)
    are drawn in blue. While the window is open:

    * left click   - adds a positive mark ``(x, y, 1)`` and increments the
      module-level ``colonies`` counter
    * middle click - adds a negative mark ``(x, y, 0)``
    * right click  - undoes the most recent mark of this session

    The function blocks in the Tk main loop until the window is closed, then
    prints ``colonies``.

    @param coord_dct: directory passed to ``CreateData.LoadCoords`` when
        looking for previously saved coordinates
    @return: ``(coords, coord_file)`` - the marks added in this session and
        the coordinate file name derived from the image name
    """

    def event2canvas(e, c):
        # Translate window-relative event coordinates into canvas coordinates.
        return (c.canvasx(e.x), c.canvasy(e.y))

    root = Tk()

    # setting up a tkinter canvas with scrollbars
    frame = Frame(root, bd=2, relief=SUNKEN)
    frame.grid_rowconfigure(0, weight=1)
    frame.grid_columnconfigure(0, weight=1)
    xscroll = Scrollbar(frame, orient=HORIZONTAL)
    xscroll.grid(row=1, column=0, sticky=E + W)
    yscroll = Scrollbar(frame)
    yscroll.grid(row=0, column=1, sticky=N + S)
    canvas = Canvas(frame,
                    bd=0,
                    xscrollcommand=xscroll.set,
                    yscrollcommand=yscroll.set)
    canvas.grid(row=0, column=0, sticky=N + S + E + W)
    xscroll.config(command=canvas.xview)
    yscroll.config(command=canvas.yview)
    frame.pack(fill=BOTH, expand=1)

    # adding the image
    File = askopenfilename(parent=root,
                           initialdir="M:/",
                           title='Choose an image.')
    print("opening %s" % File)
    img = PhotoImage(file=File)
    img2 = Image.open(File)
    arr = np.array(img2)

    coords = []

    print(arr.shape)

    canvas.create_image(0, 0, image=img, anchor="nw")
    canvas.config(scrollregion=canvas.bbox(ALL))

    mark_size = 3
    mark_width = 2

    def draw_cross(x, y, color):
        # Draw an X centred on (x, y) from two diagonal lines.
        canvas.create_line(x - mark_size,
                           y - mark_size,
                           x + mark_size,
                           y + mark_size,
                           fill=color,
                           width=mark_width)
        canvas.create_line(x - mark_size,
                           y + mark_size,
                           x + mark_size,
                           y - mark_size,
                           fill=color,
                           width=mark_width)

    basename = os.path.basename(File)
    coord_file = basename.replace("PICT", "coords").replace("png", "csv")
    try:
        coord_list = CreateData.LoadCoords(coord_file, coord_dct)
        for c in coord_list:
            if c[2] == 1:
                draw_cross(c[0], c[1], "blue")
    except Exception as exc:
        # Showing earlier marks is best effort: the image can still be
        # annotated without them, so report the problem and carry on.
        print("no previous coordinates loaded: %s" % exc)

    # function to be called when mouse is clicked
    def printcoordsPos(event):
        global colonies
        colonies += 1
        cx, cy = event2canvas(event, canvas)
        draw_cross(cx, cy, "#32b33b")
        coords.append((cx, cy, 1))

    def printcoordsNeg(event):
        cx, cy = event2canvas(event, canvas)
        draw_cross(cx, cy, "#ed9121")
        coords.append((cx, cy, 0))

    def Undo(Event=None):
        global colonies
        # Nothing recorded in this session: nothing to undo.
        if not coords:
            return
        x, y, label = coords.pop()
        # Cover the undone mark with a filled dot.
        canvas.create_oval(x - 2,
                           y - 2,
                           x + 2,
                           y + 2,
                           outline="#9400D3",
                           fill="#9400D3")
        # Only positive marks incremented the counter, so only they
        # decrement it again.
        if label == 1:
            colonies -= 1

    # mouseclick event
    canvas.bind("<ButtonPress-1>", printcoordsPos)
    canvas.bind("<ButtonPress-3>", Undo)
    canvas.bind_all('<ButtonPress-2>', printcoordsNeg)

    root.mainloop()

    print(colonies)
    return coords, coord_file
예제 #9
0
def drug_nums_to_mysql(path, year, choice, num):
    """
    将历年药品每月用量及总和导入Mysql
    @param path: 文件路径
    @param year: 年份
    @param choice: 药品类型
    @param num: 展示数量
    @return: null
    """

    data = spark.read.format('parquet').load(path)
    data = data.select('PersonalType', 'RegisterDate', 'DT', 'DrugName', 'Count', 'CompRatio_Type') \
        .where("CompRatio_Type = '" + choice + "'") \
        .where("DT = {}".format(year)) \
        .drop('DT', 'CompRatio_Type')
    # 处理RegisterDate,提取月份
    data = data.withColumn('RegisterDate', data.RegisterDate.substr(6, 2)) \
        .withColumnRenamed('RegisterDate', 'Month')

    data = data.withColumn('Month', data.Month.cast(IntegerType())) \
        .withColumn('Count', data.Count.cast(IntegerType())) \
        .withColumn('DrugName', CD.changeNameUDF(data.DrugName)) \
        .where('DrugName != "0"')
    data_poor = data.where('PersonalType = 17').drop('PersonalType')
    data_not_poor = data.where('PersonalType != 17').drop('PersonalType')

    df_not_poor = get_data_temp(data_not_poor)
    df_poor = get_data_temp(data_poor)

    df_poor = df_poor.orderBy(df_poor['Sum'].desc())
    all_drug_nums_poor = [['药名', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']]
    all_drug_nums_not_poor = [['药名', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']]

    conn = get_conn()
    cur = conn.cursor()
    t = 0
    df_poor = df_poor.collect()

    for i in df_poor:
        print(str(t))
        if t == int(num):
            break
        t += 1
        temp_poor = []
        temp_not_poor = []

        try:
            j = df_not_poor.select('*').where(df_not_poor.DrugName == str(i['DrugName'])).collect()[0]
            print(i)
            temp_poor.append(i['DrugName'])
            temp_not_poor.append(i['DrugName'])
            # 0 --> 建档立卡
            # 1 --> 非建档立卡
            cur.execute(
                "INSERT INTO drugNumList VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (i["DrugName"], i[1], i[2], i[3], i[4], i[5], i[6], i[7], i[8], i[9], i[10], i[11], i[12], year, 0,
                 choice, i['Sum']))
            cur.execute(
                "INSERT INTO drugNumList VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (i["DrugName"], j[1], j[2], j[3], j[4], j[5], j[6], j[7], j[8], j[9], j[10], j[11], j[12], year, 1,
                 choice, j['Sum']))
        except Exception as e:
            print(e)
            temp_poor.append(i['DrugName'])
            temp_not_poor.append(i['DrugName'])
            cur.execute(
                "INSERT INTO drugNumList VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (i["DrugName"], i[1], i[2], i[3], i[4], i[5], i[6], i[7], i[8], i[9], i[10], i[11], i[12], year, 0,
                 choice, i['Sum']))
            cur.execute(
                "INSERT INTO drugNumList VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (i["DrugName"], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, year, 1, choice, 0))
        conn.commit()

    # all_nums = {
    #     'poor': all_drug_nums_poor,
    #     'not_poor': all_drug_nums_not_poor
    # }
    # cur.close()
    # conn.close()

    '''