Esempio n. 1
0
def get_gene():
    """Extract gene names and gene numbers and save them to ``gene.xls``.

    Reads the sheet returned by ``util.get_modified_data()``, skips its
    first row (presumably the header row -- confirm against the input
    file) and splits the cell at column index 3 of every remaining row
    into entries with ``util.get_list_by_enter``.  An entry that starts
    with ``<name>,<optional spaces>{<number>}`` yields one output row;
    every other entry is ignored.

    The output workbook ``../final_data/gene.xls`` has one sheet named
    ``gene`` with the columns ``id`` (1-based running index),
    ``geneName`` and ``geneNumber``.
    """
    table = util.get_modified_data()
    # Raw string: \s, \{ and \. are regex escapes, not Python string
    # escapes, and would trigger invalid-escape warnings otherwise.
    pattern = re.compile(r'[A-Za-z0-9]*-*[A-Za-z0-9]*,\s*\{[0-9]*\.?[0-9]*\}')

    genes = []  # (gene name, gene number) pairs in order of appearance
    for row_index in range(1, table.nrows):  # row 0 is skipped
        # Fetch the gene data of this row, one entry per line.
        entries = util.get_list_by_enter(table.row_values(row_index)[3])
        for entry in entries:
            if entry == '\n' or entry == '':
                continue
            match = pattern.match(entry)
            if match is None:
                continue
            # The pattern admits exactly one comma, so partitioning on
            # it separates the name from the "{number}" part.
            gene_name, _, remainder = match.group().partition(',')
            gene_number = remainder[remainder.find('{') + 1:remainder.find('}')]
            genes.append((gene_name, gene_number))

    wb = xlwt.Workbook()
    sheet = wb.add_sheet('gene')
    for column, title in enumerate(('id', 'geneName', 'geneNumber')):
        sheet.write(0, column, title)
    for row, (gene_name, gene_number) in enumerate(genes, start=1):
        sheet.write(row, 0, row)
        sheet.write(row, 1, gene_name)
        sheet.write(row, 2, gene_number)
    wb.save("../final_data/gene.xls")
Esempio n. 2
0
def get_hpo():
    """Collect the distinct HPO entries of ``system.xls`` into ``HPO.xls``.

    Reads ``../final_data/system.xls`` through ``util.get_file_data``,
    skips the first row and takes the cell at column index 8 of every
    remaining row.  Empty cells are ignored.  For the others, everything
    up to and including the first ``:`` is dropped and the rest is split
    with ``util.get_list_by_colon``.  Each non-empty piece is kept once,
    in order of first appearance.

    The output workbook ``../final_data/HPO.xls`` has the columns
    ``hid`` (1-based running index) and ``hpo``.  A row that cannot be
    written is reported on stdout and skipped.

    Raises:
        ValueError: if a non-empty cell contains no ``:``.
    """
    table = util.get_file_data('../final_data/system.xls')

    terms = []
    # Set-based membership replaces the former list.index()/except
    # probe; assumes the pieces are hashable (strings) -- TODO confirm
    # against util.get_list_by_colon.
    seen = set()
    for row_index in range(1, table.nrows):  # row 0 is skipped
        cell = table.row_values(row_index)[8]
        if cell == "":
            continue
        cell = cell[cell.index(":") + 1:]
        for term in util.get_list_by_colon(cell):
            if term == "" or term in seen:
                continue
            seen.add(term)
            terms.append(term)

    wb = xlwt.Workbook()
    sheet = wb.add_sheet('sheet1')
    sheet.write(0, 0, 'hid')
    sheet.write(0, 1, 'hpo')

    for index, term in enumerate(terms):
        try:
            sheet.write(index + 1, 0, index + 1)
            sheet.write(index + 1, 1, term)
        except Exception:
            # Best effort: report the entry that failed and carry on,
            # without also swallowing KeyboardInterrupt/SystemExit.
            print(index, term)
    wb.save("../final_data/HPO.xls")
    print("end")
Esempio n. 3
0
def get_all():
    """Build the de-duplicated phenomenon table and save it to ``system.xls``.

    Reads the sheet returned by ``util.get_modified_data()``, skips its
    first row and splits the cell at column index 4 of every remaining
    row into lines with ``util.get_list_by_enter``.  Empty lines and the
    placeholder ``;snomedct:;;;;`` are ignored.  Each remaining line is
    cut into:

    * the body part: text before the first ``:``
    * the phenomenon: text before the first ``;``
    * the system information: text after the first ``;``

    Only the first line seen for a given phenomenon is kept.

    The output workbook ``../final_data/system.xls`` holds, per kept
    line, a 1-based ``pid``, the phenomenon, the 1-based position of the
    body part in ``util.get_type_list()``, the raw system information
    and its first five ``util.get_list_by_semicolon`` pieces.  A row
    that fails part-way is reported on stdout; cells already written for
    it remain.

    Raises:
        ValueError: if a processed line contains no ``:`` or no ``;``.
    """
    # Fetch the required data.
    type_dict = util.get_type_list()
    raw_data = util.get_modified_data()

    records = []  # (phenomenon, body part, system info) per kept line
    seen = set()  # phenomena already recorded
    for row_index in range(1, raw_data.nrows):  # row 0 is skipped
        for line in util.get_list_by_enter(raw_data.row_values(row_index)[4]):
            if line == ";snomedct:;;;;" or line == "":
                continue
            first_colon = line.index(':')
            first_semicolon = line.index(';')
            phenomenon = line[:first_semicolon]
            # Assumes that the same phenotype on the same body part has
            # only one description, so later duplicates are dropped.
            if phenomenon in seen:
                continue
            seen.add(phenomenon)
            records.append(
                (phenomenon, line[:first_colon], line[first_semicolon + 1:]))

    wb = xlwt.Workbook()
    sheet = wb.add_sheet('sheet1')
    headers = ('pid', 'phenomenon', 'type', 'system', 'snomedct', 'UMLS',
               'ICD10CM', 'ICD9Cm', 'HPO')
    for column, title in enumerate(headers):
        sheet.write(0, column, title)

    for index, (phenomenon, body_part, system) in enumerate(records):
        row = index + 1
        try:
            sheet.write(row, 0, row)
            sheet.write(row, 1, phenomenon)
            sheet.write(row, 2, type_dict.index(body_part) + 1)
            sheet.write(row, 3, system)
            codes = util.get_list_by_semicolon(system)
            # Columns 4-8 receive the first five pieces, in order.
            for offset in range(5):
                sheet.write(row, 4 + offset, codes[offset])
        except Exception:
            # Best effort: report the offending row and carry on,
            # without also swallowing KeyboardInterrupt/SystemExit.
            print(index, phenomenon, ":", body_part)
    wb.save("../final_data/system.xls")
    print("end")
Esempio n. 4
0
def _prefix_body_parts(synopsis, type_dict):
    """Return *synopsis* with every line prefixed by its body part.

    The cell is split with ``util.get_list_by_enter``; one trailing
    empty line is dropped and ``;snomedct:;;;;`` placeholders are
    skipped.  A line whose text before the first ``:`` (when that ``:``
    comes before the first ``;``) is listed in *type_dict* sets the
    current body part and is kept unchanged.  Every other line gets
    ``<current body part>:`` prepended.  Each kept line is preceded by
    a newline in the result.
    """
    lines = util.get_list_by_enter(synopsis)
    if lines and lines[-1] == "":  # guard: the split may yield nothing
        lines.pop()

    current_type = ''
    pieces = []
    for line in lines:
        if line == ";snomedct:;;;;":
            continue
        first_colon = line.index(':')
        first_semicolon = line.index(';')
        if first_colon < first_semicolon and line[:first_colon] in type_dict:
            # The line already names a known body part: remember it for
            # the lines that follow.
            current_type = line[:first_colon]
        else:
            line = current_type + ":" + line
        pieces.append("\n" + line)
    return "".join(pieces)


def prehandler():
    """Normalise the clinical synopsis column and save a modified copy.

    Reads the sheet returned by ``util.get_raw_data()``, skips its first
    row and copies the cells at column indexes 0-3 of every remaining
    row unchanged.  The cell at column index 4 is rewritten so that each
    of its lines starts with a body part taken from
    ``util.get_type_list()``, with placeholder lines removed.

    The output workbook ``../final_data/omim_phenotype_modified.xls``
    has the columns ``mimnumber``, ``preferredTitle``, ``inheritance``,
    ``molecularBasis`` and ``clinicalSynopsis``.

    Raises:
        ValueError: if a processed synopsis line contains no ``:`` or
            no ``;``.
    """
    # Read the original table, one row_values() call per row.
    raw_data = util.get_raw_data()
    records = []
    for row_index in range(1, raw_data.nrows):  # row 0 is skipped
        values = raw_data.row_values(row_index)
        records.append(
            (values[0], values[1], values[2], values[3], values[4]))

    # Fetch the list of body parts.
    type_dict = util.get_type_list()

    wb = xlwt.Workbook()
    sheet = wb.add_sheet('sheet1')
    headers = ('mimnumber', 'preferredTitle', 'inheritance',
               'molecularBasis', 'clinicalSynopsis')
    for column, title in enumerate(headers):
        sheet.write(0, column, title)

    # Prefix every phenotype with its body part and drop the
    # superfluous placeholder information before any row is written.
    synopses = [_prefix_body_parts(record[4], type_dict) for record in records]

    for row, (record, synopsis) in enumerate(zip(records, synopses), start=1):
        for column in range(4):
            sheet.write(row, column, record[column])
        sheet.write(row, 4, synopsis)

    wb.save("../final_data/omim_phenotype_modified.xls")

    print("modify data end")