def update_yuanshi(id_name_dict, school_dict, teacher_dict):
    """Fill in the school and teacher id of academicians by name match.

    For each academician in ``id_name_dict`` the teacher table is searched
    for entries with the same name. The first entry supplies the teacher id
    and the school id; the school id is resolved to a school name through
    ``school_dict`` and one UPDATE against ``eval_yuanshi`` is executed.

    An academician is skipped when no teacher shares the name, when the
    teacher list for that name is empty, or when the school id is not
    present in ``school_dict``.

    :param id_name_dict: {academician_id: academician_name}
    :param school_dict: {school_id: school_name}
    :param teacher_dict: {teacher_name: [(teacher_id, school_id), ...]}
    :return: None
    """
    for yuanshi_id, name in id_name_dict.items():
        # Name of an academician lacking a school or a teacher id.
        candidates = teacher_dict.get(name)
        # Truthiness check: the former ``is []`` identity test could never be
        # true, so an empty list crashed on the ``[0]`` lookup below.
        if not candidates:
            continue

        # Only the first teacher with a matching name is used.
        first_match = candidates[0]
        teacher_id = first_match[0]
        school_id = first_match[1]
        if school_id not in school_dict:
            continue
        school_name = school_dict[school_id]

        # NOTE(review): the statement is assembled by string formatting; a
        # school name containing a double quote would break it. Switch to a
        # parameterized query if DBhelper.execute supports one.
        sql = ('UPDATE eval_yuanshi set teacher_id_list = %s,'
               'school = "%s" where id = %s'
               % (teacher_id, school_name, yuanshi_id))
        print(sql)
        DBhelper.execute(sql)
def _insert_teacher_paper(value_tuples):
    """Insert the given "(teacher_id,paper_id)" fragments in one statement.

    Returns the executed SQL, or None when there was nothing to insert (an
    INSERT with an empty value list is not valid SQL, so it is not sent).
    """
    if not value_tuples:
        return None
    sql = ("insert into teacher_paper(teacher_id, paper_id) values"
           + ",".join(value_tuples))
    DBhelper.execute(sql)
    return sql


def get_paper_author(teacher_dict):
    """Link papers to teachers by matching author name and affiliation.

    Reads ``id`` and ``author`` from the ``paper`` table. ``author`` is a
    JSON list of objects with the keys ``"name"`` and ``"org"``. For every
    author whose name appears in ``teacher_dict`` the parsed affiliation is
    compared with each same-named teacher:

    * two parsed parts: the teacher's school must equal the first part and
      the teacher's parsed institution name must be contained in the second;
    * one parsed part: the teacher's school must equal it.

    Matches are written to ``teacher_paper`` in batches. A batch is flushed
    whenever the comparison counter reaches 5000, and once more at the end.

    :param teacher_dict: {name: [(school_name, institution_name,
        teacher_id), ...]}
    :return: None
    """
    batch_size = 5000

    author_info = DBhelper.execute("SELECT id, author from paper")
    count = 0      # teacher comparisons since the last flush
    pending = []   # "(teacher_id,paper_id)" fragments awaiting insertion

    for paper_row in author_info:
        paper_id = paper_row[0]
        author_list = json.loads(paper_row[1])
        for author in author_list:  # [{"name": ..., "org": ...}, ...]
            name = author["name"]        # author name as written on the paper
            school_info = author["org"]  # school / institution on the paper
            school_institution = parse_school_name(school_info)
            if name not in teacher_dict:
                continue

            for teacher in teacher_dict[name]:
                teacher_school = teacher[0]
                teacher_institution = parse_institution_name(teacher[1])
                count += 1

                parts = len(school_institution)
                if parts == 2:
                    matched = (teacher_school == school_institution[0]
                               and teacher_institution
                               in school_institution[1])
                elif parts == 1:
                    matched = teacher_school == school_institution[0]
                else:
                    matched = False

                if matched:
                    pending.append(
                        "(" + str(teacher[2]) + "," + str(paper_id) + ")")

                if count >= batch_size:
                    count = 0
                    sql = _insert_teacher_paper(pending)
                    if sql is not None:
                        print(sql)
                    pending = []

    # Flush whatever is left; skipped automatically when nothing matched.
    _insert_teacher_paper(pending)
    print("---", count)
def get_teacher_info(school_dict, institution_dict):
    """Group teachers from ``es_teacher`` by name.

    Each row's school id and institution id are resolved to names through
    the supplied lookup tables. An institution id that is missing from
    ``institution_dict`` yields an empty institution name; a school id that
    is missing from ``school_dict`` raises ``KeyError``.

    :param school_dict: {school_id: school_name}
    :param institution_dict: {institution_id: institution_name}
    :return: {name: [(school_name, institution_name, teacher_id), ...], ...}
    """
    rows = DBhelper.execute(
        "SELECT name, school_id, institution_id, id from es_teacher")

    teachers_by_name = {}
    for row in rows:
        teacher_name = row[0]
        school_key = row[1]
        institution_key = row[2]
        teacher_key = row[3]

        school_name = school_dict[school_key]
        institution_name = institution_dict.get(institution_key, "")

        entry = (school_name, institution_name, teacher_key)
        teachers_by_name.setdefault(teacher_name, []).append(entry)
        print(teachers_by_name[teacher_name])

    return teachers_by_name
def institution_to_discipline(institution_id_list):
    """Print the institutions whose discipline code starts with 07 or 08.

    1. Loads ``es_relation_in_dis`` into a mapping
       {institution_id: discipline_code}. If an institution appears in
       several rows, the last row read wins.
    2. For every entry of ``institution_id_list`` the institution id is
       looked up in that mapping; ids without a discipline are skipped. The
       id is printed when its discipline code begins with "07" or "08"
       (science / engineering, per the original author's comment).

    Finally the number of printed institutions is printed.

    :param institution_id_list: iterable of rows whose first element is an
        institution id, e.g. [(id,), ...]
    :return: None
    """
    rows = DBhelper.execute(
        "select institution_id, discipline_code from es_relation_in_dis")
    discipline_by_institution = {row[0]: row[1] for row in rows}

    matched = 0
    for record in institution_id_list:
        institution_id = record[0]
        if institution_id not in discipline_by_institution:
            continue
        code = discipline_by_institution[institution_id]
        # Slice instead of indexing so that empty or one-character codes are
        # treated as "no match" rather than raising IndexError.
        if code[:2] in ("07", "08"):
            print(institution_id)
            matched += 1
    print(matched)
def get_tsu_institution_info():
    """Fetch the ids of all institutions whose school_name is 清华大学.

    :return: list of result rows from ``es_institution``, one per
        institution, each holding the ``ID`` column
    """
    query = 'select ID from es_institution where school_name= "清华大学"'
    return [row for row in DBhelper.execute(query)]
def get_school_info():
    """Build a school lookup table from ``es_school``.

    :return: {school_id: school_name}
    """
    rows = DBhelper.execute("select id, NAME from es_school ")
    return {row[0]: row[1] for row in rows}
def get_school_info():
    """Read every school's id and name from ``es_school``.

    :return: {id: name, ...}
    """
    name_by_id = {}
    for record in DBhelper.execute("SELECT ID, NAME from es_school"):
        name_by_id[record[0]] = record[1]
    return name_by_id
def get_institution_info():
    """Build an institution lookup table from ``es_institution``.

    :return: {institution_id: institution_name}
    """
    rows = DBhelper.execute("select ID, `NAME` from es_institution")
    return {row[0]: row[1] for row in rows}
def get_academican_without_school():
    """Collect academicians that lack a school or a teacher id list.

    Selects the rows of ``eval_yuanshi`` where ``school`` or
    ``teacher_id_list`` is NULL.

    :return: {academician_id: academician_name}
    """
    rows = DBhelper.execute(
        "SELECT name,id from eval_yuanshi where school is NULL or teacher_id_list is NULL"
    )
    # Each row is (name, id); the mapping is keyed by id.
    return {row[1]: row[0] for row in rows}
def get_teacher_info():
    """Group teachers from ``es_teacher`` by name.

    :return: {name: [(teacher_id, school_id), ...], ...}
    """
    teachers_by_name = {}
    rows = DBhelper.execute("select ID, NAME, school_id from es_teacher")
    for row in rows:
        teacher_key = row[0]
        teacher_name = row[1]
        school_key = row[2]
        teachers_by_name.setdefault(teacher_name, []).append(
            (teacher_key, school_key))
    return teachers_by_name