Ejemplo n.º 1
0
def loadData(version, code, core=False):
    """Export the papers of every teacher in one discipline to a text file.

    Records ``version -> code`` in the pickled registry ``data/dic``, fetches
    the teacher ids attached to discipline ``code``, and writes one line per
    paper to ``data/<version>/<code>.txt``. Each line is the ``str()`` of a
    dict with the keys ``id``, ``paper`` (name and abstract joined by a
    comma) and ``keyWord`` (the keyword column split on commas).

    :param version: discipline name; used as the registry key and as the
        name of the output directory
    :param code: discipline code; bound into the teacher query and used as
        the output file name
    :param core: when true, only rows with ``core_journal=1`` are exported
    :return: total number of paper rows fetched for all teachers
    """
    print("加载数据")

    out_dir = "data/" + version
    # Create the output tree up front: the registry write below needs
    # ``data/`` to exist, which is not the case on a first run.
    os.makedirs(out_dir, exist_ok=True)

    # NOTE: pickle is only safe on files this program wrote itself; never
    # point ``data/dic`` at untrusted content.
    try:
        with open('data/dic', 'rb') as registry_file:
            dic = pickle.load(registry_file)
    except Exception:
        # Best effort: a missing or unreadable registry starts out empty.
        dic = {}
    dic[version] = code
    with open('data/dic', 'wb') as registry_file:
        pickle.dump(dic, registry_file)

    # Teacher ids that belong to the requested discipline.
    teacher_sql = 'SELECT teacher_id FROM `teacher_discipline` where discipline_id = %s'

    if core:
        # Restrict the export to core-journal papers.
        paper_sql = "select id,`name`,abstract,keyword from paper where core_journal=1 and author_id=%s and checkOrg=1"
    else:
        paper_sql = "select id, `name`, abstract, keyword from paper where  author_id=%s and checkOrg=1"

    teachers = dbs.getDics(teacher_sql, (code, ))

    paper_count = 0
    # ``with`` guarantees the output file is closed even if a query or a
    # write raises part-way through.
    with open(out_dir + "/" + code + ".txt", 'w', encoding='utf8') as out:
        for teacher in teachers:
            papers = dbs.getDics(paper_sql, (teacher["teacher_id"], ))
            paper_count += len(papers)
            for p in papers:
                record = {
                    "id": p["id"],
                    "paper": p["name"] + ',' + p['abstract'],
                    "keyWord": p["keyword"].split(',')
                }
                out.write(str(record) + '\n')

    print("paper:", paper_count)
    # These two counters are never updated by this function; they are still
    # printed so the console output keeps its original three-line format.
    print("eng:", 0)
    print("teacher:", 0)

    return paper_count
def create_teacher_discipline():
    """Populate ``teacher_discipline`` from teacher and institution tables.

    Reads every ``(ID, INSTITUTION_ID)`` pair from ``es_teacher`` and every
    ``(INSTITUTION_ID, DISCIPLINE_CODE)`` pair from ``es_relation_in_dis``.
    For each teacher whose institution has a discipline, a
    ``(teacher_id, institution_id, discipline_id)`` row is inserted. When
    an institution is listed with several disciplines, only the first one
    returned by the query is used. Rows are sent in multi-row INSERT
    statements of at most 3000 tuples.

    :return: None
    """
    batch_size = 3000

    teacher_rows = dbs.getDics("select ID, INSTITUTION_ID from es_teacher")
    teacher_institution = {
        row["ID"]: row["INSTITUTION_ID"] for row in teacher_rows
    }

    relation_rows = dbs.getDics(
        "select INSTITUTION_ID, DISCIPLINE_CODE from es_relation_in_dis")
    institution_discipline = {}
    for row in relation_rows:
        # setdefault keeps the first discipline seen for each institution.
        institution_discipline.setdefault(row["INSTITUTION_ID"],
                                          row["DISCIPLINE_CODE"])

    # NOTE(review): values are interpolated into the SQL text and the
    # discipline code is quoted without escaping. This is fine only as long
    # as the codes never contain a quote character; switch to bound
    # parameters if ``dbs.exe_sql`` supports them.
    pending = []
    for teacher_id, institution_id in teacher_institution.items():
        if institution_id not in institution_discipline:
            continue
        discipline_id = institution_discipline[institution_id]
        pending.append("(" + str(teacher_id) + "," + str(institution_id) +
                       "," + "'" + str(discipline_id) + "'" + ")")
        if len(pending) >= batch_size:
            sql = "insert into teacher_discipline(teacher_id, institution_id, discipline_id)values"
            sql += ", ".join(pending)
            print(sql)
            pending = []
            dbs.exe_sql(sql)

    # Flush the remainder. Skip it when nothing is pending (no matching
    # teachers, or an exact multiple of the batch size): an INSERT with an
    # empty VALUES list is not valid SQL.
    if pending:
        sql = "insert into teacher_discipline(teacher_id, institution_id, discipline_id) values"
        sql += ", ".join(pending)
        dbs.exe_sql(sql)
Ejemplo n.º 3
0
def get_institution_info(lab_dict):
    """Link institutions to labs of the same school and store the pairs.

    Loads ``(school_name, name) -> id`` for every row of ``es_institution``.
    For each lab key in ``lab_dict`` it looks at the institutions of the
    same school. An institution matches when its name, with a trailing
    "学院" removed, occurs inside the lab's institution text. All matching
    ``(institution_id, lab_id)`` pairs are written to ``institution_lab``
    with a single multi-row INSERT.

    :param lab_dict: mapping whose keys are indexable as
        ``(school, institution_text)`` and whose values are lab ids
    :return: None; the institution map, the SQL statement (when one is
        executed) and the number of pairs are printed
    """
    institution_info = dbs.getDics(
        "select id, school_name, `name` from es_institution")

    institution_dict = {
        (row['school_name'], row['name']): row['id']
        for row in institution_info
    }
    print(institution_dict)

    # Group institutions by school once, so each lab only scans the
    # institutions of its own school. The "学院" suffix is stripped here
    # because it does not depend on the lab being compared.
    by_school = {}
    for (school, name), institution_id in institution_dict.items():
        stem = name[:-2] if name.endswith("学院") else name
        by_school.setdefault(school, []).append((stem, institution_id))

    pairs = []
    for lab_key in lab_dict:
        lab_school, lab_institution = lab_key[0], lab_key[1]
        for stem, institution_id in by_school.get(lab_school, ()):
            # An empty stem would be "contained" in every string and link
            # the institution to all labs of the school, so ignore it.
            if stem and stem in lab_institution:
                pairs.append("(" + str(institution_id) + "," +
                             str(lab_dict[lab_key]) + ")")

    count = len(pairs)
    # Only run the INSERT when there is something to insert: a statement
    # with an empty VALUES list is not valid SQL.
    if pairs:
        sql = "insert into institution_lab(institution_id, lab_id)values "
        sql += ",".join(pairs)
        print(sql)
        dbs.exe_sql(sql)
    print(count)
Ejemplo n.º 4
0
 def __init__(self):
     """Build the list of discipline configurations stored in ``self.dic``.

     Queries the distinct discipline ids found in ``teacher_discipline``
     together with their names from ``es_discipline``. Every discipline
     whose id starts with "08" yields one entry of the form
     ``[core_flag, mode, discipline_id, "<NAME>-<discipline_id>"]``.
     ``self.used`` starts out as an empty dict.
     """
     print("-----")
     query = "SELECT b.NAME, a.discipline_id from (SELECT discipline_id FROM `teacher_discipline`  GROUP BY discipline_id) a LEFT JOIN es_discipline b on a.discipline_id=b.CODE "
     # Each row is a dict with the keys "NAME" and "discipline_id".
     rows = dbs.getDics(query)
     print(rows)

     # Core-journal flag values to generate entries for.
     core_flags = [False]
     # Mode labels to generate entries for; the purpose of this value is
     # not visible here (the original author also marked it as unclear).
     modes = ["tdidf"]

     self.used = {}
     self.dic = []
     for row in rows:
         if row['discipline_id'][0:2] != "08":
             continue
         for core_flag in core_flags:
             for mode in modes:
                 # Version label: discipline name, a dash, discipline id.
                 label = row["NAME"]
                 label += "-" + row["discipline_id"]
                 self.dic.append(
                     [core_flag, mode, row["discipline_id"], label])
     print(self.dic)
Ejemplo n.º 5
0
def get_lab_info():
    """Return a mapping of ``(org, institution)`` to lab id.

    Reads the ``id``, ``org`` and ``institution`` columns of every row in
    ``national_key_lab``. When two rows share the same
    ``(org, institution)`` pair, the id of the later row wins.

    :return: dict keyed by ``(org, institution)`` tuples with lab ids as
        values
    """
    rows = dbs.getDics("select id, org, institution from national_key_lab")
    return {(row['org'], row['institution']): row['id'] for row in rows}