Exemple #1
0
    def __init__(self, database="软件学院", feature_name=None):
        from background_program.z_Tools.my_database import MyDataBase

        self.db = MyDataBase(database)
        self.executer = self.db.getExcuter()
        self.feature_name = feature_name
        self.this_year = 2017  # 一个同学一个学年作为一个记录
def get_missing_students():
    from background_program.z_Tools.my_database import MyDataBase

    executer = MyDataBase("软件学院").getExcuter()
    sqls_and_reasons = [
        (m_out_1_not_in_sql, '超过1点出宿舍,并且超过1个小时没回来'),
        (over_2_in_3_times_sql, '超过1点才回宿舍次数超过3次'),
        #                          (wrong_time_sql, '一个月record_type未授权超过120次'),
    ]

    for sql_and_reason in sqls_and_reasons:
        executer.execute(sql_and_reason[0])
        for i in executer.fetchall():
            add_student(student_num=i[0] + str(i[1].year),
                        reason=sql_and_reason[1])

    return missing_students
Exemple #3
0
def doit():
    """
            将students表中的中文字符串改为数字,方便后续处理
    @params
    @return
    """
    from background_program.z_Tools.my_database import MyDataBase

    db = MyDataBase("软件学院")
    executer = db.getExcuter()
    sql = ""
    with open('data_clean.sql', 'r', encoding='UTF-8') as f:
        for line in f.readlines():
            print(sql)
            sql = sql + line

    executer.execute(sql)
    db.close()
Exemple #4
0
def show_student_info(request):
    from background_program.z_Tools.my_database import MyDataBase
    student_num=request.session.get('student_num')
    db = MyDataBase("软件学院")
    executer = db.getExcuter() 
    sql="select * from students where student_num like '{0}%'".format(student_num)
    executer.execute(sql)
    student = executer.fetchone()
    print(student)
    db.close
    column_name=['学年参与活动数量','参与活动的平均活跃程度','活动持续时间','活动平均参与分','表彰级别',\
                 '平均每学年获荣誉次数','图书借阅次数','学年学习总时间','周末自习时间','GPA','成绩排名','助学金等级','助学金金额','挂科科目数',\
                 '重修通过的科目数量','重修未过的科目数量','社会实践参与总时间','社会实践参与是否重点','平均每日进出次数','奖学金等级','奖学金金额',\
                 '成绩','周末平均最早出宿舍时间','平均周末最迟回宿舍时间','工作日平均在外时间','食堂总消费额','超市总消费额','其他类别总消费额','充值总额','小吃消费总额',\
                 '锻炼总消费额','学习消费总额','充值日平均消费额最大值','锻炼日消费最大值','小吃日消费最大值','学习日消费最大值','超市日消费最大值','餐厅日消费最大值','其他类别日消费最大值',\
                 '充值月消费最大值','锻炼月消费最大值','小吃月消费最大值','学习月消费最大值','食堂月消费最大值','超市月消费最大值','其他类别月消费最大值','充值月消费最小值','锻炼月消费最小值','小吃月消费最小值',\
                 '学习月消费最小值','食堂月消费最小值','超市月消费最小值','其他类别月消费最小值','总消费次数','食堂消费额占总消费额的比例','食堂消费次数','总消费额','食堂消费的中位数','超市消费的中位数','充值消费的中位数','小吃消费的中位数',\
                 '锻炼消费的中位数','学习消费的中位数','其他类别消费的中位数','食堂消费的平均值','超市消费的平均值','充值消费的平均值','小吃消费的平均值',\
                 '锻炼消费的平均值','学习消费的平均值','其他类别消费的平均值','食堂消费的方差','超市消费的方差','充值消费的方差','其他类别消费的方差','小吃消费的方差',\
                 '锻炼消费的方差','学习消费的方差','能否毕业',]
    result={}
    UserName=student[0][0:14]
    student_name=student[1]
    student_type=student[2]
    school_year=student[22]
    j=0
    for i in range(len(student)):
        if i!=0 and i!=1 and i!=2 and i!=22:
            if student[i] is None or int(student[i])==0:
                result[column_name[j]]=float(0)
            else:
                result[column_name[j]]=student[i]
            j=j+1
#     return render(request,'student_client/show_student_info.html',{'result':json.dumps(result),'UserName':UserName,'student_name':student_name,\
#                                                                    'student_type':student_type,'school_year':school_year,})
    context = dict()
    context['UserName'] = UserName
    context['student_name'] = student_name
    context['student_type'] = student_type
    context['school_year'] = school_year
    context['student_info'] = student_data.get_students_info(result, request)
    template = loader.get_template('student_client/show_student_info.html')
    return HttpResponse(template.render(context, request))
Exemple #5
0
def clean_useless_data():
    from background_program.z_Tools.my_database import MyDataBase
    #     try:
    db = MyDataBase("软件学院")
    executer = db.getExcuter()
    sql = "SELECT *\
            FROM\
                students\
            WHERE\
                score IS NOT NULL\
            AND canteen_times IS NOT NULL"

    executer.execute(sql)
    results = [list(i) for i in executer.fetchall()]
    for i in range(len(results)):
        for j in range(len(results[i])):
            if results[i][j] is None:
                results[i][j] = 0
        sql = "insert into students_float values{0}".format(tuple(results[i]))
        executer.execute(sql)

    db.close()
Exemple #6
0
def get_single_student_info(student_num):
    from background_program.z_Tools.my_database import MyDataBase
    db = MyDataBase("软件学院")
    executer = db.getExcuter() 
    sql="select * from students where student_num like '{0}%'".format(student_num)
    executer.execute(sql)
    student = executer.fetchone()
    print(student)
    db.close
    column_name=['学年参与活动数量','参与活动的平均活跃程度','活动持续时间','活动平均参与分','表彰级别',\
                 '平均每学年获荣誉次数','图书借阅次数','学年学习总时间','周末自习时间','GPA','成绩排名','助学金等级','助学金金额','挂科科目数',\
                 '重修通过的科目数量','重修未过的科目数量','社会实践参与总时间','社会实践参与是否重点','平均每日进出次数','奖学金等级','奖学金金额',\
                 '成绩','周末平均最早出宿舍时间','平均周末最迟回宿舍时间','工作日平均在外时间','食堂总消费额','超市总消费额','其他类别总消费额','充值总额','小吃消费总额',\
                 '锻炼总消费额','学习消费总额','充值日平均消费额最大值','锻炼日消费最大值','小吃日消费最大值','学习日消费最大值','超市日消费最大值','餐厅日消费最大值','其他类别日消费最大值',\
                 '充值月消费最大值','锻炼月消费最大值','小吃月消费最大值','学习月消费最大值','食堂月消费最大值','超市月消费最大值','其他类别月消费最大值','充值月消费最小值','锻炼月消费最小值','小吃月消费最小值',\
                 '学习月消费最小值','食堂月消费最小值','超市月消费最小值','其他类别月消费最小值','总消费次数','食堂消费额占总消费额的比例','食堂消费次数','总消费额','食堂消费的中位数','超市消费的中位数','充值消费的中位数','小吃消费的中位数',\
                 '锻炼消费的中位数','学习消费的中位数','其他类别消费的中位数','食堂消费的平均值','超市消费的平均值','充值消费的平均值','小吃消费的平均值',\
                 '锻炼消费的平均值','学习消费的平均值','其他类别消费的平均值','食堂消费的方差','超市消费的方差','充值消费的方差','其他类别消费的方差','小吃消费的方差',\
                 '锻炼消费的方差','学习消费的方差','能否毕业',]
    result={}
    if student is None:
        return None
    else:
        UserName=student[0][0:14]
        student_name=student[1]
        student_type=student[2]
        school_year=student[22]
        j=0
        for i in range(len(student)):
            if i!=0 and i!=1 and i!=2 and i!=22:
                if student[i] is None or int(student[i])==0:
                    result[column_name[j]]=float(0)
                else:
                    result[column_name[j]]=student[i]
                j=j+1
        return result
Exemple #7
0
def Search_student_info(student_num):
    from background_program.z_Tools.my_database import MyDataBase
    try:
        db = MyDataBase("软件学院")
        executer = db.getExcuter()
        sql="select * from students where student_num like '%{0}%'".format(student_num)
        executer.execute(sql)
        student = list(executer.fetchone())
        column_name=['学号','姓名','学生类别','学年参与活动数量','参与活动的平均活跃程度','活动持续时间','活动平均参与分','表彰级别',\
                     '平均每学年获荣誉次数','图书借阅次数','学年学习总时间','周末自习时间','GPA','成绩排名','助学金等级','助学金金额','挂科科目数',\
                     '重修通过的科目数量','重修未过的科目数量','社会实践参与总时间','社会实践参与是否重点','平均每日进出次数','学年','奖学金等级','奖学金金额',\
                     '成绩','周末平均最早出宿舍时间','平均周末最迟回宿舍时间','工作日平均在外时间','食堂总消费额','超市总消费额','其他类别总消费额','充值总额','小吃消费总额',\
                     '锻炼总消费额','学习消费总额','充值日平均消费额最大值','锻炼日消费最大值','小吃日消费最大值','学习日消费最大值','超市日消费最大值','餐厅日消费最大值','其他类别日消费最大值',\
                     '充值月消费最大值','锻炼月消费最大值','小吃月消费最大值','学习月消费最大值','食堂月消费最大值','超市月消费最大值','其他类别月消费最大值','充值月消费最小值','锻炼月消费最小值','小吃月消费最小值',\
                     '学习月消费最小值','食堂月消费最小值','超市月消费最小值','其他类别月消费最小值','总消费次数','食堂消费额占总消费额的比例','食堂消费次数','总消费额','食堂消费的中位数','超市消费的中位数','充值消费的中位数','小吃消费的中位数',\
                     '锻炼消费的中位数','学习消费的中位数','其他类别消费的中位数','食堂消费的平均值','超市消费的平均值','充值消费的平均值','小吃消费的平均值',\
                     '锻炼消费的平均值','学习消费的平均值','其他类别消费的平均值','食堂消费的方差','超市消费的方差','充值消费的方差','其他类别消费的方差','小吃消费的方差',\
                     '锻炼消费的方差','学习消费的方差']
        result={}
        for i in range(len(student)):
            result[column_name[i]]=student[i]
        return result
    except:
        pass
Exemple #8
0
class DataCarer():
    def __init__(self, label_name, school_year, usage="regression"):
        '''
        @params label_name:xxx......
        @return 
        '''
        self.usages = ["regression", "classification"]
        self.usage = usage
        if usage not in self.usages:
            print('用法错误:%s' % usage)
        elif usage == "regression":
            self.table_name = "students_float"
        elif usage == "classification":
            self.table_name = "students_int"

        self.label_name = label_name
        self.school_year = school_year
        self.labellist = []

    def create_train_dataSet(self):
        '''
                        获取训练数据
        @params 
        @return numpy.mat X_train:特征,numpy.mat Y_train:标签
                list labellist:特征项名称的集合
        '''
        """确定label是哪一列,并将其作为待预测对象"""
        self.open_database("软件学院")
        self.executer.execute("DESCRIBE {0}".format(self.table_name))
        columnName = self.executer.fetchall()
        index = -1
        for i in range(2, len(columnName)):
            if str(columnName[i][0]) == self.label_name:
                index = i
            else:
                self.labellist.append(columnName[i][0])
        if index == -1:
            print('异常:未发现' + self.label_name)
        """先获得数据库表中的全部学生的数据"""
        self.executer.execute("select * from {0}".format(self.table_name))
        dataSet = []
        for i in self.executer.fetchall():
            student = Student(student_num=i[0],
                              features=list(i[2:index]) + list(i[index + 1:]),
                              label=i[index])
            dataSet.append(student.getAll())
        dataSet = np.array(dataSet)
        self.close_database()

        dataSet = mat(dataSet)

        X_train, Y_train = mat(dataSet[:, :-1]), mat(dataSet[:, -1])
        #         self.pre_process(X_train)
        return X_train, Y_train

    def create_validate_dataSet(self):
        '''
                        获取测试数据
        @params 
        @return list[Student] students:学生列表,numpy.mat X_test:特征
        '''
        """确定label是哪一列,并将其作为待预测对象"""
        self.open_database("软件学院")
        self.executer.execute("DESCRIBE {0}".format(self.table_name))
        columnName = self.executer.fetchall()
        index = -1
        for i in range(len(columnName)):
            if str(columnName[i][0]) == self.label_name:
                index = i
                break
        if index == -1:
            print('异常:未发现' + self.label_name)
        """获得所有学生的数据"""
        sql = "select * from {0} where right(student_num,4) in('{1}','2016')"
        self.executer.execute(sql.format(self.table_name, self.school_year))
        students, X_test = [], []
        for i in self.executer.fetchall():
            student = Student(student_num=i[0],
                              features=list(i[2:index]) + list(i[index + 1:]),
                              label=i[index])
            X_test.append(student.features)
            students.append(student)
        A = np.array(X_test)
        X_test = mat(X_test)
        self.close_database()
        self.pre_process(X_test)
        return students, X_test

    def pre_process(self, X):
        '''
        Created on 2017年12月20日
                        对数据进行初级的预处理
        @author: Jack
        @params 
        @return 
        '''
        from background_program.b_Sample_processing.PreProcessing.MyImputer import MyImputer

        MyImputer().transformer.fit_transform(X)

    def get_feature_range(self, feature_name, label_name, label_min,
                          label_max):
        self.open_database("软件学院")
        sql = 'select min({0}),max({0}) from {1} where {2} >= {3} and {2} < {4}'.\
            format(feature_name, 'students_float', label_name, label_min, label_max)
        try:
            self.executer.execute(sql)
            result = self.executer.fetchone()
            feature_min, feature_max = result[0], result[1]
            self.close_database
            return feature_min, feature_max
        except:
            return 0, 0

    def open_database(self, database_name):
        self.db = MyDataBase(database_name)
        self.executer = self.db.getExcuter()

    def close_database(self):
        self.executer.close()
        self.db.close()
Exemple #9
0
 def open_database(self, database_name):
     self.db = MyDataBase(database_name)
     self.executer = self.db.getExcuter()
Exemple #10
0
class FeatureCalculater:
    '''
            特征计算器的父类
    '''
    def __init__(self, database="软件学院", feature_name=None):
        from background_program.z_Tools.my_database import MyDataBase

        self.db = MyDataBase(database)
        self.executer = self.db.getExcuter()
        self.feature_name = feature_name
        self.this_year = 2017  # 一个同学一个学年作为一个记录

    def get_school_year(self, student_num):
        in_year = int(student_num[3:7])
        school_year = [str(i) for i in range(in_year, self.this_year + 1)]

        return school_year

    @my_exception_handler
    def add_student(self, student_num):
        '''
                        添加学生
        @params string student_num:学生学号
        @retrun
        '''
        sql = "insert into students(student_num) values('{0}')".format(
            student_num)
        self.executer.execute(sql)

    @my_exception_handler
    def calculate(self):
        '''
                        获取学号
                        所有子类都要实现这个函数
                        特征值的计算
        @params 
        @retrun
        '''
        sql = "select distinct student_num,student_name,grade,student_type from subsidy"
        self.executer.execute(sql)
        result = self.executer.fetchall()
        student_types = ['普通高校本科学生', '硕士研究生', '交流生', '普通进修生', '硕士专业学位研究生']
        student_type = 0
        for re in result:
            count = int(re[2])
            for s_i in range(len(student_types)):
                if re[3] == student_types[s_i]:
                    student_type = s_i
                    break
            if student_type == 0:
                while count <= int(re[2]) + 8 and count <= 2017:
                    sql = "insert into students(student_num,student_name,student_grade,student_type) values(%s,%s,%s,%s)"
                    self.executer.execute(sql, (str(re[0]) + str(count), re[1],
                                                str(re[2]), student_type))
                    count = count + 1
            else:
                while count <= int(re[2]) + 8 and count <= 2017:
                    sql = "insert into students(student_num,student_name,student_grade,student_type) values(%s,%s,%s,%s)"
                    self.executer.execute(sql, (str(re[0]) + str(count), re[1],
                                                str(re[2]), student_type))
                    count = count + 1

    @my_exception_handler
    def cluster(self, feature_min=None, feature_max=None, clusters=1):
        from sklearn.cluster import KMeans
        import numpy
        '''
                        所有子类都要实现这个函数
                        对特征值进行聚类来归一化处理
        @params 
        @retrun
        '''
        # 获得学生的数据
        sql = 'SELECT {0} FROM students_float WHERE {1} is not null and {2} <> 0'.format(
            self.feature_name, self.feature_name, self.feature_name)
        count = self.executer.execute(sql)
        result = self.executer.fetchall()  # count是行数
        dataSet2 = list(result)
        dataSet = numpy.array([i[0] for i in result]).reshape(count, 1)

        # 聚类
        kmeans = KMeans(n_clusters=clusters, random_state=0).fit(dataSet)
        center = kmeans.cluster_centers_  # 求出聚类中心
        center_x = ['%0.4f' % center[i][0] for i in range(len(center))]
        labels = kmeans.labels_  # 标注每个点的聚类结果
        # 写入student_rank表
        types, maxx, minn = [[] for i in range(0, clusters)], [], []
        dataSet_quene = list(set(dataSet2))
        print(len(dataSet_quene))
        for i in range(len(labels)):
            types[labels[i]].append(dataSet[i][0])
            if len(dataSet_quene) > 0:
                for j in range(len(dataSet_quene)):
                    if dataSet[i][0] == dataSet_quene[j]:
                        del dataSet_quene[j]
                        print(len(dataSet_quene))
                        sql = 'select student_num from students_float where {0}={1}'.format(
                            self.feature_name, str((dataSet[i][0])))
                        self.executer.execute(sql)
                        student_nums = self.executer.fetchall()
                        for student_num in student_nums:
                            student_num = student_num[0]
                            sql = "update students_int set {0} = {1} where student_num = {2}".format(
                                self.feature_name, str(labels[i] + 1),
                                student_num)
                            n_update = self.executer.execute(sql)
                            if n_update == 0:
                                try:
                                    sql = 'insert into students_int(student_num) values({0})'.format(
                                        student_num)
                                    self.executer.execute(sql)
                                except:
                                    pass
                                sql = "update students_int set {0} = {1} where student_num = {2}".format(
                                    self.feature_name, str(labels[i] + 1),
                                    student_num)
                                self.executer.execute(sql)
                        break
        # xxxxxx
        if feature_max is None and feature_min is None:
            for i in range(0, clusters):
                maxx.append(max(types[i]))
                minn.append(min(types[i]))
            maxx, minn, cent = sorted(maxx), sorted(minn), sorted(
                [float(i) for i in center_x])
            for i in range(len(maxx) - 1):
                if maxx[i] < minn[i + 1]:
                    temp = (float(maxx[i]) + float(minn[i + 1])) / 2.0
                    minn[i + 1] = (float(maxx[i]) + float(minn[i + 1])) / 2.0
                    maxx[i] = temp

            sql = 'SELECT max({0}) FROM students_float'.format(
                self.feature_name)
            self.executer.execute(sql)
            max_num = int(self.executer.fetchone()[0])
            maxx[len(maxx) - 1] = max_num
        else:
            maxx, minn, cent = feature_max, feature_min, (feature_max +
                                                          feature_min) / 2.0
        # 将聚类范围保存
        with open(r"Cluster_Feature", "a", encoding='utf8') as f:
            f.write(str(self.feature_name) + '\n')
            f.write(
                str(0) + ':' + str(0) + ' ' + str(0) + ' ' + str(minn[0]) +
                '\n')  # 手动加入第一区间
            for i in range(len(cent)):
                f.write(
                    str(i + 1) + ':' + str(cent[i]) + ' ' + str(minn[i]) +
                    ' ' + str(maxx[i]) + '\n')

    def tearDown(self):
        '''
        teardown
        @params 
        @retrun
        '''
        self.db.close()