def __init__(self, database="软件学院", feature_name=None): from background_program.z_Tools.my_database import MyDataBase self.db = MyDataBase(database) self.executer = self.db.getExcuter() self.feature_name = feature_name self.this_year = 2017 # 一个同学一个学年作为一个记录
def get_missing_students(): from background_program.z_Tools.my_database import MyDataBase executer = MyDataBase("软件学院").getExcuter() sqls_and_reasons = [ (m_out_1_not_in_sql, '超过1点出宿舍,并且超过1个小时没回来'), (over_2_in_3_times_sql, '超过1点才回宿舍次数超过3次'), # (wrong_time_sql, '一个月record_type未授权超过120次'), ] for sql_and_reason in sqls_and_reasons: executer.execute(sql_and_reason[0]) for i in executer.fetchall(): add_student(student_num=i[0] + str(i[1].year), reason=sql_and_reason[1]) return missing_students
def doit(): """ 将students表中的中文字符串改为数字,方便后续处理 @params @return """ from background_program.z_Tools.my_database import MyDataBase db = MyDataBase("软件学院") executer = db.getExcuter() sql = "" with open('data_clean.sql', 'r', encoding='UTF-8') as f: for line in f.readlines(): print(sql) sql = sql + line executer.execute(sql) db.close()
def show_student_info(request): from background_program.z_Tools.my_database import MyDataBase student_num=request.session.get('student_num') db = MyDataBase("软件学院") executer = db.getExcuter() sql="select * from students where student_num like '{0}%'".format(student_num) executer.execute(sql) student = executer.fetchone() print(student) db.close column_name=['学年参与活动数量','参与活动的平均活跃程度','活动持续时间','活动平均参与分','表彰级别',\ '平均每学年获荣誉次数','图书借阅次数','学年学习总时间','周末自习时间','GPA','成绩排名','助学金等级','助学金金额','挂科科目数',\ '重修通过的科目数量','重修未过的科目数量','社会实践参与总时间','社会实践参与是否重点','平均每日进出次数','奖学金等级','奖学金金额',\ '成绩','周末平均最早出宿舍时间','平均周末最迟回宿舍时间','工作日平均在外时间','食堂总消费额','超市总消费额','其他类别总消费额','充值总额','小吃消费总额',\ '锻炼总消费额','学习消费总额','充值日平均消费额最大值','锻炼日消费最大值','小吃日消费最大值','学习日消费最大值','超市日消费最大值','餐厅日消费最大值','其他类别日消费最大值',\ '充值月消费最大值','锻炼月消费最大值','小吃月消费最大值','学习月消费最大值','食堂月消费最大值','超市月消费最大值','其他类别月消费最大值','充值月消费最小值','锻炼月消费最小值','小吃月消费最小值',\ '学习月消费最小值','食堂月消费最小值','超市月消费最小值','其他类别月消费最小值','总消费次数','食堂消费额占总消费额的比例','食堂消费次数','总消费额','食堂消费的中位数','超市消费的中位数','充值消费的中位数','小吃消费的中位数',\ '锻炼消费的中位数','学习消费的中位数','其他类别消费的中位数','食堂消费的平均值','超市消费的平均值','充值消费的平均值','小吃消费的平均值',\ '锻炼消费的平均值','学习消费的平均值','其他类别消费的平均值','食堂消费的方差','超市消费的方差','充值消费的方差','其他类别消费的方差','小吃消费的方差',\ '锻炼消费的方差','学习消费的方差','能否毕业',] result={} UserName=student[0][0:14] student_name=student[1] student_type=student[2] school_year=student[22] j=0 for i in range(len(student)): if i!=0 and i!=1 and i!=2 and i!=22: if student[i] is None or int(student[i])==0: result[column_name[j]]=float(0) else: result[column_name[j]]=student[i] j=j+1 # return render(request,'student_client/show_student_info.html',{'result':json.dumps(result),'UserName':UserName,'student_name':student_name,\ # 'student_type':student_type,'school_year':school_year,}) context = dict() context['UserName'] = UserName context['student_name'] = student_name context['student_type'] = student_type context['school_year'] = school_year context['student_info'] = student_data.get_students_info(result, request) template = loader.get_template('student_client/show_student_info.html') return HttpResponse(template.render(context, request))
def clean_useless_data(): from background_program.z_Tools.my_database import MyDataBase # try: db = MyDataBase("软件学院") executer = db.getExcuter() sql = "SELECT *\ FROM\ students\ WHERE\ score IS NOT NULL\ AND canteen_times IS NOT NULL" executer.execute(sql) results = [list(i) for i in executer.fetchall()] for i in range(len(results)): for j in range(len(results[i])): if results[i][j] is None: results[i][j] = 0 sql = "insert into students_float values{0}".format(tuple(results[i])) executer.execute(sql) db.close()
def get_single_student_info(student_num): from background_program.z_Tools.my_database import MyDataBase db = MyDataBase("软件学院") executer = db.getExcuter() sql="select * from students where student_num like '{0}%'".format(student_num) executer.execute(sql) student = executer.fetchone() print(student) db.close column_name=['学年参与活动数量','参与活动的平均活跃程度','活动持续时间','活动平均参与分','表彰级别',\ '平均每学年获荣誉次数','图书借阅次数','学年学习总时间','周末自习时间','GPA','成绩排名','助学金等级','助学金金额','挂科科目数',\ '重修通过的科目数量','重修未过的科目数量','社会实践参与总时间','社会实践参与是否重点','平均每日进出次数','奖学金等级','奖学金金额',\ '成绩','周末平均最早出宿舍时间','平均周末最迟回宿舍时间','工作日平均在外时间','食堂总消费额','超市总消费额','其他类别总消费额','充值总额','小吃消费总额',\ '锻炼总消费额','学习消费总额','充值日平均消费额最大值','锻炼日消费最大值','小吃日消费最大值','学习日消费最大值','超市日消费最大值','餐厅日消费最大值','其他类别日消费最大值',\ '充值月消费最大值','锻炼月消费最大值','小吃月消费最大值','学习月消费最大值','食堂月消费最大值','超市月消费最大值','其他类别月消费最大值','充值月消费最小值','锻炼月消费最小值','小吃月消费最小值',\ '学习月消费最小值','食堂月消费最小值','超市月消费最小值','其他类别月消费最小值','总消费次数','食堂消费额占总消费额的比例','食堂消费次数','总消费额','食堂消费的中位数','超市消费的中位数','充值消费的中位数','小吃消费的中位数',\ '锻炼消费的中位数','学习消费的中位数','其他类别消费的中位数','食堂消费的平均值','超市消费的平均值','充值消费的平均值','小吃消费的平均值',\ '锻炼消费的平均值','学习消费的平均值','其他类别消费的平均值','食堂消费的方差','超市消费的方差','充值消费的方差','其他类别消费的方差','小吃消费的方差',\ '锻炼消费的方差','学习消费的方差','能否毕业',] result={} if student is None: return None else: UserName=student[0][0:14] student_name=student[1] student_type=student[2] school_year=student[22] j=0 for i in range(len(student)): if i!=0 and i!=1 and i!=2 and i!=22: if student[i] is None or int(student[i])==0: result[column_name[j]]=float(0) else: result[column_name[j]]=student[i] j=j+1 return result
def Search_student_info(student_num): from background_program.z_Tools.my_database import MyDataBase try: db = MyDataBase("软件学院") executer = db.getExcuter() sql="select * from students where student_num like '%{0}%'".format(student_num) executer.execute(sql) student = list(executer.fetchone()) column_name=['学号','姓名','学生类别','学年参与活动数量','参与活动的平均活跃程度','活动持续时间','活动平均参与分','表彰级别',\ '平均每学年获荣誉次数','图书借阅次数','学年学习总时间','周末自习时间','GPA','成绩排名','助学金等级','助学金金额','挂科科目数',\ '重修通过的科目数量','重修未过的科目数量','社会实践参与总时间','社会实践参与是否重点','平均每日进出次数','学年','奖学金等级','奖学金金额',\ '成绩','周末平均最早出宿舍时间','平均周末最迟回宿舍时间','工作日平均在外时间','食堂总消费额','超市总消费额','其他类别总消费额','充值总额','小吃消费总额',\ '锻炼总消费额','学习消费总额','充值日平均消费额最大值','锻炼日消费最大值','小吃日消费最大值','学习日消费最大值','超市日消费最大值','餐厅日消费最大值','其他类别日消费最大值',\ '充值月消费最大值','锻炼月消费最大值','小吃月消费最大值','学习月消费最大值','食堂月消费最大值','超市月消费最大值','其他类别月消费最大值','充值月消费最小值','锻炼月消费最小值','小吃月消费最小值',\ '学习月消费最小值','食堂月消费最小值','超市月消费最小值','其他类别月消费最小值','总消费次数','食堂消费额占总消费额的比例','食堂消费次数','总消费额','食堂消费的中位数','超市消费的中位数','充值消费的中位数','小吃消费的中位数',\ '锻炼消费的中位数','学习消费的中位数','其他类别消费的中位数','食堂消费的平均值','超市消费的平均值','充值消费的平均值','小吃消费的平均值',\ '锻炼消费的平均值','学习消费的平均值','其他类别消费的平均值','食堂消费的方差','超市消费的方差','充值消费的方差','其他类别消费的方差','小吃消费的方差',\ '锻炼消费的方差','学习消费的方差'] result={} for i in range(len(student)): result[column_name[i]]=student[i] return result except: pass
class DataCarer(): def __init__(self, label_name, school_year, usage="regression"): ''' @params label_name:xxx...... @return ''' self.usages = ["regression", "classification"] self.usage = usage if usage not in self.usages: print('用法错误:%s' % usage) elif usage == "regression": self.table_name = "students_float" elif usage == "classification": self.table_name = "students_int" self.label_name = label_name self.school_year = school_year self.labellist = [] def create_train_dataSet(self): ''' 获取训练数据 @params @return numpy.mat X_train:特征,numpy.mat Y_train:标签 list labellist:特征项名称的集合 ''' """确定label是哪一列,并将其作为待预测对象""" self.open_database("软件学院") self.executer.execute("DESCRIBE {0}".format(self.table_name)) columnName = self.executer.fetchall() index = -1 for i in range(2, len(columnName)): if str(columnName[i][0]) == self.label_name: index = i else: self.labellist.append(columnName[i][0]) if index == -1: print('异常:未发现' + self.label_name) """先获得数据库表中的全部学生的数据""" self.executer.execute("select * from {0}".format(self.table_name)) dataSet = [] for i in self.executer.fetchall(): student = Student(student_num=i[0], features=list(i[2:index]) + list(i[index + 1:]), label=i[index]) dataSet.append(student.getAll()) dataSet = np.array(dataSet) self.close_database() dataSet = mat(dataSet) X_train, Y_train = mat(dataSet[:, :-1]), mat(dataSet[:, -1]) # self.pre_process(X_train) return X_train, Y_train def create_validate_dataSet(self): ''' 获取测试数据 @params @return list[Student] students:学生列表,numpy.mat X_test:特征 ''' """确定label是哪一列,并将其作为待预测对象""" self.open_database("软件学院") self.executer.execute("DESCRIBE {0}".format(self.table_name)) columnName = self.executer.fetchall() index = -1 for i in range(len(columnName)): if str(columnName[i][0]) == self.label_name: index = i break if index == -1: print('异常:未发现' + self.label_name) """获得所有学生的数据""" sql = "select * from {0} where right(student_num,4) in('{1}','2016')" self.executer.execute(sql.format(self.table_name, self.school_year)) students, X_test = [], [] for i in self.executer.fetchall(): student = Student(student_num=i[0], features=list(i[2:index]) + list(i[index + 1:]), label=i[index]) X_test.append(student.features) students.append(student) A = np.array(X_test) X_test = mat(X_test) self.close_database() self.pre_process(X_test) return students, X_test def pre_process(self, X): ''' Created on 2017年12月20日 对数据进行初级的预处理 @author: Jack @params @return ''' from background_program.b_Sample_processing.PreProcessing.MyImputer import MyImputer MyImputer().transformer.fit_transform(X) def get_feature_range(self, feature_name, label_name, label_min, label_max): self.open_database("软件学院") sql = 'select min({0}),max({0}) from {1} where {2} >= {3} and {2} < {4}'.\ format(feature_name, 'students_float', label_name, label_min, label_max) try: self.executer.execute(sql) result = self.executer.fetchone() feature_min, feature_max = result[0], result[1] self.close_database return feature_min, feature_max except: return 0, 0 def open_database(self, database_name): self.db = MyDataBase(database_name) self.executer = self.db.getExcuter() def close_database(self): self.executer.close() self.db.close()
def open_database(self, database_name): self.db = MyDataBase(database_name) self.executer = self.db.getExcuter()
class FeatureCalculater: ''' 特征计算器的父类 ''' def __init__(self, database="软件学院", feature_name=None): from background_program.z_Tools.my_database import MyDataBase self.db = MyDataBase(database) self.executer = self.db.getExcuter() self.feature_name = feature_name self.this_year = 2017 # 一个同学一个学年作为一个记录 def get_school_year(self, student_num): in_year = int(student_num[3:7]) school_year = [str(i) for i in range(in_year, self.this_year + 1)] return school_year @my_exception_handler def add_student(self, student_num): ''' 添加学生 @params string student_num:学生学号 @retrun ''' sql = "insert into students(student_num) values('{0}')".format( student_num) self.executer.execute(sql) @my_exception_handler def calculate(self): ''' 获取学号 所有子类都要实现这个函数 特征值的计算 @params @retrun ''' sql = "select distinct student_num,student_name,grade,student_type from subsidy" self.executer.execute(sql) result = self.executer.fetchall() student_types = ['普通高校本科学生', '硕士研究生', '交流生', '普通进修生', '硕士专业学位研究生'] student_type = 0 for re in result: count = int(re[2]) for s_i in range(len(student_types)): if re[3] == student_types[s_i]: student_type = s_i break if student_type == 0: while count <= int(re[2]) + 8 and count <= 2017: sql = "insert into students(student_num,student_name,student_grade,student_type) values(%s,%s,%s,%s)" self.executer.execute(sql, (str(re[0]) + str(count), re[1], str(re[2]), student_type)) count = count + 1 else: while count <= int(re[2]) + 8 and count <= 2017: sql = "insert into students(student_num,student_name,student_grade,student_type) values(%s,%s,%s,%s)" self.executer.execute(sql, (str(re[0]) + str(count), re[1], str(re[2]), student_type)) count = count + 1 @my_exception_handler def cluster(self, feature_min=None, feature_max=None, clusters=1): from sklearn.cluster import KMeans import numpy ''' 所有子类都要实现这个函数 对特征值进行聚类来归一化处理 @params @retrun ''' # 获得学生的数据 sql = 'SELECT {0} FROM students_float WHERE {1} is not null and {2} <> 0'.format( self.feature_name, self.feature_name, self.feature_name) count = self.executer.execute(sql) result = self.executer.fetchall() # count是行数 dataSet2 = list(result) dataSet = numpy.array([i[0] for i in result]).reshape(count, 1) # 聚类 kmeans = KMeans(n_clusters=clusters, random_state=0).fit(dataSet) center = kmeans.cluster_centers_ # 求出聚类中心 center_x = ['%0.4f' % center[i][0] for i in range(len(center))] labels = kmeans.labels_ # 标注每个点的聚类结果 # 写入student_rank表 types, maxx, minn = [[] for i in range(0, clusters)], [], [] dataSet_quene = list(set(dataSet2)) print(len(dataSet_quene)) for i in range(len(labels)): types[labels[i]].append(dataSet[i][0]) if len(dataSet_quene) > 0: for j in range(len(dataSet_quene)): if dataSet[i][0] == dataSet_quene[j]: del dataSet_quene[j] print(len(dataSet_quene)) sql = 'select student_num from students_float where {0}={1}'.format( self.feature_name, str((dataSet[i][0]))) self.executer.execute(sql) student_nums = self.executer.fetchall() for student_num in student_nums: student_num = student_num[0] sql = "update students_int set {0} = {1} where student_num = {2}".format( self.feature_name, str(labels[i] + 1), student_num) n_update = self.executer.execute(sql) if n_update == 0: try: sql = 'insert into students_int(student_num) values({0})'.format( student_num) self.executer.execute(sql) except: pass sql = "update students_int set {0} = {1} where student_num = {2}".format( self.feature_name, str(labels[i] + 1), student_num) self.executer.execute(sql) break # xxxxxx if feature_max is None and feature_min is None: for i in range(0, clusters): maxx.append(max(types[i])) minn.append(min(types[i])) maxx, minn, cent = sorted(maxx), sorted(minn), sorted( [float(i) for i in center_x]) for i in range(len(maxx) - 1): if maxx[i] < minn[i + 1]: temp = (float(maxx[i]) + float(minn[i + 1])) / 2.0 minn[i + 1] = (float(maxx[i]) + float(minn[i + 1])) / 2.0 maxx[i] = temp sql = 'SELECT max({0}) FROM students_float'.format( self.feature_name) self.executer.execute(sql) max_num = int(self.executer.fetchone()[0]) maxx[len(maxx) - 1] = max_num else: maxx, minn, cent = feature_max, feature_min, (feature_max + feature_min) / 2.0 # 将聚类范围保存 with open(r"Cluster_Feature", "a", encoding='utf8') as f: f.write(str(self.feature_name) + '\n') f.write( str(0) + ':' + str(0) + ' ' + str(0) + ' ' + str(minn[0]) + '\n') # 手动加入第一区间 for i in range(len(cent)): f.write( str(i + 1) + ':' + str(cent[i]) + ' ' + str(minn[i]) + ' ' + str(maxx[i]) + '\n') def tearDown(self): ''' teardown @params @retrun ''' self.db.close()