def csv_to_database(file_path):
    """Import the rows of a CSV file into the ``AppUser`` table.

    The first 10000 bytes of the file are passed to ``chardet``; the file is
    decoded as UTF-8 when exactly ``"utf-8"`` is detected and as GBK
    otherwise. The file is then read in chunks of 1,000,000 rows and every
    chunk is written to the database as soon as it has been read, so all rows
    of a multi-chunk file are imported (not just the final chunk). Rows whose
    ``id`` already exists in the table are skipped.

    The most recently read chunk is left in the module-level ``df``.

    :param file_path: path of the CSV file to import
    :return: ``"200"`` on success, otherwise the text of the exception raised
    """
    global df
    chunk_rows = 1000000
    try:
        with open(file_path, 'rb') as raw:
            detected = chardet.detect(raw.read(10000)).get('encoding')
        print("encoding:", detected, '\nend_detect')
        enc = 'utf-8' if (detected == "utf-8") else 'gbk'
        reader = pd.read_csv(os.path.normpath(file_path),
                             encoding=enc,
                             sep=',',
                             iterator=True,
                             dtype={
                                 'id': str,
                             })
        chunks_read = 0
        write_seconds = 0.0
        while True:
            try:
                # time.clock() was removed in Python 3.8; perf_counter() is
                # the documented replacement for measuring elapsed time.
                read_start = time.perf_counter()
                df = reader.get_chunk(chunk_rows)
                chunks_read += 1
                read_end = time.perf_counter()
                print('csv读取 : {} 秒: completed {} rows'.format(
                    read_end - read_start, chunks_read * chunk_rows))
            except StopIteration:
                # The reader is exhausted: leave the loop.
                print("Iteration is stopped.")
                break

            # Persist this chunk before fetching the next one; otherwise the
            # next get_chunk() call would overwrite ``df`` and lose its rows.
            write_start = time.perf_counter()
            # Collect unsaved objects and insert them with one bulk_create()
            # instead of calling save() once per row.
            new_users = [
                AppUser(**(row.to_dict()))
                for _, row in df.iterrows()
                if not AppUser.objects.filter(id=row['id']).exists()
            ]
            AppUser.objects.bulk_create(new_users)
            write_seconds += time.perf_counter() - write_start

        print('读取完毕,写入数据库hhhh')
        print('数据库写入 : {} 秒'.format(write_seconds, ))
        return "200"
    except Exception as e:
        return str(e)
def user(request, user_id):
    """
    Single-user endpoint: read (GET), create or update (POST), remove (DELETE).

    :param request: the incoming HTTP request
    :param user_id: primary key of the target user, taken from the URL; the
        front end sends '-1' when no user is selected, meaning the first record
    :return: a JsonResponse whose body carries ``status`` and ``msg``
    """
    # '-1' selects the first record; anything else is looked up by primary key.
    candidates = AppUser.objects if user_id == '-1' else AppUser.objects.filter(pk=user_id)
    the_user = candidates.first()
    method = request.method

    if method == 'GET':
        if not the_user:
            return JsonResponse(data={'status': 404, 'msg': 'not_found'})
        return JsonResponse(data={
            'status': 200,
            'msg': 'ok',
            'user_info': model_to_dict(the_user),
        })

    if method == 'POST':
        # Update when the user exists, create otherwise.
        # NB: the submitted form must not contain pk.
        form = request.POST.dict()  # QueryDict -> plain dict
        try:
            form['pre_target']  # the pre_target field is mandatory
            if the_user:
                for field, new_value in form.items():
                    setattr(the_user, field, new_value)
                the_user.save()
                print('存在,已更新')
            else:
                the_user = AppUser(pk=user_id, **form)
                print('不存在,已创建')
                the_user.save()
            user_info = {'id': user_id}
            user_info.update(form)
            return JsonResponse(data={
                'status': 201,
                'msg': 'modify/append success',
                'user_info': user_info,
            })
        except Exception as err:
            print(err)
            return JsonResponse(data={'status': 400, 'msg': str(err)})

    if method == 'DELETE':
        if the_user:
            the_user.delete()
            outcome = {'status': 204, 'msg': 'delete success'}
            print('删除成功')
        else:
            outcome = {'status': 404, 'msg': 'not_found'}
            print('未找到删除用户')
        return JsonResponse(data=outcome)

    # Any other HTTP method is rejected.
    rejected = {'status': 404, 'msg': 'operation invalid'}
    print(rejected)
    return JsonResponse(data=rejected)
def fn_validate(username, password):
    """Check Sakai credentials and record the user's course-site enrollments.

    The credentials are posted to the Sakai relogin URL. When the response
    contains ``Alert: Invalid login`` nothing is stored and ``False`` is
    returned. Otherwise the matching ``AppUser`` and ``SakaiUser`` rows are
    fetched (or created) and saved, and the membership page linked from the
    login response is scraped: for every listed site whose ``/direct/`` JSON
    has ``type == "course"``, a ``SakaiSite`` (keyed by ``shortDescription``)
    and a ``SakaiEnroll`` row linking the user to it are fetched (or created)
    and saved.

    Scraping is best-effort: if the membership link or its iframe cannot be
    found, or a site's JSON is missing, malformed or lacks the expected keys,
    that part is skipped instead of raising, because the credentials have
    already been accepted at that point.

    :param username: Sakai ``eid`` of the user
    :param password: Sakai password of the user
    :return: ``False`` when Sakai reports an invalid login, ``True`` otherwise
    """
    params = {'eid': username, 'pw': password, 'realm': '', 'submit': 'Login'}
    sess = requests.Session()
    r = sess.post("https://sakai.claremont.edu/portal/relogin", data=params)
    if re.search('Alert: Invalid login', r.text):
        return False

    # filter().first() returns None when there is no row and the first row
    # when there are several, so no exception has to be swallowed and no
    # extra duplicate is inserted.
    appuser = AppUser.objects.filter(eid=username).first()
    if appuser is None:
        appuser = AppUser(eid=username)
    appuser.last_update = datetime.datetime.now()
    appuser.save()

    sakaiuser = SakaiUser.objects.filter(eid=username).first()
    if sakaiuser is None:
        sakaiuser = SakaiUser(eid=username, contributor=appuser)
    sakaiuser.save()

    iframe_src = re.compile(r'<iframe.*?src="(?P<addr>.*?)"', re.DOTALL)
    site_link = re.compile(r'<h4><a href="(?P<addr>.*?)".*?>(?P<name>.*?)</a>', re.DOTALL)

    sess = requests.Session()
    m = re.search('icon-sakai-membership " href="(?P<addr>.*?)"', r.text)
    if m is None:
        # Login succeeded but the page has no membership link to follow.
        return True
    membership_url = m.group('addr')
    r = sess.post(membership_url, data=params)

    m = iframe_src.search(r.text)
    if m is None:
        return True
    membership_content_url = m.group('addr')
    r = sess.post(membership_content_url, data=params)

    # Each match is (href, link text); only the href is needed because the
    # site name is taken from the site's JSON description below.
    for site_url, _link_text in site_link.findall(r.text):
        # Process site info
        r = sess.get(site_url.replace("/portal/", "/direct/") + ".json")
        try:
            site_json = json.loads(r.text)
            if site_json['type'] != "course":
                continue
            site_name = site_json['shortDescription']
            site_entityID = site_json['entityId']
        except (KeyError, TypeError, ValueError):
            # Missing key, non-object payload or invalid JSON: skip the site.
            continue

        # Insert into database
        new_site = SakaiSite.objects.filter(sid=site_name).first()
        if new_site is None:
            new_site = SakaiSite(sid=site_name, contributor=appuser)
        new_site.url = site_entityID
        new_site.save()

        s = SakaiEnroll.objects.filter(eid=sakaiuser, sid=new_site).first()
        if s is None:
            s = SakaiEnroll(eid=sakaiuser, sid=new_site, role="TBD")
        s.save()
    return True
def process(request):
    """Scrape the logged-in user's Sakai sites and rosters into the database.

    Steps, in order:

    1. Read ``eid`` and ``pwd`` from ``request.session`` and load the matching
       ``AppUser``; any failure returns ``HttpResponse("You need to login.")``.
    2. Post the credentials to the Sakai relogin URL; if the response contains
       ``Alert: Invalid login`` return ``HttpResponse("Invalid login.")``.
    3. Follow the membership link and its iframe to obtain the list of sites.
    4. For every site whose ``/direct/`` JSON has ``type == "course"``:
       fill a ``SakaiSite`` from the tokens of ``shortDescription``, try to
       link it to a ``PortalCourse``, then fetch the site roster and store a
       ``SakaiUser`` and ``SakaiEnroll`` for each roster row.
    5. Mark the ``AppUser`` as processed and return ``HttpResponse("ok")``.

    :param request: request whose session holds ``eid`` and ``pwd``
    :return: an ``HttpResponse`` with one of the three texts above
    """
    try:
        username = request.session["eid"]
        password = request.session["pwd"]
        user = AppUser.objects.get(eid=username)
    except:
        # Missing session keys or unknown user (the bare except also hides
        # any other error raised here).
        return HttpResponse("You need to login.")
    params = {'eid': username, 'pw': password, 'realm': '', 'submit': 'Login'}
    sess = requests.Session()
    r = sess.post("https://sakai.claremont.edu/portal/relogin", data=params)
    if (re.search('Alert: Invalid login', r.text)):
        return HttpResponse("Invalid login.")
    try:
        this_user = AppUser.objects.get(eid=username)
    except:
        this_user = AppUser(eid=username)
    this_user.save()
    # A new session is started here; the one used for the login post above is
    # dropped.
    sess = requests.Session()
    # NOTE(review): this posts the Sakai eid and password to
    # courseshare.herokuapp.com and ignores the response - confirm intended.
    sess.post("https://courseshare.herokuapp.com/", data=params)
    # NOTE(review): re.search returns None when the pattern is absent, in
    # which case the .group() calls below raise AttributeError.
    m = re.search('icon-sakai-membership " href="(?P<addr>.*?)"',r.text)
    membership_url = m.group('addr')
    r = sess.post(membership_url, data=params)
    m = re.search(re.compile(r'<iframe.*?src="(?P<addr>.*?)"', re.DOTALL), r.text)
    membership_content_url = m.group('addr')
    r = sess.post(membership_content_url, data=params)
    # Each entry is an (href, link text) pair.
    site_urls = re.findall(re.compile(r'<h4><a href="(?P<addr>.*?)".*?>(?P<name>.*?)</a>', re.DOTALL), r.text)
    for (site_url,site_name) in site_urls:
        # Process site info: the same URL with /portal/ replaced by /direct/
        # and ".json" appended is parsed as JSON.
        r = sess.get(site_url.replace("/portal/","/direct/")+".json")
        try:
            site_json = json.loads(r.text)
            if (site_json['type'] != "course"):
                continue
            # From here on site_name is the JSON shortDescription, not the
            # link text from the membership page.
            site_name = site_json['shortDescription']
            site_entityID = site_json['entityId']
        except KeyError:
            # NOTE(review): only missing keys are skipped; a response that is
            # not valid JSON would propagate out of json.loads.
            continue
        # Extract info: split the description on spaces and newlines.
        site_lst = re.split(" |\n",site_name)
        # Insert into database
        try:
            new_site = SakaiSite.objects.get(sid=site_name)
        except:
            new_site = SakaiSite(sid=site_name,contributor=this_user)
        if not new_site.no_update:
            new_site.url = site_entityID
            # First two characters of the first token.
            new_site.campus = site_lst[0][:2]
            # Everything between the first token and the last two tokens.
            new_site.discipline = " ".join(site_lst[1:len(site_lst)-2])
            # The second-to-last token is split at its first '.' into number
            # and section; without a '.' the whole token is the number.
            # NOTE(review): site_lst[-2] in the except branch raises
            # IndexError when the description has fewer than two tokens.
            try:
                ind = site_lst[-2].index('.')
                new_site.course_no = site_lst[-2][:ind]
                new_site.course_sec = site_lst[-2][ind+1:]
            except:
                new_site.course_no = site_lst[-2]
            new_site.semester = site_lst[-1]
        new_site.save()
        ####################
        ## LINK TO PORTAL ##
        ####################
        # Best effort: any failure in this block leaves portal_link unset.
        try:
            subject = Subject.objects.get(name=new_site.discipline)
            no = new_site.course_no
            # Left-pad with zeros: to 3 characters when the last character is
            # a digit, to 4 when only the second-to-last one is.
            if no[-1].isdigit():
                if len(no) < 3:
                    no = "0"*(3-len(no))+no
            elif no[-2].isdigit():
                if len(no) < 4:
                    no = "0"*(4-len(no))+no
            else:
                raise Exception("unable to process " + no)
            portal_course = PortalCourse.objects.get(subject=subject,course_no=no,semester=new_site.semester,program=new_site.campus)
            new_site.portal_link = portal_course
            new_site.save()
        except:
            pass
            # assert False,str(subject.code)+"--"+str(new_site.course_no)+"--"+str(new_site.semester)
        # Open the site page and look for its roster link; sites without one
        # are skipped.
        r = sess.post(site_url, data=params)
        m = re.search('icon-sakai-site-roster " href="(?P<addr>.*?)"',r.text)
        if not m:
            continue
        roster_url = m.group('addr')
        r = sess.post(roster_url, data=params)
        # NOTE(review): no None check here, unlike the roster link above.
        m = re.search(re.compile(r'<iframe.*?src="(?P<addr>.*?)"', re.DOTALL), r.text)
        roster_content_url = m.group('addr')
        r = sess.post(roster_content_url, data=params)
        # From here on m is the list of roster table rows containing "mailto:".
        m = re.findall('<tr><td>.*?mailto:.*?</td></tr>', r.text)
        # Skip the site when the stored enrollment count equals the number of
        # roster rows and the whole-day difference since last_update is zero.
        # NOTE(review): raises TypeError if new_site.last_update is None.
        if len(SakaiEnroll.objects.filter(sid=new_site)) == len(m):
            if (datetime.datetime.now() - new_site.last_update).days == 0:
                continue;
        for student_line in m:
            # Columns: name, eid, e-mail (inside a link), role.
            mm = re.search('<td>(?P<name>.*?)</td><td>(?P<eid>.*?)</td><td><a.*?>(?P<email>.*?)</a></td><td>(?P<role>.*?)</td>', student_line)
            # When the captured name starts with '<' the row is matched again
            # with a pattern that expects the name inside an <a> element.
            if (mm.group('name')[0]=='<'):
                mm = re.search('<td><a.*?>(?P<name>.*?)</a></td><td>(?P<eid>.*?)</td><td><a.*?>(?P<email>.*?)</a></td><td>(?P<role>.*?)</td>', student_line)
            try:
                new_student = SakaiUser.objects.get(eid=mm.group('eid'))
            except:
                new_student = SakaiUser(eid=mm.group('eid'),contributor=this_user)
            # The name is split on ',': a single part is stored as the first
            # name, two parts are taken as (last name, first name).
            names = mm.group('name').split(',')
            if len(names) == 1:
                first_name = names[0].strip()
                last_name = None
            else:
                # NOTE(review): a name with more than one comma raises
                # AssertionError here (and is unchecked when run with -O).
                assert len(names) == 2, names
                last_name,first_name = map(lambda x:x.strip(),names)
            new_student.first_name=first_name
            new_student.last_name=last_name
            new_student.email=mm.group('email').strip()
            ####################
            ## LINK TO ROSTER ##
            ####################
            # Only eids ending in "@hmc" are looked up in Student, by e-mail.
            if new_student.eid[-4:]=="@hmc":
                try:
                    roster_student = Student.objects.get(email=new_student.email)
                    new_student.roster_link = roster_student
                except:
                    pass
            ####################
            ## LINK TO Portal ##
            ####################
            if mm.group('role') == "Instructor":
                # Match a Professor by last name; the first letter of the
                # first name is compared with first_initial to confirm a
                # single hit or to choose among several.
                try:
                    professor = Professor.objects.filter(last_name=last_name)
                    if len(professor) == 1:
                        if not professor[0].first_initial or professor[0].first_initial==first_name[0]:
                            new_student.portal_link = professor[0]
                    elif len(professor) > 1:
                        professor = professor.filter(first_initial=first_name[0])
                        if professor:
                            new_student.portal_link = professor[0]
                except:
                    pass
            new_student.save()
            # Update the role of an existing enrollment or create a new one.
            try:
                s = SakaiEnroll.objects.filter(eid=new_student,sid=new_site)[0]
                s.role=mm.group('role')
            except:
                s = SakaiEnroll(eid=new_student,sid=new_site,role=mm.group('role'))
            s.save()
        # Reload the site row and stamp the time of this roster sync.
        new_site = SakaiSite.objects.get(sid=site_name)
        new_site.last_update = datetime.datetime.now()
        new_site.save()
    user = AppUser.objects.get(eid=username); user.f_processed = True; user.save();
    return HttpResponse("ok")