def create_user(self, user):
    """Insert a new ``User`` row built from the mapping ``user``.

    ``user`` is indexed with the keys ``'username'``, ``'password'`` and
    ``'email'``.  The record is added to the module-level ``session`` and
    committed.  Returns ``None``.
    """
    fields = {key: user[key] for key in ('username', 'password', 'email')}
    record = User(**fields)
    session.add(record)
    session.commit()
def post(self, headers):
    """Save an uploaded SOP file to ``./files/`` and record it in the database.

    The request arguments parsed by ``self.parser`` supply the upload under
    ``'file'`` plus ``sop_name``, ``study_id``, ``user_id`` and
    ``sop_description``.

    Args:
        headers: passed in by the caller; not used by this handler.

    Returns:
        ``{'message': 'success', 'filename': <saved file name>}`` on success,
        or a ``(body, 400)`` tuple when no file was supplied or it could not
        be written.
    """
    data = self.parser.parse_args()
    print(data)

    # TODO(review): fields other than 'file' are not validated; a missing one
    # is stored as None.
    file = data.get('file')
    if not file:
        return {'message': 'No input g file provided'}, 400

    try:
        filename = secure_filename(file.filename)
        file.save(os.path.join('./files/', filename))
    except Exception as exc:
        # Was a bare ``except:``, which also trapped SystemExit and
        # KeyboardInterrupt and hid the cause of the failure.
        print(exc)
        return {'message': 'file save error'}, 400

    sop = sopFile(sop_name=data.get('sop_name'),
                  study_id=data.get('study_id'),
                  user_id=data.get('user_id'),
                  sop_date_create=datetime.datetime.now(),
                  sop_description=data.get('sop_description'))
    session.add(sop)
    session.commit()
    # Report the name the file was stored under.  The original returned
    # ``file.name``, which on an uploaded file object is the form-field name,
    # not the file name.
    return {'message': 'success', 'filename': filename}
def post(self, headers):
    """Create a ``Study`` from the JSON request body.

    The body must be a JSON object carrying ``name``, ``status_id``,
    ``user_id``, ``description``, ``plan_start_date`` and ``plan_end_date``.
    Dates are passed to the model as received (the original note gives the
    incoming format as e.g. ``2014-08-11T05:26:03.869245``).

    Args:
        headers: passed in by the caller; not used by this handler.

    Returns:
        ``{'message': 'success', 'filename': <study_id>}`` on success, or a
        ``(body, 400)`` tuple when the body is empty, is not an object, lacks
        a required field, or the name is already taken.
    """
    json_data = request.get_json(force=True)
    if not json_data:
        return {'message': 'No input data provided'}, 400
    print(json_data)

    required = ('name', 'status_id', 'user_id', 'description',
                'plan_start_date', 'plan_end_date')
    if not isinstance(json_data, dict):
        return {'message': 'JSON body must be an object'}, 400
    # Previously a missing field surfaced as an unhandled KeyError.
    missing = [field for field in required if field not in json_data]
    if missing:
        return {'message': 'Missing required fields: ' + ', '.join(missing)}, 400

    if Study.query.filter_by(name=json_data['name']).first():
        return {'message': 'name already exists'}, 400

    study = Study(**{field: json_data[field] for field in required})
    session.add(study)
    session.commit()
    # NOTE(review): the new id is returned under the key 'filename'; the key
    # is kept unchanged for backward compatibility.
    return {'message': 'success', 'filename': study.study_id}
def add_api(self, id, data):
    """Insert an ``Api`` row that belongs to the user ``id``.

    ``data`` is indexed with ``'api_name'`` and ``'description'``.  The row
    is added to the module-level ``session`` and committed.  Returns ``None``.
    """
    name = data['api_name']
    summary = data['description']
    record = Api(api_name=name, description=summary, user_id=id)
    session.add(record)
    session.commit()
def changed_user(self, _id, new_user):
    """Replace the stored user ``_id`` with a record built from ``new_user``.

    The existing row is deleted and a new ``User`` is inserted in the same
    commit.

    Args:
        _id: primary key passed to ``User.query.get``.
        new_user: mapping indexed with ``'id'``, ``'username'``,
            ``'password'`` and ``'email'``.

    Raises:
        KeyError: if ``new_user`` lacks one of those keys (raised before the
            session is touched).
        Exception: whatever the session raises; the session is rolled back
            first and the error is re-raised unchanged.
    """
    # Build the replacement before queuing the delete.  The original queued
    # the delete first, so a malformed ``new_user`` left a pending delete in
    # the shared session for the next unrelated commit to flush.
    replacement = User(id=new_user['id'],
                       username=new_user['username'],
                       password=new_user['password'],
                       email=new_user['email'])
    old_user_info = User.query.get(_id)
    try:
        # NOTE(review): if no user has this id, ``old_user_info`` is None and
        # ``session.delete`` raises, exactly as before.
        session.delete(old_user_info)
        session.add(replacement)
        session.commit()
    except Exception:
        # Leave the shared session usable for the next caller.
        session.rollback()
        raise
def get_jd_data(self, key):
    """Crawl JD search results for ``key`` and store one row per product.

    For each result item the link, title, price, merchant and image title
    are read from the listing, saved as an ``airConditioner`` row, and then
    ``self.get_detail`` is called for the link.  The crawler then clicks the
    "next page" button and repeats, for at most 51 pages (``count`` 0..50)
    or until no next-page button is found.

    Args:
        key: search keyword handed to ``self.search``.
    """
    page = self.search(key)
    soup = BeautifulSoup(page, features="html.parser")
    count = 0
    while count <= 50:
        for item in soup.select("#J_goodsList > .gl-warp > .gl-item"):
            # Reset per item so the error report below never hits an unbound
            # name or shows the previous item's link.
            href = None
            try:
                img_box = item.select_one("div.p-img > a")
                tag_str = img_box["title"]
                href = img_box["href"]
                if not href.startswith("https"):
                    href = "https:" + href
                if self.is_spider(href):
                    print("the href %s is spidered" % href)
                    continue
                price = item.select_one(
                    "div.p-price > strong > i").get_text()
                title = item.select_one(".p-name > a > em").get_text()
                merchant = item.select_one(
                    ".p-shop > .J_im_icon > a").get_text()
                # The listing shows a single price, so it is stored as both
                # the current and the original price (multiplied by 100).
                model = airConditioner(link=href,
                                       merchant=merchant,
                                       tag_str=tag_str,
                                       title=title,
                                       price=float(price) * 100,
                                       origin_price=float(price) * 100,
                                       platform=self.website_name)
                session.add(model)
                session.commit()
            except Exception as e:
                # A failed commit leaves the session unusable until it is
                # rolled back; without this every later item would fail too.
                session.rollback()
                print("merchant message: %s" % href)
                print(e)
                continue
            # Crawl the follow-up (detail) data for this product.
            self.get_detail(href)

        # Look for the next-page button.  The singular find_element_* call
        # raises when nothing matches, so the original ``is None`` check
        # could never end the crawl; the plural form returns an empty list.
        next_buttons = self.get_driver().find_elements_by_css_selector(
            "#J_bottomPage > span.p-num > a.pn-next")
        if not next_buttons:
            break
        count = count + 1
        next_buttons[0].click()
        # Wait for the page to load: JD product pages load more items on
        # scroll, so scroll and then sleep for a fixed 5 seconds.
        self.scroll()
        time.sleep(5)
        # NOTE(review): reads ``self.page_source`` rather than the driver's
        # page source; kept as in the original - confirm it is a property.
        soup = BeautifulSoup(self.page_source, features="html.parser")
def post(self, headers):
    """Create a ``Project`` from the JSON request body.

    The body must be a JSON object carrying every field listed in
    ``required`` below.  ``projectCreatedTime`` is set by the server.
    Date fields are passed to the model as received (the original note gives
    the incoming format as e.g. ``2014-08-11T05:26:03.869245``).

    Args:
        headers: passed in by the caller; not used by this handler.

    Returns:
        ``{'statusCode': '1', 'projectID': <id>}`` on success, or a
        ``(body, 400)`` tuple when the body is empty, is not an object, lacks
        a required field, or the project name is already taken.
    """
    json_data = request.get_json(force=True)
    if not json_data:
        return {'message': 'No input data provided'}, 400
    print(json_data)

    required = (
        'projectName', 'projectStage', 'projectBriefIntroduction',
        'projectExpectedStartTime', 'projectActualStartTime',
        'projectExpectedEndTime', 'projectActualEndTime',
        'projectSponsor', 'projectInvestigator', 'projectMonitor',
        'projectStatistician', 'projectComment1', 'projectComment2',
        'projectComment3', 'projectComment4',
    )
    if not isinstance(json_data, dict):
        return {'message': 'JSON body must be an object'}, 400
    # Previously a missing field surfaced as an unhandled KeyError.
    missing = [field for field in required if field not in json_data]
    if missing:
        return {'message': 'Missing required fields: ' + ', '.join(missing)}, 400

    if Project.query.filter_by(projectName=json_data['projectName']).first():
        return {'message': 'name already exists'}, 400

    project = Project(
        projectCreatedTime=time.ctime(time.time()),
        **{field: json_data[field] for field in required}
    )
    # Add and commit on the same session object.  The original added to
    # ``session`` but committed ``db.session``.
    session.add(project)
    session.commit()
    # NOTE(review): linking the creator and involved users through
    # ``userProject`` was disabled in the original and is still not done here.
    return {'statusCode': '1', 'projectID': project.projectID}
def post(self, headers):
    """Store an uploaded template file under ``./static/`` and register it.

    The upload is read from the multipart field ``'template'``; the parsed
    arguments (``self.parser``) supply ``templateName``,
    ``templateDescription`` and ``templateStatus``.

    Args:
        headers: mapping that provides the creator's ``"userID"``.

    Returns:
        ``{"statusCode": "1"}`` on success, or a ``(body, 400)`` tuple when
        the upload is missing, the template name or file name is already
        registered, or the creator cannot be found.
    """
    data = self.parser.parse_args()
    print(data)

    template = request.files.get('template')
    if not template:
        return {'message': 'No input template provided'}, 400

    # The file name comes from the client.  Keep only its last path component
    # so a name such as '../../app.py' cannot be written outside ./static/.
    # secure_filename-style sanitising was left disabled by the original
    # author (the note says it drops Chinese characters), so it is not used.
    filename = os.path.basename(template.filename)
    if filename in ('', '.', '..'):
        return {'message': 'No input template provided'}, 400

    # Reject duplicates before doing any further work.
    if Template.query.filter_by(templateName=data.get('templateName')).first():
        return {'message': 'taskTemplate already exists'}, 400
    if Template.query.filter_by(templateDownloadURL=filename).first():
        return {'message': 'Template file already exists, please rename'}, 400

    creator = session.query(User).filter_by(userID=headers["userID"]).first()
    if creator is None:
        # Previously this was an AttributeError on ``None.userRealName``.
        return {'message': 'creator user does not exist'}, 400

    taskTemplate = Template(
        templateName=data.get('templateName'),
        templateDescription=data.get('templateDescription'),
        templateCreateDate=time.ctime(time.time()),
        templateCreatorID=headers["userID"],
        templateStatus=data.get('templateStatus'),
        templateRemoveDate=None,
        templateRemoveExecutorID=None,
        templateDeleteDate=None,
        templateDeleteExecutorID=None,
        templateDownloadURL=filename,
        templateCreatorRealName=creator.userRealName)

    template.save(os.path.join('./static/', filename))
    session.add(taskTemplate)
    session.commit()
    return {"statusCode": "1"}
def post(self, headers):
    """Create a ``Task`` from the JSON request body and notify its executor.

    The body must be a JSON object carrying every field listed in
    ``required`` below.  The creator is taken from ``headers['userID']``.
    After the task is committed, ``sendMail`` is called with the executor's
    address and the task details.

    Args:
        headers: mapping that provides the creator's ``'userID'``.

    Returns:
        ``{"statusCode": "1", "taskID": <id>}`` on success, or a
        ``(body, 400)`` tuple when the body is empty, is not an object, lacks
        a required field, the task name is taken, or the referenced project
        or executor does not exist.
    """
    json_data = request.get_json(force=True)
    if not json_data:
        return {'message': 'No input data provided'}, 400
    print(json_data)

    required = ('taskName', 'taskBelongedToProjectID', 'taskExecutorID',
                'taskReceivedStatus', 'taskDueTime', 'taskProgress',
                'taskDescription')
    if not isinstance(json_data, dict):
        return {'message': 'JSON body must be an object'}, 400
    # Previously a missing field surfaced as an unhandled KeyError.
    missing = [field for field in required if field not in json_data]
    if missing:
        return {'message': 'Missing required fields: ' + ', '.join(missing)}, 400

    if Task.query.filter_by(taskName=json_data['taskName']).first():
        return {'message': 'name already exists'}, 400

    project = session.query(Project).filter_by(
        projectID=json_data['taskBelongedToProjectID']).first()
    if project is None:
        return {'message': 'project does not exist'}, 400

    # The executor's real name is read from the executor's own record.  The
    # original looked it up with the creator's id (headers['userID']), so
    # ``taskExecutorRealName`` held the creator's name whenever the two
    # differed.
    executor = session.query(User).filter_by(
        userID=json_data['taskExecutorID']).first()
    if executor is None:
        return {'message': 'executor does not exist'}, 400

    task = Task(
        taskName=json_data['taskName'],
        taskBelongedToProjectID=json_data['taskBelongedToProjectID'],
        taskCreatorID=headers['userID'],
        taskCreatedTime=time.strftime("%Y-%m-%d", time.localtime()),
        taskExecutorID=json_data['taskExecutorID'],
        taskReceivedStatus=json_data['taskReceivedStatus'],
        taskDueTime=json_data['taskDueTime'],
        taskProgress=json_data['taskProgress'],
        taskCompletedStatus=None,
        taskDescription=json_data['taskDescription'],
        taskActualCompletedTime=None,
        taskBelongedToProjectName=project.projectName,
        taskExecutorRealName=executor.userRealName)
    session.add(task)
    session.commit()

    # ``executor`` and ``project`` are the rows already loaded above, so the
    # original's second pair of queries is not repeated.
    # NOTE(review): ``taskDueTime.strftime`` assumes the attribute is a
    # date/datetime once reloaded after the commit - confirm the column type.
    sendMail("任务分配通知", executor.userEmail, executor.userRealName,
             project.projectName, task.taskName, task.taskDescription,
             task.taskDueTime.strftime("%Y-%m-%d"))
    return {"statusCode": "1", "taskID": task.taskID}
def post(self, headers):
    """Create a ``User`` from the JSON request body and mail the credentials.

    The body must be a JSON object carrying ``username``, ``userRealName``,
    ``password``, ``userEmail`` and ``userAccountStatus``.  Two optional
    lists link the new user to projects through ``userProject``:
    ``userInvolvedProjectsID`` (stored with ``userType`` ``"2"``) and
    ``userCanManageProjectsID`` (stored with ``userType`` ``"1"``).

    Args:
        headers: mapping that provides the caller's ``'userID'``.

    Returns:
        ``{"statusCode": "1"}`` on success, or a ``(body, 400)`` tuple when
        the body is empty, is not an object, lacks a required field, or the
        username already exists.
    """
    print(headers['userID'])
    # TODO: check whether the calling user is allowed to add new users.
    json_data = request.get_json(force=True)
    if not json_data:
        return {'message': 'No input data provided'}, 400
    print(json_data)

    required = ('username', 'userRealName', 'password', 'userEmail',
                'userAccountStatus')
    if not isinstance(json_data, dict):
        return {'message': 'JSON body must be an object'}, 400
    # Previously a missing field surfaced as an unhandled KeyError.
    missing = [field for field in required if field not in json_data]
    if missing:
        return {'message': 'Missing required fields: ' + ', '.join(missing)}, 400

    if User.query.filter_by(username=json_data['username']).first():
        return {'message': 'user already exists'}, 400

    user = User(isAdmin=False,
                userLastLoginTime=None,
                **{field: json_data[field] for field in required})
    session.add(user)
    session.commit()

    user_id = user.userID
    links = (("userInvolvedProjectsID", "2"),
             ("userCanManageProjectsID", "1"))
    for key, user_type in links:
        project_ids = json_data.get(key)
        # Skip absent, null and empty lists.  The original passed an empty
        # parameter list straight to ``execute``.
        if not project_ids:
            continue
        db.session.execute(
            userProject.__table__.insert(),
            [{"userID": user_id, "projectID": project_id,
              "userType": user_type} for project_id in project_ids])
        db.session.commit()

    # NOTE(review): the password is stored and e-mailed exactly as received
    # (plaintext).  Behaviour kept; this should be revisited.
    sendMail("新用户创建成功", user.userEmail, user.userRealName,
             user.username, user.password)
    return {"statusCode": "1"}
def get_page_message(self, href):
    """Scrape one product detail page and store it as an ``airConditioner`` row.

    Loads ``href`` in ``self.driver`` (logging in first when a login link is
    present), reads title, tags, prices, merchant and the attribute table,
    then opens the review tab to read the score and review labels.  On a
    successful commit ``href`` is added to the redis set ``self.spider_key``.

    Args:
        href: URL of the product page.

    Returns:
        None.  Returns early without storing anything when the product block
        is present but has no title link.
    """
    print(href)
    self.driver.get(href)
    try:
        login_btn = self.driver.find_element_by_link_text("请登录")
        if login_btn is not None:
            self.login()
            self.driver.get(href)
    except Exception:
        # Best effort, as in the original: failures here are ignored and
        # only a blank line is printed.
        print()

    # Named ``soup`` so the local does not shadow the ``bs4`` package name.
    soup = BeautifulSoup(self.driver.page_source, features="html.parser")
    basic = soup.select_one(".tb-property")
    price = origin_price = score = 0
    tags_str = labels_str = title = ""
    if basic is not None:
        title_box = basic.select_one(".tb-detail-hd > h1 > a")
        if title_box is None:
            return
        # Title
        title = title_box.get_text()
        # Tags
        tag = basic.select_one(
            "#J_DetailMeta > div.tm-clear > div.tb-property > div > div.tb-detail-hd > p"
        )
        if tag is not None:
            tags_str = ",".join(helper.get_char(tag.get_text()))
        # Current price; for a "low-high" range keep the part before the dash.
        price = self.get_price(basic)
        if isinstance(price, str) and "-" in price:
            price = price.split("-")[0]
        # Original price
        origin_price = self.get_origin_price(basic)

    # Merchant: fall back to an empty string instead of failing with an
    # AttributeError when the shop element is missing.
    merchant_tag = soup.select_one("#shopExtra > div.slogo > a > strong")
    merchant = merchant_tag.get_text() if merchant_tag is not None else ""

    # Attribute table; rows without both a header and a value cell are skipped.
    parameter = {}
    for row in soup.select(".tm-tableAttr > tbody > tr:not(.tm-tableAttrSub)"):
        name_cell = row.select_one("th")
        value_cell = row.select_one("td")
        if name_cell is None or value_cell is None:
            continue
        parameter[name_cell.get_text()] = value_cell.get_text()

    # Reviews load only after the tab is clicked.  The site's anti-crawler
    # check may pop up a slider captcha; when that (or anything else) makes
    # this fail, the review data is skipped.
    ignore = False
    try:
        self.driver.execute_script("window.scrollTo(0, 200)")
        tags_button = self.wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "#J_TabBar > li:nth-child(3)")))
        tags_button.click()
        self.wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "#J_Reviews > div > div.rate-header")))
    except Exception as e:
        print(e)
        ignore = True

    if ignore is False:
        soup = BeautifulSoup(self.driver.page_source, features="html.parser")
        rate_box = soup.select_one("#J_Reviews > div > div.rate-header")
        if rate_box is None:
            rate_box = soup.select_one(
                "#J_Reviews > div > div.rate-header.rate-header-tags")
        if rate_box is not None:
            # Positive-review score.  The original comment says the scale
            # differs from Suning's, so it is converted to a 100-point scale.
            score_tag = rate_box.select_one(".rate-score > strong")
            if score_tag is not None:
                score = self.__transToCentesimal(score_tag.get_text())
            else:
                # NOTE(review): kept from the original - feedback is stored
                # as None (not 0) when the score element is missing.
                score = None
            # Review labels; some products have none.
            labels = []
            for link in rate_box.select(".rate-tag-inner > span > a"):
                chars = helper.get_char(link.get_text())
                # Guard against an empty result instead of an IndexError.
                if chars:
                    labels.append(chars[0])
            if labels:
                labels_str = ",".join(labels)
            else:
                print("not found tag list")

    # Prices are stored multiplied by 100.
    model = Conditioner.airConditioner(tag_str=tags_str,
                                       link=self.driver.current_url,
                                       title=title,
                                       merchant=merchant,
                                       property=parameter,
                                       price=float(price) * 100,
                                       origin_price=float(origin_price) * 100,
                                       feedback=score,
                                       labels=labels_str,
                                       platform=self.platform)
    session.add(model)
    try:
        session.commit()
    except exc.SQLAlchemyError as e:
        # Roll back so the shared session can be used for the next page.
        session.rollback()
        print("insert taobao data failed.", e)
    else:
        redis_client.sadd(self.spider_key, href)
    return
def post(self, headers):
    """Store an uploaded task file under ``./static/`` and register it.

    The upload is read from the multipart field ``'file'``; the parsed
    arguments (``self.parser``) supply ``fileName``, ``fileDescription``,
    ``fileBelongedToTaskID``, ``fileBelongedToProjectID`` and ``fileStatus``.

    Args:
        headers: mapping that provides the creator's ``"userID"``.

    Returns:
        ``{"statusCode": "1"}`` on success, or a ``(body, 400)`` tuple when
        the upload is missing, the file name is already registered, or the
        referenced task, project or creator cannot be found.
    """
    data = self.parser.parse_args()
    print(data)

    file = request.files.get('file')
    if not file:
        return {'message': 'No input file provided'}, 400

    # The file name comes from the client.  Keep only its last path component
    # so a name such as '../../app.py' cannot be written outside ./static/.
    # secure_filename was left disabled by the original author (the note says
    # it drops Chinese characters), so it is not used here.
    filename = os.path.basename(file.filename)
    if filename in ('', '.', '..'):
        return {'message': 'No input file provided'}, 400

    # Reject duplicates before doing any further work.
    if File.query.filter_by(fileName=data.get('fileName')).first():
        return {'message': 'taskFile already exists'}, 400
    if File.query.filter_by(fileDownloadURL=filename).first():
        return {'message': 'File already exists, please rename'}, 400

    # Each lookup used to dereference ``.first()`` directly and failed with
    # an AttributeError when the row did not exist.
    task = session.query(Task).filter_by(
        taskID=data.get('fileBelongedToTaskID')).first()
    if task is None:
        return {'message': 'task does not exist'}, 400
    project = session.query(Project).filter_by(
        projectID=data.get('fileBelongedToProjectID')).first()
    if project is None:
        return {'message': 'project does not exist'}, 400
    creator = session.query(User).filter_by(
        userID=headers["userID"]).first()
    if creator is None:
        return {'message': 'creator user does not exist'}, 400

    taskFile = File(
        fileName=data.get('fileName'),
        fileDescription=data.get('fileDescription'),
        fileBelongedToTaskID=data.get('fileBelongedToTaskID'),
        fileBelongedToProjectID=data.get('fileBelongedToProjectID'),
        fileCreateDate=time.ctime(time.time()),
        fileCreatorID=headers["userID"],
        fileStatus=data.get('fileStatus'),
        fileRemoveDate=None,
        fileRemoveExecutorID=None,
        fileDeleteDate=None,
        fileDeleteExecutorID=None,
        fileDownloadURL=filename,
        fileBelongedToTaskName=task.taskName,
        fileBelongedToProjectName=project.projectName,
        fileCreatorName=creator.username)

    file.save(os.path.join('./static/', filename))
    session.add(taskFile)
    session.commit()
    return {"statusCode": "1"}
def add_endpoint(self, id, data):
    """Insert an ``Endpoint`` row for the API ``id``.

    The URI is read from ``data['uri']``.  The row is added to the
    module-level ``session`` and committed.  Returns ``None``.
    """
    endpoint = Endpoint(uri=data['uri'], api_id=id)
    session.add(endpoint)
    session.commit()
def getSuNing(page=0, offset=0):
    """Crawl Suning list pages and store one ``airConditioner`` row per item.

    Starting at ``page`` / ``offset``, each list chunk is fetched and every
    product not already in the redis set ``REDIS_KEY`` is scraped through
    the ``sn`` helper and committed.  The crawl stops when a chunk has no
    items, when a request does not return HTTP 200, or after page 50.

    Args:
        page: list page to start from.
        offset: paging chunk within the page to start from (0..3).
    """
    while True:
        # NOTE(review): a new helper is created for every chunk, as in the
        # original - confirm whether it holds resources that need closing.
        ins = sn.sn()
        url = ins.get_list_url(page, offset)
        response = requests.get(url)
        if response.status_code != 200:
            print("获取苏宁家电异常, url:%s, code:%d" %
                  (url, response.status_code))
            return

        # Take the charset from Content-type when present; otherwise pass
        # None so BeautifulSoup detects the encoding.  The original raised an
        # IndexError when the header carried no charset.
        content_type = response.headers.get("Content-type", "")
        charset = content_type.partition("charset=")[2].strip().lower()
        bs = BeautifulSoup(response.content,
                           features="html.parser",
                           from_encoding=charset or None)
        data = bs.find_all("li", class_="basic")
        if not data:
            break

        for item in data:
            box = item.select_one(".product-box")
            img_box = None
            store_box = None
            if box is not None:
                img_box = box.select_one(".res-img > .img-block > a")
                store_box = box.select_one(".store-stock > a")
            if img_box is None or store_box is None:
                # Skip malformed entries instead of failing on None.
                print("skip malformed list item")
                continue

            href = img_box["href"]
            if not href.startswith("http"):
                href = "https:" + href
            if redis_client.sismember(REDIS_KEY, href) > 0:
                print("the url is scraped. url:%s" % href)
                continue
            print("fetch href: %s" % href)

            # Product parameters; the later getters take no URL.
            parameter = ins.get_parameter(href)
            price = ins.get_price()                 # selling price
            origin_price = ins.get_origin_price()   # original price
            score = ins.get_evaluate_score()        # positive-review score
            labels = ins.get_evaluate_labels()      # review labels

            # Prices are stored multiplied by 100.
            model = Conditioner.airConditioner(
                tag_str=img_box["title"],
                link=href,
                title=img_box.select_one("img")["alt"],
                merchant=store_box.get_text(),
                property=parameter,
                price=price * 100,
                origin_price=origin_price * 100,
                feedback=score,
                labels=",".join(labels),
                platform=ins.platform)
            session.add(model)
            try:
                session.commit()
            except exc.SQLAlchemyError as e:
                # Roll back so the session stays usable for later items.
                session.rollback()
                print("sql failed", e)
            else:
                # Mark the URL as scraped only once the row is stored, so a
                # failed insert is retried on the next run.
                redis_client.sadd(REDIS_KEY, href)

        # Per the original note, Suning's paging value ranges over 0..3.
        # ``offset <= 3`` let it reach 4, requesting a chunk outside that range.
        if offset < 3:
            offset = offset + 1
        else:
            page = page + 1
            offset = 0
        if page > 50:
            break