def run(self):
    """Call ``self.report()`` in an endless loop, reporting and restarting on errors.

    Any ``Exception`` raised by ``report()`` is formatted with
    ``get_exception_info`` and passed to ``send_message`` as an error report,
    after which ``self.restart()`` is invoked and the loop continues.
    """
    while True:
        try:
            self.report()
        except Exception as exc:
            error_details = [get_exception_info(exc)]
            send_message(subject="系统错误报告",
                         content=error_details,
                         attachments=[])
            self.restart()
def on_message(ws, message):
    """Handle one websocket message: track closes and alert on RSI extremes.

    The message is JSON whose ``'k'`` object carries ``'x'`` (candle closed
    flag) and ``'c'`` (close price). Only closed candles are processed; the
    original code had this check replaced by ``if True`` for testing, which
    fed every intermediate tick into the RSI series.

    Args:
        ws: The websocket the message arrived on (unused).
        message: Raw JSON text of the message.
    """
    global closes

    candle = json.loads(message)['k']
    is_candle_closed = candle['x']
    close = candle['c']

    # An unfinished candle's price is still moving; skip it so the RSI series
    # contains exactly one value per candle.
    if not is_candle_closed:
        return

    print("candle closed at {}".format(close))
    closes.append(float(close))

    # RSI needs more than RSI_PERIOD samples before its last value is usable.
    if len(closes) <= RSI_PERIOD:
        return

    rsi = talib.RSI(numpy.array(closes), RSI_PERIOD)
    last_rsi = rsi[-1]
    print("the latest RSI is {}".format(last_rsi))

    # Drop the oldest close so the window keeps a constant length.
    closes.pop(0)
    print("Closes:")
    print(closes)

    if last_rsi > RSI_OVERBOUGHT:
        print("SELL - BEING OVERBOUGHT")
        sender.send_message(
            f"Etherium being overbought, current price: {close}. Can consider selling!"
        )
    if last_rsi < RSI_OVERSOLD:
        print("BUY - BEING OVERSOLD")
        sender.send_message(
            f"Etherium being oversold, current price: {close}. Can consider buying!"
        )
def main(wb):
    """Crawl ten listing pages into a new '开源中国' worksheet of ``wb``.

    For every id returned by ``get_id`` the detail page is fetched with
    ``get_one_page``; string results are converted from HTML to text and
    appended as a row. Tuple results are treated as errors whose second item
    is the message.

    Args:
        wb: Workbook that receives the new sheet at index 1.
    """
    print('开始爬取开源中国订单')
    sheet = wb.create_sheet('开源中国', 1)
    sheet.append(['单据编号', '订单描述', '链接', '分配人员'])
    count = 1
    # The original template had no "{}" placeholder, so .format() was a no-op
    # and the same page was requested ten times. The "&current..." parameter
    # names had also been mangled into "¤t..." and are restored here.
    list_url = ('https://zb.oschina.net/project/contractor-browse-project-and-reward'
                '?applicationAreas=&moneyMinByYuan=&moneyMaxByYuan=&sortBy=30'
                '&currentTime=&pageSize=20&currentPage={}')
    for page in range(1, 11):
        id_list = get_id(list_url.format(page))
        if isinstance(id_list, list):
            for project_id in id_list:
                url = 'https://zb.oschina.net/project/detail?id=%s' % project_id
                desc = get_one_page(url)
                if isinstance(desc, str):
                    desc = html2text.html2text(desc).strip()
                    contact = get_contact(desc)
                    sheet.append([count, desc, url, contact])
                    count += 1
                elif isinstance(desc, tuple):
                    print('开源中国详情爬取出错:%s' % desc[1])
        elif isinstance(id_list, tuple):
            message = '开源中国爬取出错:%s' % id_list[1]
            print(message)
            send_message(message)
    print('结束爬取开源中国订单')
def client():
    """Interactive console chat client: authenticate, then send/get messages.

    Prompts for name and password until ``authenticate`` replies with a truthy
    ``"reply"`` field, then loops forever reading commands:

    * ``send`` - prompt for text and pass it to ``send_message``.
    * ``get``  - fetch messages newer than the last seen id and print them.

    Any other command prints a "command not found" notice.
    """
    got_message = 0
    authenticated = False
    # Truthiness test (not "== False") so a null/empty reply keeps prompting
    # instead of being accepted as a successful login.
    while not authenticated:
        name = input("Введите имя:")
        password = input("Введите пароль:")
        reply = authenticate(name, password)
        authenticated = reply.json()["reply"]
    print("name", name)
    print("pass", password)

    while True:
        print("\n\n")
        command = input('Введите команду(send - для отправки сообщения, get - для получения новых)')
        if command == 'send':
            text = input("Введите сообщение:")
            send_message(name, text)
        elif command == 'get':
            data = recieve_message(got_message)
            messages = data["messages"]
            print(messages)
            # No new messages: keep the previous id. The original called
            # messages.pop() unconditionally and crashed on an empty list.
            if messages:
                got_message = messages[-1]["id"]
        else:
            print("Команда не найдена")
def crawl_save_upload():
    """Crawl all sources into one workbook, save it under data/ and upload it.

    Each crawler module's ``main(wb)`` fills its own sheet. The workbook is
    saved with a timestamped file name, then uploaded via ``get_media_id`` and
    ``send_file``; any upload failure is printed and sent with ``send_message``.
    """
    print('-----数据抓取开始-----')
    wb = Workbook()
    crawlers = (
        codemart_crawler,
        oschina_crawler,
        rrkf_crawler,
        wywaibao_crawler,
        yuanjisong_crawler,
        shixian_crawler,
    )
    for crawler in crawlers:
        crawler.main(wb)
    print('-----数据抓取结束-----')

    print('-----文件保存开始-----')
    timestamp = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
    file = r'data/%s.xlsx' % timestamp
    wb.save(file)
    time.sleep(3)
    print('-----文件保存结束-----')

    print('-----文件上传开始-----')
    failure = None
    media_id = get_media_id(file)
    if not isinstance(media_id, str):
        # A non-string result is indexed as an error whose item 1 is the reason.
        failure = '获取media_id失败:%s' % media_id[1]
    else:
        upload_result = send_file(media_id)
        if upload_result == True:
            print('文件上传成功:%s' % file)
        else:
            failure = '文件上传失败:%s' % upload_result[1]
    if failure is not None:
        print(failure)
        send_message(failure)
    print('-----文件上传结束-----')
def report(self, is_test=False):
    """Send the scheduled report when the current hour matches the check time.

    When the current hour (as an unpadded string) equals
    ``self.time_to_check``, the check time is advanced to the next entry of
    ``REPORT_TIME`` (wrapping around) and persisted, the report from
    ``self.get_info(is_test)`` is sent, and a record is inserted into
    ``report_info_coll``. Otherwise nothing happens.

    Args:
        is_test: Forwarded unchanged to ``self.get_info``.
    """
    def _is_due(clock_text):
        # "09:15:00" -> "9", matching how time_to_check is compared.
        hour = str(int(clock_text.split(":")[0]))
        return hour == self.time_to_check

    def _advance_check_time():
        position = REPORT_TIME.index(self.time_to_check)
        # Step to the following slot; the modulo wraps the last back to the first.
        next_time = REPORT_TIME[(position + 1) % len(REPORT_TIME)]
        self.time_to_check = next_time
        var_coll = MongoConn(config=MONGODB_CONFIG).get_coll("system_var_coll")
        var_coll.update({"key": "report_check_time"},
                        {"$set": {"value": next_time}})

    _date = datetime.now().strftime("%Y-%m-%d")
    _time = datetime.now().strftime("%H:%M:%S")
    if not _is_due(_time):
        return

    _advance_check_time()
    # Run the report.
    info = self.get_info(is_test)
    send_message(subject="定时任务执行报告", content=info, attachments=[])
    report_coll = MongoConn(config=MONGODB_CONFIG).get_coll("report_info_coll")
    report_coll.insert(
        dict(date=_date, time=_time, datetime=datetime.now(), info=info))
def main(wb, session, OrderModel, WebsiteModel):
    """Crawl rrkf.com pages 10..1 into the '人人开发' sheet and sync orders to the DB.

    For each listed item the detail page is fetched; a list result yields the
    validity marker (``details[0]``) and publish time (``details[1]``).
    Existing orders get their ``is_valid``/``is_delete`` flags updated, unknown
    ones are inserted, and every currently valid order is appended to the
    sheet. The session is committed once per listing page. Non-list results
    are treated as errors and forwarded through ``send_message``.

    Args:
        wb: Workbook that receives the new sheet at index 2.
        session: Database session used for queries, inserts and commits.
        OrderModel: Mapped order class.
        WebsiteModel: Mapped website class; the row with key 3 is linked to new orders.
    """
    print('开始爬取人人开发订单')
    sheet = wb.create_sheet('人人开发', 2)
    sheet.append(['单据编号', '订单描述', '链接', '发布时间', '联系方式', '分配人员'])
    count = 1
    website = session.query(WebsiteModel).get(3)
    for i in range(10, 0, -1):
        # "&currentPage" had been mangled into "¤tPage", which dropped the
        # page parameter from the request; restored here.
        url = 'http://www.rrkf.com/serv/request?&currentPage=%d' % i
        info_list = get_info(url)
        if isinstance(info_list, list):
            for info in info_list:
                desc = info['desc']
                link = info['link']
                details = get_detail(link)
                if not isinstance(details, list):
                    message = '人人开发详情爬取第%d行出错:%s' % (details[0], details[1])
                    print(message)
                    send_message(message)
                    continue

                rid = 'rr-{}'.format(link.split('=')[1])
                contact = get_contact(desc)
                is_valid = '剩余' in details[0]
                pub_time = (datetime.strptime(details[1], "%Y-%m-%d %H:%M:%S")
                            if details[1] else None)

                order_query = session.query(OrderModel).get(rid)
                if order_query:
                    was_valid = order_query.is_valid
                    order_query.is_valid = is_valid
                    # Explicit ==True/==False comparisons are kept so a stored
                    # NULL flag triggers neither transition.
                    if is_valid:
                        if was_valid == False:
                            order_query.is_delete = False
                    elif was_valid == True:
                        order_query.is_delete = True
                else:
                    order = OrderModel(
                        id=rid, desc=desc, link=link, contact=contact,
                        category='', pub_time=pub_time, is_valid=is_valid,
                        is_delete=not is_valid)
                    order.website = website
                    session.add(order)

                if is_valid:
                    sheet.append([count, desc, link, pub_time, contact, ''])
                    count += 1
            session.commit()
        elif isinstance(info_list, tuple):
            message = '人人开发爬取第%d行出错:%s' % (info_list[0], info_list[1])
            print(message)
            send_message(message)
    print('结束爬取人人开发订单')
def ruuviScan():
    """Read the RuuviTag once, publish its state as JSON, then pause 2 seconds.

    The payload is ``{"uid": <sensor_mac as str>, "payload": <state>}``. It is
    serialised in one ``json.dumps`` call so the uid is always correctly
    quoted and escaped; for an ordinary MAC string the output is identical to
    the previous hand-concatenated text.
    """
    sensor = RuuviTag(sensor_mac)
    state = sensor.update()
    json_str = json.dumps({"uid": str(sensor_mac), "payload": state})
    print(json_str)
    sender.send_message(json_str)
    time.sleep(2)
def delete_data():
    """Delete the oldest files in ./data so at most ``reserve_file_count`` remain.

    ``os.listdir`` returns names in arbitrary order, so the listing is sorted
    before slicing; otherwise "the first N" could be any files, including the
    newest. Sorting by name is chronological only if the file names start with
    a sortable timestamp - NOTE(review): confirm against the code that writes
    into ./data.

    When files were removed, a summary is printed and sent with ``send_message``.
    """
    data_dir = './data'
    files = sorted(os.listdir(data_dir))
    delete_count = len(files) - reserve_file_count
    if delete_count > 0:
        for name in files[:delete_count]:
            os.remove(os.path.join(data_dir, name))
        message = '已删除过期文件%d个' % delete_count
        print(message)
        send_message(message)
def main(wb, session, OrderModel, WebsiteModel):
    """Crawl codemart API pages 10..1 into the '码市' sheet and sync orders to the DB.

    Projects published before ``time_point`` are skipped. Existing orders get
    their ``is_valid``/``is_delete`` flags updated, unknown ones are inserted,
    and every currently valid order is appended to the sheet. The session is
    committed once per page. A tuple result from ``get_one_page`` is reported
    as an error through ``send_message``.

    Args:
        wb: Workbook whose default 'Sheet' is renamed and filled.
        session: Database session used for queries, inserts and commits.
        OrderModel: Mapped order class.
        WebsiteModel: Mapped website class; the row with key 1 is linked to new orders.
    """
    print('开始爬取码市订单')
    sheet = wb['Sheet']
    sheet.title = '码市'
    sheet.append(['单据编号', '订单描述', '链接', '发布时间', '联系方式', '分配人员'])
    count = 1
    website = session.query(WebsiteModel).get(1)
    for page in range(10, 0, -1):
        result = get_one_page('https://codemart.com/api/project?page=%d' % page)
        if isinstance(result, tuple):
            message = '码市爬取第%d行出错:%s' % (result[0], result[1])
            print(message)
            send_message(message)
            continue
        if not isinstance(result, list):
            continue

        for r in result:
            # 'pubtime' is divided by 1000 before being used as a timestamp.
            publish_time = datetime.fromtimestamp(int(r['pubtime']) / 1000)
            if publish_time < time_point:
                continue
            desc = ILLEGAL_CHARACTERS_RE.sub(r'', r['description'])
            cid = 'cm-{}'.format(r['id'])
            contact = get_contact(desc)
            link = 'https://codemart.com/project/{}'.format(r['id'])
            is_valid = r['status'] == '招募中'

            existing = session.query(OrderModel).get(cid)
            if existing:
                was_valid = existing.is_valid
                existing.is_valid = is_valid
                if is_valid:
                    if was_valid == False:
                        existing.is_delete = False
                elif was_valid == True:
                    existing.is_delete = True
            else:
                order = OrderModel(id=cid,
                                   desc=desc,
                                   link=link,
                                   contact=contact,
                                   category=r['cate'],
                                   pub_time=publish_time,
                                   is_valid=is_valid,
                                   is_delete=not is_valid)
                order.website = website
                session.add(order)

            if is_valid:
                sheet.append([count, desc, link, publish_time, contact, ''])
                count += 1
        session.commit()
    print('结束爬取码市订单')
def send_fragments(self, soc, identifier, file):
    """Send ``file`` as numbered fragments in bursts of up to 50, re-sending on request.

    The first loop pass sends the initial fragment via
    ``self.send_initial_fragment``; the function returns early if that fails.
    Each later pass reads up to ``maxFragSize - MY_HEADER`` bytes, flags the
    fragment 'FIL' (data) or 'FIE' (empty read, end of file) and sends it with
    ``sender.build_and_send``. After each burst, while
    ``self.waitForConfirmation`` is truthy, every entry of
    ``self.missing_fragments`` is re-sent from the list of built messages.

    Cleanup relative to the previous version: the never-read ``lastByte``
    counter (computed from the CPython-specific ``__sizeof__() - 25``) and the
    dead initial ``message = ' '`` are removed, and the list is indexed
    directly instead of via ``__getitem__``.

    Args:
        soc: Socket passed to the sender helpers.
        identifier: Transfer identifier passed to the sender helpers.
        file: Open file object to read payloads from.
    """
    max_payload = settings.maxFragSize - settings.MY_HEADER
    messages_list = []
    fragmentNumber = 1
    sending = True
    initial_fragment_send = False

    while sending:
        for _ in range(50):
            if not initial_fragment_send:
                # NOTE(review): this call uses self.soc while everything else
                # uses the soc argument - confirm they are the same socket.
                if not self.send_initial_fragment(self.soc, identifier, file):
                    return
                initial_fragment_send = True
                continue

            message = file.read(max_payload)
            flag = 'FIL' if message else 'FIE'
            payCheck = cryptograph.calculatePayCheck(message)
            # Presumably fault injection: fragment 10 gets a zeroed checksum
            # when settings.sent_faulty is set.
            if fragmentNumber == 10 and settings.sent_faulty:
                payCheck = 0

            completeMessage = sender.build_and_send(
                soc, identifier, flag, fragmentNumber, payCheck, message)
            messages_list.append(completeMessage)

            if flag == 'FIE':
                sending = False
                print('File sent')
                break
            fragmentNumber += 1

        while self.waitForConfirmation(soc, identifier):
            print('Re-sending fragment')
            for miss_fragment in self.missing_fragments:
                sender.send_message(soc, messages_list[int(miss_fragment)])

        if self.kill_thread:
            return
def run_server():
    """Serve single-letter commands on a ZeroMQ REP socket bound to tcp://*:5555.

    Commands:
        "e": reply ``b"error"`` and raise a test exception.
        "t": send a mail-test report and reply "<msg> -->finished".
        "s": send a "scan started" report, reply "<msg> -->start", then run
             ``scan_share(is_test=False)``.
        anything else: send a "running normally" report and reply
             "<msg> -->finished".

    Any exception is printed and forwarded as an error report; the loop never exits.
    """
    ctx = zmq.Context()
    rep_socket = ctx.socket(zmq.REP)
    rep_socket.bind("tcp://*:5555")

    while True:
        try:
            # Block until the next client request arrives.
            request = bytes2str(rep_socket.recv())
            if request == "e":
                rep_socket.send(b"error")
                raise Exception("错误测试")

            if request == "t":
                send_message(subject="系统状态测试",
                             content=["邮件发送正常"],
                             attachments=[])
                rep_socket.send(str2bytes(request + " -->finished"))
            elif request == "s":
                send_message(subject="系统状态报告",
                             content=["开始扫描"],
                             attachments=[])
                rep_socket.send(str2bytes(request + " -->start"))
                scan_share(is_test=False)
            else:
                send_message(subject="系统状态报告",
                             content=["运行正常"],
                             attachments=[])
                rep_socket.send(str2bytes(request + " -->finished"))
        except Exception as exc:
            print(get_exception_info(exc))
            send_message(subject="系统错误报告",
                         content=[get_exception_info(exc)],
                         attachments=[])
def main(wb, session, OrderModel, WebsiteModel):
    """Crawl shixian.com pages 10..1 into the '实现' sheet and sync orders to the DB.

    An order counts as valid while the current time is not past its
    ``start_time``. The category comes from ``get_category``; a non-string
    result is reported as an error. Existing orders get their flags updated,
    unknown ones are inserted, and valid orders are appended to the sheet.
    A short random pause follows each item, and the session is committed once
    per page.

    Args:
        wb: Workbook that receives the new sheet at index 3.
        session: Database session used for queries, inserts and commits.
        OrderModel: Mapped order class.
        WebsiteModel: Mapped website class; the row with key 4 is linked to new orders.
    """
    print('开始爬取实现订单')
    sheet = wb.create_sheet('实现', 3)
    sheet.append(['单据编号', '订单描述', '链接', '发布时间', '联系方式', '分配人员'])
    count = 1
    website = session.query(WebsiteModel).get(4)
    for page in range(10, 0, -1):
        info_list = get_info(
            'https://shixian.com/job/all?page=%d&sort_arrow=down' % page)
        if isinstance(info_list, tuple):
            message = '实现爬取第%d行出错:%s' % (info_list[0], info_list[1])
            print(message)
            send_message(message)
            continue
        if not isinstance(info_list, list):
            continue

        for info in info_list:
            desc = info['desc']
            link = info['link']
            contact = get_contact(desc)
            dl_time = datetime.strptime(info['start_time'], "%Y-%m-%d %H:%M:%S")
            is_valid = datetime.now() <= dl_time
            sid = 'sx-' + link.split('/')[-1]
            cate = get_category(link)
            if isinstance(cate, str):
                existing = session.query(OrderModel).get(sid)
                if existing:
                    was_valid = existing.is_valid
                    existing.is_valid = is_valid
                    if is_valid:
                        if was_valid == False:
                            existing.is_delete = False
                    elif was_valid == True:
                        existing.is_delete = True
                else:
                    order = OrderModel(id=sid,
                                       desc=desc,
                                       link=link,
                                       contact=contact,
                                       category=cate,
                                       pub_time=None,
                                       is_valid=is_valid,
                                       is_delete=not is_valid)
                    order.website = website
                    session.add(order)
                if is_valid:
                    sheet.append([count, desc, link, '', contact, ''])
                    count += 1
            else:
                message = '实现详情爬取第%d行出错:%s' % (cate[0], cate[1])
                print(message)
                send_message(message)
            # Small random delay (under 0.1 s) between items.
            time.sleep(random.random()/10)
        session.commit()
    print('结束爬取实现订单')
def main():
    """Log in, fetch every configured book page and notify about updated books.

    A book is announced through ``sender.send_message`` when its last-update
    timestamp differs from the one stored in ``db``; the stored value is
    refreshed first.

    Raises:
        AssertionError: If the login or any book request does not return
            HTTP 200. Raised explicitly (not via ``assert``) so the check is
            still enforced when Python runs with ``-O``, while keeping the
            exception type unchanged.
    """
    with requests.Session() as session:
        login_query = session.post(common.AT_LOGIN_URL,
                                   data={'Login': common.AT_LOGIN,
                                         'Password': common.AT_PASSWORD})
        if login_query.status_code != 200:
            raise AssertionError(
                'login failed with HTTP status %s' % login_query.status_code)

        books = []
        for book_url in common.AT_BOOK_URLS:
            book_query = session.get(book_url)
            if book_query.status_code != 200:
                raise AssertionError(
                    'fetching %s failed with HTTP status %s'
                    % (book_url, book_query.status_code))
            books.append(book.Book(book_url, book_query.text))

    for b in books:
        last_update_timestamp = b.get_last_update_timestamp()
        if db.get_last_update_timestamp(b.link) != last_update_timestamp:
            db.set_last_update_timestamp(b.link, last_update_timestamp)
            sender.send_message(str(b))
def main(wb, session, OrderModel, WebsiteModel):
    """Crawl yuanjisong.com pages 10..1 into the '猿急送' sheet and sync orders to the DB.

    An order is valid when its status text is '投递职位'. Existing orders get
    their ``is_valid``/``is_delete`` flags updated, unknown ones are inserted,
    and every currently valid order is appended to the sheet. The session is
    committed once per page; a tuple result from ``get_info`` is reported as
    an error through ``send_message``.

    Args:
        wb: Workbook that receives the new sheet at index 5.
        session: Database session used for queries, inserts and commits.
        OrderModel: Mapped order class.
        WebsiteModel: Mapped website class; the row with key 6 is linked to new orders.
    """
    print('开始爬取猿急送订单')
    sheet = wb.create_sheet('猿急送', 5)
    sheet.append(['单据编号', '订单描述', '链接', '发布时间', '联系方式', '分配人员'])
    count = 1
    website = session.query(WebsiteModel).get(6)
    for page in range(10, 0, -1):
        info_list = get_info('https://www.yuanjisong.com/job/allcity/page%d' % page)
        if isinstance(info_list, tuple):
            message = '猿急送爬取第%d行出错:%s' % (info_list[0], info_list[1])
            print(message)
            send_message(message)
            continue
        if not isinstance(info_list, list):
            continue

        for info in info_list:
            desc = info['desc']
            link = info['link']
            contact = get_contact(desc)
            is_valid = info['status'] == '投递职位'
            yid = 'yj-{}'.format(int(link.split('/')[-1]))

            existing = session.query(OrderModel).get(yid)
            if existing:
                was_valid = existing.is_valid
                existing.is_valid = is_valid
                if is_valid:
                    if was_valid == False:
                        existing.is_delete = False
                elif was_valid == True:
                    existing.is_delete = True
            else:
                order = OrderModel(id=yid,
                                   desc=desc,
                                   link=link,
                                   contact=contact,
                                   category='',
                                   pub_time=None,
                                   is_valid=is_valid,
                                   is_delete=not is_valid)
                order.website = website
                session.add(order)

            if is_valid:
                sheet.append([count, desc, link, '', contact, ''])
                count += 1
        session.commit()
    print('结束爬取猿急送订单')
def main(wb):
    """Crawl 51waibao.net pages 1..10 into a new '51外包' worksheet of ``wb``.

    Each item from ``get_info`` becomes one numbered row. A tuple result is
    treated as an error whose second item is the message; it is printed and
    sent with ``send_message``.

    Args:
        wb: Workbook that receives the new sheet at index 3.
    """
    print('开始爬取51外包订单')
    sheet = wb.create_sheet('51外包', 3)
    sheet.append(['单据编号', '订单描述', '链接', '分配人员'])
    row_no = 1
    for page in range(1, 11):
        info_list = get_info(
            'http://www.51waibao.net/Project.html?page={}'.format(page))
        if isinstance(info_list, tuple):
            message = '51外包爬取出错:%s' % info_list[1]
            print(message)
            send_message(message)
            continue
        if not isinstance(info_list, list):
            continue
        for info in info_list:
            desc = info['desc']
            contact = get_contact(desc)
            sheet.append([row_no, desc, info['link'], contact])
            row_no += 1
    print('结束爬取51外包订单')
def main(wb):
    """Crawl yuanjisong.com pages 1..10 into a new '猿急送' worksheet of ``wb``.

    Each item from ``get_info`` becomes one numbered row. A tuple result is
    treated as an error whose second item is the message; it is printed and
    sent with ``send_message``.

    Args:
        wb: Workbook that receives the new sheet at index 4.
    """
    print('开始爬取猿急送订单')
    sheet = wb.create_sheet('猿急送', 4)
    sheet.append(['单据编号', '订单描述', '链接', '分配人员'])
    row_no = 1
    for page in range(1, 11):
        info_list = get_info(
            'https://www.yuanjisong.com/job/allcity/page{}'.format(page))
        if isinstance(info_list, tuple):
            message = '猿急送爬取出错:%s' % info_list[1]
            print(message)
            send_message(message)
            continue
        if not isinstance(info_list, list):
            continue
        for info in info_list:
            desc = info['desc']
            contact = get_contact(desc)
            sheet.append([row_no, desc, info['link'], contact])
            row_no += 1
    print('结束爬取猿急送订单')
def main(wb):
    """Crawl shixian.com pages 1..10 into a new '实现' worksheet of ``wb``.

    Each item from ``get_info`` becomes one numbered row. A tuple result is
    treated as an error whose second item is the message; it is printed and
    sent with ``send_message``.

    Args:
        wb: Workbook that receives the new sheet at index 5.
    """
    print('开始爬取实现订单')
    sheet = wb.create_sheet('实现', 5)
    sheet.append(['单据编号', '订单描述', '链接', '分配人员'])
    row_no = 1
    for page in range(1, 11):
        info_list = get_info(
            'https://shixian.com/job/all?page={}&sort_arrow=down'.format(page))
        if isinstance(info_list, tuple):
            message = '实现爬取出错:%s' % info_list[1]
            print(message)
            send_message(message)
            continue
        if not isinstance(info_list, list):
            continue
        for info in info_list:
            desc = info['desc']
            contact = get_contact(desc)
            sheet.append([row_no, desc, info['link'], contact])
            row_no += 1
    print('结束爬取实现订单')
def main(wb):
    """Crawl rrkf.com pages 1..10 into a new '人人开发' worksheet of ``wb``.

    Each item from ``get_info`` becomes one numbered row. A tuple result is
    treated as an error whose second item is the message; it is printed and
    sent with ``send_message``.

    Args:
        wb: Workbook that receives the new sheet at index 2.
    """
    print('开始爬取人人开发订单')
    sheet = wb.create_sheet('人人开发', 2)
    sheet.append(['单据编号', '订单描述', '链接', '分配人员'])
    count = 1
    for page in range(1, 11):
        # "&currentPage" had been mangled into "¤tPage", so the page number
        # was never sent under the right parameter name; restored here.
        url = 'http://www.rrkf.com/serv/request?&currentPage={}'.format(page)
        info_list = get_info(url)
        if isinstance(info_list, list):
            for info in info_list:
                desc = info['desc']
                contact = get_contact(desc)
                sheet.append([count, desc, info['link'], contact])
                count += 1
        elif isinstance(info_list, tuple):
            message = '人人开发爬取出错:%s' % info_list[1]
            print(message)
            send_message(message)
    print('结束爬取人人开发订单')
def crawl_save_upload():
    """Crawl all sources into one workbook and the database, then save and upload.

    Sets up the database (connection, tables, default website rows), runs each
    crawler's ``main(wb, session, Order, Website)``, prunes old files with
    ``delete_data``, saves the workbook under a timestamped name in data/, and
    uploads it via ``get_media_id`` and ``send_file``. Upload failures are
    printed and sent with ``send_message``.
    """
    print('-----数据抓取开始-----')
    wb = Workbook()
    engine, Base, session = get_mysql_connection()
    Order, Website = create_table(engine, Base)
    add_default_data(session, Website)
    crawlers = (
        codemart_crawler,
        oschina_crawler,
        rrkf_crawler,
        shixian_crawler,
        wywaibao_crawler,
        yuanjisong_crawler,
    )
    for crawler in crawlers:
        crawler.main(wb, session, Order, Website)
    print('-----数据抓取结束-----')

    print('-----文件保存开始-----')
    delete_data()
    timestamp = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
    file = r'data/%s.xlsx' % timestamp
    wb.save(file)
    time.sleep(3)
    print('-----文件保存结束-----')

    print('-----文件上传开始-----')
    failure = None
    media_id = get_media_id(file)
    if not isinstance(media_id, str):
        # A non-string result is indexed as an error whose item 1 is the reason.
        failure = '获取media_id失败:%s' % media_id[1]
    else:
        upload_result = send_file(media_id)
        if upload_result == True:
            print('文件上传成功:%s' % file)
        else:
            failure = '文件上传失败:%s' % upload_result[1]
    if failure is not None:
        print(failure)
        send_message(failure)
    print('-----文件上传结束-----')
def main(wb):
    """Crawl codemart API pages 1..10 into the workbook's default sheet, renamed '码市'.

    The crawl start time is captured once and passed to every
    ``get_one_page`` call. Each project becomes one numbered row; a tuple
    result is treated as an error whose second item is the message.

    Args:
        wb: Workbook whose 'Sheet' worksheet is renamed and filled.
    """
    print('开始爬取码市订单')
    start_time = time.time()
    sheet = wb['Sheet']
    sheet.title = '码市'
    sheet.append(['单据编号', '订单描述', '链接', '分配人员'])
    row_no = 1
    for page in range(1, 11):
        result = get_one_page(
            'https://codemart.com/api/project?page={}'.format(page), start_time)
        if isinstance(result, tuple):
            message = '码市爬取出错:%s' % result[1]
            print(message)
            send_message(message)
            continue
        if not isinstance(result, list):
            continue
        for r in result:
            desc = ILLEGAL_CHARACTERS_RE.sub(r'', r['description'])
            contact = get_contact(desc)
            link = 'https://codemart.com/project/{}'.format(r['id'])
            sheet.append([row_no, desc, link, contact])
            row_no += 1
    print('结束爬取码市订单')
def form_and_create_post(email_message, message_data, user, subject,
                         reply_user, sitename):
    """Turn an e-mail into a post on ``sitename`` and mail the outcome to ``reply_user``.

    Steps: build post attributes from the e-mail headers (minus transport
    headers), decode the message body and attachments, render the HTML with
    the site's templates, create the post, and finally clean up temp files.
    A parse failure sends ``messages.format_error``; a posting failure sends
    ``messages.system_error``; success sends ``messages.posted`` with the URL.

    Handlers now catch ``Exception`` instead of using bare ``except:``, so
    KeyboardInterrupt and SystemExit are no longer swallowed.

    NOTE(review): ``uid`` is neither a parameter nor a local here; it must be
    resolvable from module scope or the status prints raise NameError - confirm.

    Args:
        email_message: Parsed e-mail; its headers are merged into the attributes.
        message_data: Raw message bytes handed to ``decoder.bytes_to_message``.
        user: Author of the post.
        subject: Post title.
        reply_user: Address that receives the status e-mail.
        sitename: Site whose templates are used and where the post is created.
    """
    headers_to_delete = [
        'delivered-to', 'received', 'dkim-signature', 'message-id'
    ]
    try:
        attrs = {'author': user, 'title': subject, **email_message}
        # Header names are case-insensitive; normalise before filtering.
        attrs = {k.lower(): v for k, v in attrs.items()}
        for header in headers_to_delete:
            attrs.pop(header, None)

        # Decode the message and collect its files.
        message = decoder.bytes_to_message(message_data)
        content, files = decoder.decode_message(message)
        index_template, page_template = interfacer.get_site_templates(sitename)
        new_html = assembler.assemble_content(attrs, content, files,
                                              index_template, page_template)
    except Exception as e:
        print(e)
        # Something went wrong while parsing.
        sender.send_message(
            assembler.assemble_email(email_message, messages.format_error),
            reply_user)
        print("{} parse fail".format(uid))
        return

    try:
        post_url = interfacer.create_post(sitename, user, subject, new_html,
                                          files)
        sender.send_message(
            assembler.assemble_email(email_message, messages.posted,
                                     {'url': post_url}), reply_user)
        print("{} posted: {}".format(uid, post_url))
    except Exception:
        # Something went wrong while posting.
        sender.send_message(
            assembler.assemble_email(email_message, messages.system_error),
            reply_user)
        print("{} post fail".format(uid))
        return

    try:
        assembler.cleanup_tempfiles(files)
    except Exception:
        # Best-effort cleanup: a failure here is deliberately ignored.
        pass
# Command dispatch on the first argument:
#   (none) -> daemonize start()
#   s      -> one test scan in the foreground
#   f / d  -> start() in the foreground / as a daemon; extra flags "t", "a",
#             "s" switch on is_test, is_all and is_save
#   r      -> send a fixed recommendation message
#   dt     -> test scan, in the foreground with "t", otherwise as a daemon
if not args:
    daemonize(func=start)
elif args[0] == "s":
    scan_share(is_test=True)
elif args[0] in ("f", "d"):
    params = {
        name: True
        for flag, name in (("t", "is_test"), ("a", "is_all"), ("s", "is_save"))
        if flag in args
    }
    if args[0] == "f":
        start(**params)
    else:
        daemonize(func=start, **params)
elif args[0] == "r":
    send_message(subject="推荐-" + "超卖", content=[], attachments=[])
elif args[0] == "dt":
    if "t" in args:
        scan_share(is_test=True)
    else:
        params = dict(is_test=True)
        daemonize(func=scan_share, **params)
# Container is imported from the public proton.reactor module rather than the
# private proton._reactor implementation module.
from proton.reactor import Container

from config import BROKER_URL, QUEUE_NAME
from consumer import ExampleConsumer
from sender import send_message

if __name__ == "__main__":
    try:
        # Send five messages with bodies 0..4.
        for i in range(5):
            send_message(url=BROKER_URL, queue=QUEUE_NAME, body=i)

        # Run the consumer, constructed with timeout=40, until it finishes.
        Container(
            ExampleConsumer(broker_url=BROKER_URL,
                            amqp_queue_name=QUEUE_NAME,
                            timeout=40)).run()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the demo; exit quietly.
        pass
def fetch_and_decode_messages(new_messages):
    """Fetch the given INBOX messages and act on each according to its intention.

    The first intention from ``deducer.deduce_intention`` selects the action:

    * ``'page'``   - create a post on the 'assorted' site.
    * ``'manage'`` - open a settings session for the site named in the
      subject (creating the site when its metadata is marked new), or mail a
      permission error when no metadata is returned.
    * otherwise    - treat the intention as a site name and post to it when
      metadata exists and is not marked new.

    The IMAP connection is logged out in a ``finally`` block, so it is
    released even when processing a message raises.

    Args:
        new_messages: Message ids passed to ``server.fetch``.
    """
    server = IMAPClient(constants.IMAP_HOST, ssl_context=ssl_context)
    try:
        server.login(constants.IMAP_USERNAME, constants.IMAP_PASSWORD)
        server.select_folder("INBOX")
        for uid, message_data in server.fetch(new_messages, "RFC822").items():
            email_message = email.message_from_bytes(message_data[b"RFC822"])
            user = email_message.get("From")
            receiver = email_message.get("To")
            reply_user = (email_message.get('Reply-To')
                          or email_message.get('From'))
            subject = email_message.get("Subject")
            intentions = deducer.deduce_intention(user, receiver, subject)
            intention = intentions[0][0]
            print("{} received".format(uid))

            if intention == 'page':
                form_and_create_post(email_message, message_data, user,
                                     subject, reply_user, 'assorted')
            elif intention == 'manage':
                try:
                    # None means the user has no permission to edit the site.
                    metadata = interfacer.get_site_metadata(subject, user)
                    if metadata is not None:
                        # Create the site first if it does not exist yet.
                        if metadata['new']:
                            interfacer.create_site(metadata)
                        # Create a new settings session.
                        url = interfacer.create_session_url(metadata)
                        sender.send_message(
                            assembler.assemble_email(email_message,
                                                     messages.session_created,
                                                     {'url': url}), reply_user)
                        print("{} settings session created".format(uid))
                    else:
                        sender.send_message(
                            assembler.assemble_email(email_message,
                                                     messages.permission_error,
                                                     {'sitename': subject}),
                            reply_user)
                        print("{} settings permission denied".format(uid))
                except Exception:
                    sender.send_message(
                        assembler.assemble_email(email_message,
                                                 messages.system_error),
                        reply_user)
                    print("{} settings fail".format(uid))
            else:
                metadata = interfacer.get_site_metadata(intention, user)
                if metadata is not None and 'new' not in metadata:
                    # The user is posting to their own site and has permission.
                    form_and_create_post(email_message, message_data, user,
                                         subject, reply_user, intention)
    finally:
        server.logout()
def main(wb, session, OrdrModel, WebsiteModel):
    """Crawl 51waibao.net pages 10..1 into the '51外包' sheet and sync orders to the DB.

    Each link from ``get_links`` is resolved with ``get_detail``; a list
    result is read as [id, category, status text, publish time, description].
    Items published before ``time_point`` are skipped. Existing orders get
    their flags updated, unknown ones are inserted, and every currently valid
    order is appended to the sheet. A short random pause follows each item
    and the session is committed once per page.

    Fixes relative to the previous version:
      * a newly inserted valid order appended the header labels to the sheet
        instead of its own data row;
      * the header row had four columns while data rows have six.

    Args:
        wb: Workbook that receives the new sheet at index 4.
        session: Database session used for queries, inserts and commits.
        OrdrModel: Mapped order class (parameter name kept for compatibility).
        WebsiteModel: Mapped website class; the row with key 5 is linked to new orders.
    """
    print('开始爬取51外包订单')
    sheet = wb.create_sheet('51外包', 4)
    sheet.append(['单据编号', '订单描述', '链接', '发布时间', '联系方式', '分配人员'])
    count = 1
    website = session.query(WebsiteModel).get(5)
    for i in range(10, 0, -1):
        url = 'http://www.51waibao.net/Project.html?page=%d' % i
        link_list = get_links(url)
        if isinstance(link_list, list):
            for link in link_list:
                result = get_detail(link)
                if isinstance(result, list):
                    publish_time = datetime.strptime(result[3],
                                                     "%Y-%m-%d %H:%M:%S")
                    if publish_time < time_point:
                        continue
                    desc = result[4]
                    contact = get_contact(desc)
                    wid = 'wy-' + result[0]
                    is_valid = '项目已过期' not in result[2]

                    order_query = session.query(OrdrModel).get(wid)
                    if order_query:
                        was_valid = order_query.is_valid
                        order_query.is_valid = is_valid
                        # Explicit ==True/==False comparisons are kept so a
                        # stored NULL flag triggers neither transition.
                        if is_valid:
                            if was_valid == False:
                                order_query.is_delete = False
                        elif was_valid == True:
                            order_query.is_delete = True
                    else:
                        order = OrdrModel(
                            id=wid, desc=desc, link=link, contact=contact,
                            category=result[1], pub_time=publish_time,
                            is_valid=is_valid, is_delete=not is_valid)
                        order.website = website
                        session.add(order)

                    if is_valid:
                        sheet.append(
                            [count, desc, link, publish_time, contact, ''])
                        count += 1
                else:
                    message = '51外包详情爬取第%d行出错:%s' % (result[0], result[1])
                    print(message)
                    send_message(message)
                # Small random delay (under 0.1 s) between items.
                time.sleep(random.random() / 10)
            session.commit()
        elif isinstance(link_list, tuple):
            message = '51外包爬取第%d行出错:%s' % (link_list[0], link_list[1])
            print(message)
            send_message(message)
    print('结束爬取51外包订单')
def main(wb, session, OrderModel, WebsiteModel):
    """Crawl OSChina pages 10..1 into the '开源中国' sheet and sync orders to the DB.

    ``get_id`` yields ``(id, type)`` pairs; type 2 uses the reward detail
    endpoints, everything else the project ones. A list result from
    ``get_one_page`` is read as [description HTML, status, category, publish
    time]; status 3 means valid. Items published before ``time_point`` are
    skipped. Existing orders are matched by description and publish time,
    unknown ones are inserted, and every currently valid order is appended to
    the sheet. The session is committed once per listing page.

    Args:
        wb: Workbook that receives the new sheet at index 1.
        session: Database session used for queries, inserts and commits.
        OrderModel: Mapped order class.
        WebsiteModel: Mapped website class; the row with key 2 is linked to new orders.
    """
    print('开始爬取开源中国订单')
    sheet = wb.create_sheet('开源中国', 1)
    sheet.append(['单据编号', '订单描述', '链接', '发布时间', '联系方式', '分配人员'])
    count = 1
    website = session.query(WebsiteModel).get(2)
    # "&currentTime"/"&currentPage" had been mangled into "¤tTime"/"¤tPage",
    # which dropped the page parameter from the request; restored here.
    list_url = ('https://zb.oschina.net/project/contractor-browse-project-and-reward'
                '?applicationAreas=&moneyMinByYuan=&moneyMaxByYuan=&sortBy=30'
                '&currentTime=&pageSize=20&currentPage=%d')
    for i in range(10, 0, -1):
        id_list = get_id(list_url % i)
        if isinstance(id_list, list):
            for oid, otype in id_list:
                if otype == 2:
                    url = 'https://zb.oschina.net/reward/detail?id=%d' % oid
                    link = 'https://zb.oschina.net/reward/detail.html?id=%s' % oid
                else:
                    url = 'https://zb.oschina.net/project/detail?id=%s' % oid
                    link = 'https://zb.oschina.net/project/detail.html?id=%s' % oid
                result = get_one_page(url)
                if isinstance(result, list):
                    publish_time = result[3]
                    if publish_time < time_point:
                        continue
                    desc = html2text.html2text(result[0]).strip()
                    is_valid = result[1] == 3
                    contact = get_contact(desc)
                    order_id = 'oc-{}'.format(oid // 10)

                    order_query = session.query(OrderModel).filter_by(
                        desc=desc, pub_time=publish_time).first()
                    if order_query:
                        was_valid = order_query.is_valid
                        order_query.is_valid = is_valid
                        # Explicit ==True/==False comparisons are kept so a
                        # stored NULL flag triggers neither transition.
                        if is_valid:
                            if was_valid == False:
                                order_query.is_delete = False
                        elif was_valid == True:
                            order_query.is_delete = True
                    else:
                        order = OrderModel(
                            id=order_id, desc=desc, link=link, contact=contact,
                            category=result[2], pub_time=publish_time,
                            is_valid=is_valid, is_delete=not is_valid)
                        order.website = website
                        session.add(order)

                    if is_valid:
                        sheet.append(
                            [count, desc, link, publish_time, contact, ''])
                        count += 1
                elif isinstance(result, tuple):
                    message = '开源中国详情爬取第%d行出错:%s' % (result[0], result[1])
                    print(message)
                    send_message(message)
            session.commit()
        elif isinstance(id_list, tuple):
            message = '开源中国爬取第%d行出错:%s' % (id_list[0], id_list[1])
            print(message)
            send_message(message)
    print('结束爬取开源中国订单')