def mostrar_materias(count=-1):
    # List the available subjects, stopping after `count` entries (-1 lists all).
    try:
        crawler = login(EMAIL, SENHA)
        materias = crawler.materias()
        for i, materia in enumerate(materias):
            pos = i + 1
            print('%d - %s' % (pos, materia[0]))
            if count == pos:
                break
    except LoginException as e:
        print(e)
def baixar_aula(titulo_aula, data_aula, type, path=''):
    # Download one lesson, rewriting the progress line in place with '\r'.
    try:
        crawler = login(EMAIL, SENHA)
        for file in crawler.baixarAula(data_aula, type, path=path):
            p = (file[0] * 100) / file[1]  # percentage done, from a (done, total) tuple
            sys.stdout.write('Baixando %s %s %.2f%% \r' % (type, titulo_aula, p))
            sys.stdout.flush()
        print('\n')
    except LoginException as e:
        print(e)
def mostrar_cursos(nome_materia, numero_curso=0):
    # List the courses of a subject, or return the URL of course `numero_curso`.
    try:
        crawler = login(EMAIL, SENHA)
        for i, curso in enumerate(crawler.cursos(nome_materia)):
            pos = i + 1
            if numero_curso:
                if numero_curso == pos:
                    return curso[1]
            else:
                print('%d - %s' % (pos, curso[0]))
    except LoginException as e:
        print(e)
def read_tickets_from_file():
    # ========================= Control Panel ================================ #
    USERNAME = sys.argv[1]     # username to supply to the login window
    PASSWORD = sys.argv[2]     # password for the login window
    TICKET_FILE = sys.argv[3]  # file with the ticket numbers
    LOGIN_URL = "<login url here>"
    PAGE_URL = "<main_page_url_here>"
    LOGOUT_URL = "<logout url here>"
    HEADING = True
    # ======================================================================== #

    with open(TICKET_FILE, 'rt') as f:
        c_t = sum(1 for line in f)  # total number of tickets
    c_i = 1  # running count while retrieving

    inlog = open(TICKET_FILE, 'rt')
    if HEADING:
        next(inlog)  # skip the heading line of the ticket file
        c_t -= 1     # remove heading from the count

    browser = login(LOGIN_URL, USERNAME, PASSWORD)  # logged-in browser object
    for t in inlog:
        try:
            t = t.split(',')[0]
            t = t.strip()  # remove newline character
            print('Retrieving ticket %s (%d of %d)' % (t, c_i, c_t))
            page = read_page(browser, PAGE_URL + t)  # retrieve page
            html = page.read()                       # extract html
            [txt_list, txt_str] = get_text(html)     # clean html and return content
            # write ticket content to file
            outlog = open('tickets/' + t + '.txt', 'wt')
            outlog.write(txt_str)
            outlog.close()
        except Exception:
            pass  # skip tickets that fail to download or parse
        c_i += 1  # increment count
    inlog.close()

    # log out and verify the logout succeeded
    logout(browser, LOGOUT_URL)
    page = read_page(browser, PAGE_URL)
    html = page.read()
    [_, txt_str] = get_text(html)
    if "pagetype = 'login'" in txt_str:
        print('Logoff successful!')
    else:
        print(txt_str)
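# Hypothetical invocation of the script above (a sketch: the script name and
# input file are assumptions, only the three sys.argv parameters come from the
# code itself):
#
#   python fetch_tickets.py alice s3cret tickets.csv
#
# The ticket file is expected to hold one ticket number per line (first
# comma-separated field), with an optional heading row controlled by HEADING;
# the cleaned text of each ticket is written to tickets/<number>.txt.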
def mostrar_aulas(url_curso, numero_aula=0):
    # List the lessons of a course, or return (title, date, types) of lesson
    # `numero_aula`.
    try:
        crawler = login(EMAIL, SENHA)
        aulas = crawler.aulas(url_curso)
        for i, aula in enumerate(reversed(list(aulas))):
            pos = i + 1
            if numero_aula:
                if numero_aula == pos:
                    return (aula['titulo_aula'], aula['data_aula'], aula['types'])
            else:
                print('%d - %s' % (pos, aula['titulo_aula']))
    except LoginException as e:
        print(e)
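# How the helpers above chain together, as a sketch: list the subjects, pick a
# course, resolve a lesson, then download it. The subject name, the indices,
# `types[0]` (first available format), and the download path are hypothetical;
# EMAIL/SENHA are assumed to be module-level credentials as in the functions
# themselves.
mostrar_materias()                                         # print all subjects
mostrar_cursos('Portugues')                                # print one subject's courses
url_curso = mostrar_cursos('Portugues', numero_curso=1)    # resolve course #1 to its URL
mostrar_aulas(url_curso)                                   # print that course's lessons
titulo, data, types = mostrar_aulas(url_curso, numero_aula=1)  # resolve lesson #1
baixar_aula(titulo, data, types[0], path='downloads/')         # download it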
filename = "ETF_List_Filtered.csv" file = open(filename, "r") #name = "output.csv" #output = open(name, "w") # # a = file.readline() #a = a.split(",") ##print(a[-1]) #output.write(""+a[0]+", "+a[-1]+"") ##output.close() # 34 etfs for i in range(34): a = file.readline() #print(a) a = a.split(",") #print(type(a)) # 15 pages in total session = crawler.login() for j in range(1,16): url = "https://ycharts.com/companies/"+a[0]+"/net_asset_value.json?endDate=12/31/2018&pageNum="+str(j)+"&startDate=12/31/2015" reqs = session.get(url) rj = json.loads(reqs.text) to_deal = rj["data_table_html"] time.sleep(1) print(type(to_deal)) #output.close()
def login(self, username, password):
    # Return the session cookie serialized as an LWP string, or '' on failure.
    cookie = crawler.login(username, password)
    if cookie:
        return cookie.as_lwp_str()
    else:
        return ''
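# The as_lwp_str() call above suggests `cookie` is an http.cookiejar
# LWPCookieJar, whose as_lwp_str() serializes cookies as Set-Cookie3 headers.
# A sketch of persisting and restoring such a jar with the standard library;
# the filename is hypothetical:
import http.cookiejar

jar = http.cookiejar.LWPCookieJar()
# ... after a successful login has populated `jar` ...
jar.save('session_cookies.txt', ignore_discard=True)  # same LWP text format, on disk
jar.load('session_cookies.txt', ignore_discard=True)  # restore it in a later run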
def run(self):
    respGen = RespGen.RespGen()  # prepare the answer generator; needs the word file
    q = SimpleQueue()
    cred = DouUtil.getCred()
    pwd = cred['pwd']  # account credentials; read from a txt file
    userName = cred['userName']
    loginReqUrl = 'https://accounts.douban.com/j/mobile/login/basic'
    while True:
        begin_time = datetime.now()  # start timing this session
        # s = requests.Session()
        reqWrapper = requestsWrapper.ReqWrapper()
        s = reqWrapper._session
        s.cookies.clear()  # clear cookies
        if not self.cbstatus:
            if crawler.login(loginReqUrl, pwd, userName, s):
                DouUtil.flushCookies(s)
            else:
                time.sleep(180)
                break
        s.headers.update({
            'Host': 'www.douban.com',
            'Connection': 'keep-alive',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
        })
        # display the current cookies
        cookies = DouUtil.loadCookies()
        c = ''
        for key, value in cookies.items():
            c += key + '=' + value + '; '
        c += '\n'
        self.ui.textEdit_2.append(c)
        s.cookies.update(DouUtil.loadCookies())  # cookie-based login; needs the txt file
        slctr = NewPostSelector.NewPostSelector(q, reqWrapper)  # selects posts to comment on
        timeToSleep = 5
        combo = 0
        while True:
            loop_time = datetime.now()
            daytime = datetime(loop_time.year, loop_time.month, loop_time.day, 11, 30)   # daytime starts at 11:30
            nighttime = datetime(loop_time.year, loop_time.month, loop_time.day, 23, 0)  # night starts at 23:00
            time_gap = (loop_time - begin_time).total_seconds() // 60  # minutes
            print("programme running time: " + str(time_gap))
            if (loop_time - daytime).total_seconds() > 0 and (loop_time - nighttime).total_seconds() < 0:
                if time_gap >= 180 + random.randint(0, 10):
                    self.ui.textEdit.append('Closing the current session and starting the next one\n')
                    s.close()
                    time.sleep(180)
                    break
            q = slctr.select()  # posts with fewer than 5 comments
            if q.qsize() == 0:
                # print((loop_time - daytime).total_seconds())
                # print((loop_time - nighttime).total_seconds())
                if (loop_time - daytime).total_seconds() > 0 and (loop_time - nighttime).total_seconds() < 0:
                    timeToSleep = random.randint(50, 70)
                else:
                    timeToSleep = random.randint(600, 900)
                log.debug("sleep for empty queue: ", timeToSleep)  # log the sleep time
                time.sleep(timeToSleep)
            else:
                timeToSleep = random.randint(5, 30)
                # timeToSleep = 5
                log.info("****selection, q size: ", q.qsize(), "timeToSleep: " + str(timeToSleep) + "****")
            try:
                file = open('resources/record.txt', 'a', encoding='utf-8')
                recorder = open('resources/histo.txt', "a", encoding='utf-8')
                while q.qsize() > 0:
                    tup = q.get(timeout=3)
                    question, postUrl, dajie = tup[0], tup[1], tup[2]
                    resp = respGen.getResp(question, dajie)
                    crawler.postCmnt(reqWrapper, postUrl, question, resp)  # post the comment
                    self.ui.textEdit.append(question + ' ' + resp + '\n')  # display the comment
                    sleepCmnt = random.randint(20, 30)
                    # time.sleep(sleepCmnt)
                    log.debug("sleep cmnt: ", sleepCmnt)
                    recorder.write(postUrl.split('/')[5] + '\n')
                    record = question + ': ' + resp + '\n'
                    file.write(record)
            except Empty:
                log.info("Emptied q, one round finished")
            finally:
                file.close()
                recorder.close()
                DouUtil.flushCookies(s)
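# The session loop above embeds its scheduling rule inline: between 11:30 and
# 23:00 it polls frequently and recycles the session after roughly 3 hours;
# outside that window it sleeps 10-15 minutes on an empty queue. A sketch of
# the same rule factored into a predicate (the name and placement are
# suggestions, not part of the original):
from datetime import datetime

def in_active_window(now=None):
    """True between 11:30 and 23:00, when the bot should poll frequently."""
    now = now or datetime.now()
    start = now.replace(hour=11, minute=30, second=0, microsecond=0)
    end = now.replace(hour=23, minute=0, second=0, microsecond=0)
    return start < now < end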