Example #1
0
def mostrar_materias(count=-1):
    """Print a numbered list of the entries returned by ``materias()``.

    Logs in with the module-level ``EMAIL``/``SENHA`` credentials and prints
    one ``"<position> - <materia[0]>"`` line per entry, numbering from 1.

    Args:
        count: stop after printing this many entries. Any value that never
            equals a position (the default ``-1``, or ``0``) prints them all.

    A ``LoginException`` is printed instead of being propagated.
    """
    try:
        session = login(EMAIL, SENHA)
        for numero, materia in enumerate(session.materias(), start=1):
            print('%d - %s' % (numero, materia[0]))
            # The limit is checked after printing, so entry `count` is shown.
            if numero == count:
                break
    except LoginException as e:
        print(e)
Example #2
0
def baixar_aula(titulo_aula, data_aula, type, path=''):
    """Download one lesson while showing a live percentage on stdout.

    Args:
        titulo_aula: lesson title; only used in the progress message.
        data_aula: forwarded unchanged to ``crawler.baixarAula``.
        type: forwarded unchanged to ``crawler.baixarAula`` and shown in the
            progress message. The name shadows the builtin but is kept so
            keyword callers keep working.
        path: forwarded as the ``path`` keyword of ``crawler.baixarAula``.

    A ``LoginException`` is printed instead of being propagated.
    """
    try:
        crawler = login(EMAIL, SENHA)
        for progresso in crawler.baixarAula(data_aula, type, path=path):
            # presumably (amount done, total amount) -- TODO confirm against
            # crawler.baixarAula
            feito, total = progresso[0], progresso[1]
            # 100.0 forces float division on Python 2 as well; a zero total
            # is reported as 0% instead of raising ZeroDivisionError.
            percentual = (feito * 100.0) / total if total else 0.0
            # '\r' rewinds to the start of the line so each update
            # overwrites the previous one.
            sys.stdout.write('Baixando %s %s %.2f%% \r' %
                             (type, titulo_aula, percentual))
            sys.stdout.flush()
        print('\n')
    except LoginException as e:
        print(e)
Example #3
0
def mostrar_cursos(nome_materia, numero_curso=0):
    """List the courses of a subject, or look one up by its position.

    Args:
        nome_materia: forwarded to ``crawler.cursos``.
        numero_curso: 1-based position of the wanted course. When falsy
            (default ``0``) every course is printed as
            ``"<position> - <curso[0]>"`` and nothing is returned.

    Returns:
        ``curso[1]`` of the course at ``numero_curso``; ``None`` when listing,
        when the position does not exist, or when login fails (the
        ``LoginException`` is printed).
    """
    try:
        session = login(EMAIL, SENHA)
        for numero, curso in enumerate(session.cursos(nome_materia), start=1):
            if not numero_curso:
                # Listing mode: show every course.
                print('%d - %s' % (numero, curso[0]))
            elif numero == numero_curso:
                # Lookup mode: hand back the second field of the match.
                return curso[1]
    except LoginException as e:
        print(e)
Example #4
0
def read_tickets_from_file():
    """Fetch every ticket listed in a file and save each one as a text file.

    Command-line arguments (read from ``sys.argv``):
        1. username supplied to the login window
        2. password supplied to the login window
        3. path of the ticket file; the ticket number is the first
           comma-separated field of each line

    Each ticket page is fetched through the logged-in browser, cleaned with
    ``get_text`` and written to ``tickets/<ticket>.txt`` (the directory is not
    created here). A ticket that fails is reported on stderr and skipped so
    the remaining tickets are still retrieved. Afterwards the session is
    logged out and the logout is verified by re-reading the main page.
    """
    # ========================= Control Panel ================================ #
    USERNAME = sys.argv[1]  # username to supply to the login window
    PASSWORD = sys.argv[2]  # password for the login window
    TICKET_FILE = sys.argv[3]  # file with the ticket numbers
    LOGIN_URL = "<login url here>"
    PAGE_URL = "<main_page_url_here>"
    LOGOUT_URL = "<logout url here>"
    HEADING = True  # the first line of TICKET_FILE is a heading row
    # ======================================================================== #

    # Read the file once; the handle is closed even if reading fails.
    with open(TICKET_FILE, 'rt') as inlog:
        lines = inlog.readlines()
    if HEADING:
        lines = lines[1:]  # ignore heading (safe on an empty file)
    c_t = len(lines)  # total number of tickets

    browser = login(LOGIN_URL, USERNAME, PASSWORD)  # logged-in browser object
    for c_i, line in enumerate(lines, 1):
        t = line.split(',')[0].strip()  # ticket number without the newline
        try:
            print('Retrieving ticket %s (%d of %d)' % (t, c_i, c_t))
            page = read_page(browser, PAGE_URL + t)  # retrieve page
            html = page.read()  # extract html
            _, txt_str = get_text(html)  # clean html and keep the text

            # write ticket content to file
            with open('tickets/' + t + '.txt', 'wt') as outlog:
                outlog.write(txt_str)
        except Exception as e:
            # Best effort: keep going, but make the failure visible.
            sys.stderr.write('Failed to retrieve ticket %s: %s\n' % (t, e))

    # logout and verify logout
    logout(browser, LOGOUT_URL)
    page = read_page(browser, PAGE_URL)
    html = page.read()
    _, txt_str = get_text(html)
    if "pagetype = 'login'" in txt_str:
        print('Logoff successful!')
    else:
        # Show what came back so the failed logout can be diagnosed.
        print(txt_str)
Example #5
0
def mostrar_aulas(url_curso, numero_aula=0):
    """List the lessons of a course in reverse order, or look one up.

    Lessons returned by ``crawler.aulas`` are reversed before being numbered
    from 1.

    Args:
        url_curso: forwarded to ``crawler.aulas``.
        numero_aula: 1-based position (in the reversed order) of the wanted
            lesson. When falsy (default ``0``) every lesson title is printed
            and nothing is returned.

    Returns:
        ``(titulo_aula, data_aula, types)`` of the lesson at ``numero_aula``;
        ``None`` when listing, when the position does not exist, or when
        login fails (the ``LoginException`` is printed).
    """
    try:
        session = login(EMAIL, SENHA)
        em_ordem = list(session.aulas(url_curso))
        em_ordem.reverse()
        for numero, aula in enumerate(em_ordem, start=1):
            if not numero_aula:
                # Listing mode: show every lesson title.
                print('%d - %s' % (numero, aula['titulo_aula']))
                continue
            if numero == numero_aula:
                titulo = aula['titulo_aula']
                data = aula['data_aula']
                tipos = aula['types']
                return (titulo, data, tipos)

    except LoginException as e:
        print(e)
Example #6
0
# Walk the ETF list and request the paged net-asset-value JSON for each
# symbol from ycharts. Only the type of each page's "data_table_html" value
# is printed; nothing is written to disk yet.
filename = "ETF_List_Filtered.csv"
# The `with` block guarantees the CSV handle is closed, even on error.
with open(filename, "r") as file:
    # First line is consumed and discarded (presumably the CSV header row --
    # TODO confirm).
    a = file.readline()

    # 34 etfs
    for i in range(34):
        a = file.readline()
        if not a:
            # End of file reached early: stop instead of requesting URLs
            # built from an empty symbol.
            break
        a = a.split(",")

        # NOTE(review): a new login is performed for every ETF; confirm
        # whether a single session could be reused.
        session = crawler.login()
        # 15 pages in total
        for j in range(1, 16):
            url = "https://ycharts.com/companies/"+a[0]+"/net_asset_value.json?endDate=12/31/2018&pageNum="+str(j)+"&startDate=12/31/2015"
            reqs = session.get(url)
            rj = json.loads(reqs.text)
            to_deal = rj["data_table_html"]
            time.sleep(1)  # pause between page requests
            print(type(to_deal))
Example #7
0
 def login(self, username, password):
     """Log in via ``crawler.login`` and return the cookie as a string.

     Returns ``cookie.as_lwp_str()`` when ``crawler.login`` yields a truthy
     cookie object, otherwise the empty string.
     """
     cookie = crawler.login(username, password)
     return cookie.as_lwp_str() if cookie else ''
Example #8
0
    def run(self):
        """Run the comment bot: log in, pick posts and reply until stopped.

        The outer loop is one session per iteration: a fresh request wrapper
        is created, a login is attempted unless ``self.cbstatus`` is set, and
        the stored cookies are shown in the UI and loaded into the session.
        The inner loop then repeatedly selects posts, generates a reply for
        each, posts it and records it in ``resources/record.txt`` and
        ``resources/histo.txt``.

        Between 11:30 and 23:00 a session is closed once it has run for about
        180-190 minutes; after a 180-second pause a new session starts. If the
        login fails, the method sleeps 180 seconds and returns.
        """
        respGen = RespGen.RespGen()  # reply generator (requires the word file)
        q = SimpleQueue()
        cred = DouUtil.getCred()
        pwd = cred['pwd']  # account password (requires the credentials txt)
        userName = cred['userName']
        loginReqUrl = 'https://accounts.douban.com/j/mobile/login/basic'

        while True:
            # Session start time, used to measure how long it has been running.
            begin_time = datetime.now()

            reqWrapper = requestsWrapper.ReqWrapper()
            s = reqWrapper._session
            s.cookies.clear()  # start from an empty cookie jar

            # NOTE(review): cbstatus presumably means "reuse saved cookies
            # instead of logging in" -- confirm against the UI code.
            if not self.cbstatus:
                if crawler.login(loginReqUrl, pwd, userName, s):
                    DouUtil.flushCookies(s)
                else:
                    # Login failed: wait, then leave the outer loop, which
                    # ends run().
                    time.sleep(180)
                    break

            s.headers.update({
                'Host': 'www.douban.com',
                'Connection': 'keep-alive',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
                'Accept':
                'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
            })

            # Show the stored cookies in the UI, then load them into the
            # session (cookie login; requires the cookies txt). They are read
            # once and reused for both purposes.
            cookies = DouUtil.loadCookies()
            c = ''.join(key + '=' + value + '; '
                        for key, value in cookies.items()) + '\n'
            self.ui.textEdit_2.append(c)
            s.cookies.update(cookies)

            # Selects the posts that need a comment.
            slctr = NewPostSelector.NewPostSelector(q, reqWrapper)

            while True:
                loop_time = datetime.now()
                daytime = datetime(loop_time.year, loop_time.month,
                                   loop_time.day, 11, 30)  # 11:30 today
                nighttime = datetime(loop_time.year, loop_time.month,
                                     loop_time.day, 23, 0)  # 23:00 today
                # True strictly between 11:30 and 23:00.
                active_hours = daytime < loop_time < nighttime
                time_gap = (loop_time - begin_time).total_seconds() // 60  # minutes
                print("programme running time: " + str(time_gap))

                # During active hours, recycle the session after 180 minutes
                # plus a random 0-10 minute jitter.
                if active_hours and time_gap >= 180 + random.randint(0, 10):
                    self.ui.textEdit.append('关闭当前session,开启下一个session\n')
                    s.close()
                    time.sleep(180)
                    break

                q = slctr.select()  # posts with fewer than 5 comments
                if q.qsize() == 0:
                    # Nothing to do: poll about once a minute during active
                    # hours, every 10-15 minutes otherwise.
                    if active_hours:
                        timeToSleep = random.randint(50, 70)
                    else:
                        timeToSleep = random.randint(600, 900)
                    # NOTE(review): if `log` is a stdlib logging.Logger, the
                    # extra positional args in these log calls have no
                    # matching %-placeholder -- confirm what `log` is.
                    log.debug("sleep for empty queue: ", timeToSleep)
                    time.sleep(timeToSleep)

                else:
                    # NOTE(review): this value is only logged below, never
                    # slept on -- confirm whether a sleep was intended.
                    timeToSleep = random.randint(5, 30)
                log.info("****selection, q size: ", q.qsize(),
                         "timeToSleep: " + str(timeToSleep) + "****")
                try:
                    # `with` closes both files on every exit path and cannot
                    # touch an unbound handle when an open() call fails.
                    with open('resources/record.txt', 'a',
                              encoding='utf-8') as file, \
                            open('resources/histo.txt', 'a',
                                 encoding='utf-8') as recorder:
                        while q.qsize() > 0:
                            tup = q.get(timeout=3)
                            question, postUrl, dajie = tup[0], tup[1], tup[2]

                            resp = respGen.getResp(question, dajie)
                            # Post the comment.
                            crawler.postCmnt(reqWrapper, postUrl, question,
                                             resp)
                            # Show the posted comment in the UI.
                            self.ui.textEdit.append(question + ' ' + resp + '\n')

                            # Pause between comments.
                            sleepCmnt = random.randint(20, 30)
                            time.sleep(sleepCmnt)

                            log.debug("sleep cmnt: ", sleepCmnt)

                            # The sixth '/'-separated piece of the post URL
                            # goes to the history file.
                            recorder.write(postUrl.split('/')[5] + '\n')
                            record = question + ': ' + resp + '\n'
                            file.write(record)

                except Empty:
                    log.info("Emptied q, one round finished")
                finally:
                    # Persist the session cookies after every round.
                    DouUtil.flushCookies(s)
Example #9
0
 def login(self, username, password):
     """Authenticate through ``crawler.login`` and serialise the cookie.

     Returns the result of ``cookie.as_lwp_str()`` when ``crawler.login``
     gives back a truthy cookie object; otherwise returns ``''``.
     """
     cookie = crawler.login(username, password)
     if not cookie:
         return ''
     return cookie.as_lwp_str()