Esempio n. 1
0
def login():
    """Claim the gift once for each of the first three configured accounts.

    A Chrome browser is created and stored in the module-level ``b``. For
    account indexes 0, 1 and 2 the function opens ``gift_url``, logs in
    through the ``loginFrame`` iframe using ``username[i]`` / ``passwd[i]``,
    selects the hard-coded region and server, confirms, dismisses the alert
    and logs out again. The fixed ``sleep`` calls between steps presumably
    give the page time to react.
    """
    global b
    b = Browser(driver_name="chrome")

    # Region option (change to your own; this value is the "大地飞鹰" region).
    region_option = '//select[@id="area1ContentId_wuxia"]/option[@value="7609516"]'
    # Server option (change to your own; this value is the "藏锋谷" server).
    server_option = '//select[@id="areaContentId_wuxia"]/option[@value="2002"]'

    for account_index in range(3):
        b.visit(gift_url)
        b.find_by_id("ptLoginBtn").click()
        sleep(1)

        # The login form lives inside an iframe.
        with b.get_iframe('loginFrame') as login_frame:
            login_frame.find_by_id('u').fill(username[account_index])
            login_frame.find_by_id('p').fill(passwd[account_index])
            login_frame.find_by_id('go').click()
        sleep(1)

        b.find_by_tag("a")[2].click()
        sleep(1)

        # The <option> nodes are clicked through the underlying WebDriver
        # element (``_element``) rather than the Splinter wrapper.
        for option_xpath in (region_option, server_option):
            b.find_by_xpath(option_xpath)._element.click()
            sleep(1)

        b.find_by_id("confirmButtonId_wuxia").click()
        sleep(1)
        b.get_alert().dismiss()
        sleep(1)
        b.find_by_id("ptLogoutBtn").click()
        sleep(5)

    print(u"领取完毕")
    sleep(3)
Esempio n. 2
0
class metaCatcher:
    """Crawl a dataset listing page and download each dataset's metadata file.

    Usage: create an instance, call ``set_url`` with the listing page, then
    ``catch``. Files are written to ``./meta/Safety1/<dataset name>.json``.
    """

    def __init__(self):
        """Start the browser and cap page loads at five seconds."""
        self.browser = Browser()
        # A slow page must not stall the crawl; catch() tolerates the
        # resulting TimeoutException and works with whatever has loaded.
        self.browser.driver.set_page_load_timeout(5)

    def set_url(self, url):
        """Remember the listing page URL that ``catch`` will visit."""
        self.url = url

    def download(self, url, name):
        """Fetch ``url`` and store the response body in the file ``name``.

        Nothing is downloaded when ``name`` already exists, and nothing is
        written when the response looks like an HTML page (it contains a
        closing ``</html>`` tag) instead of a metadata document.
        """
        if os.path.exists(name):
            return
        result = requests.get(url)
        # ``content`` is the raw body, so compare against a bytes literal.
        if b'</html>' in result.content:
            return
        # Binary mode stores the body unmodified; the context manager closes
        # the handle even if the write fails.
        with open(name, 'wb') as output:
            output.write(result.content)

    def catch(self):
        """Visit the listing page and download metadata for every dataset.

        The dataset names and links are collected first (elements with the
        ``dataset-heading`` class), then each dataset page is opened and every
        anchor whose text is ``Download Metadata`` is downloaded.
        """
        self.browser.visit(self.url)
        setName = []
        tmpitems = []
        for item in self.browser.find_by_css('.dataset-heading'):
            link = item.find_by_tag('a')[0]
            setName.append(link.text)
            tmpitems.append(link['href'])
        print(setName)

        for name, href in zip(setName, tmpitems):
            try:
                self.browser.visit(href)
            except TimeoutException:
                # Page load hit the 5 s cap; scan whatever has been rendered.
                pass
            for g in self.browser.find_by_tag('a'):
                # Only convert the href of links that are actually used.
                if g.text == 'Download Metadata':
                    self.download(str(g['href']),
                                  './meta/Safety1/' + name + '.json')
Esempio n. 3
0
    def enterprise(self):
        """Build a plain-text digest from the article at ``self.text``.

        The page at the URL stored in ``self.text`` is opened in a headless
        browser and its ``<strong>`` elements are walked in order: the first
        one triggers the dated header line, the second is used as the
        summary, and the following ones become numbered entries. Walking
        stops at the ``财报信息`` marker; in that case the paragraph at
        position ``2 * (marker position)`` inside ``#js_content`` is appended
        as the fiscal section.

        Returns:
            The digest text, always ending with the source URL.
        """
        driver = Browser(driver_name=BROWSER['SPLINTER']['NAME'],
                         executable_path=BROWSER['SPLINTER']['PATH'],
                         headless=True)
        # Start from an empty digest so a page without headings (or one that
        # begins with the fiscal marker) no longer hits an unbound variable.
        content = ''
        try:
            driver.visit(self.text)
            fiscal_position = None  # 1-based position of the fiscal marker

            for seq, item in enumerate(driver.find_by_tag('strong')):
                heading = item.text
                if heading == '财报信息':
                    fiscal_position = seq + 1
                    break
                if seq == 0:
                    content = '全球企业动态[%s]\n\n' % datetime.now().date()
                elif seq == 1:
                    content += '概要:\n%s\n' % heading
                else:
                    content += '\n%d. %s\n' % (seq - 1, heading)

            if fiscal_position is not None:
                fiscal_xpath = '//*[@id="js_content"]/p[%s]' % str(fiscal_position * 2)
                content += '\n财报信息:\n' + driver.find_by_xpath(fiscal_xpath).text
        finally:
            # Always shut the headless browser down, otherwise every call
            # leaves a browser process behind.
            driver.quit()

        content += '\n\n' + self.text
        return content
NOTE: you may want to change the webdriver in " Browser() "
and pass your preferred driver, Browser('webdriver.chrome') for example;
by default it is always set to webdriver.firefox

NOTE: choose a URL whose page contains broken links to see the
error responses

NOTE: this is a basic example; you can improve it and do whatever you want.
Believe me, you can do almost everything :)

More information, see the docs: http://splinter.cobrateam.info/docs/
"""
from splinter.browser import Browser
from splinter.request_handler.status_code import HttpResponseError

browser = Browser()
try:
    # Visit URL
    url = "http://splinter.cobrateam.info/"
    browser.visit(url)
    # Get all links in this page
    urls = [a['href'] for a in browser.find_by_tag('a')]
    # Visit each link and report whether it loads correctly
    for url in urls:
        try:
            browser.visit(url)
            if browser.status_code.is_success():
                # Single-argument print produces the same text on Python 2 and 3.
                print('( %s ) visit to %s was a success!'
                      % (browser.status_code.code, url))
        except HttpResponseError as e:
            print('( %s ) visit to %s was fail! Error: %s'
                  % (e.status_code, url, e.reason))
finally:
    # Close the browser even when an unexpected error interrupts the scan.
    browser.quit()
Esempio n. 5
0
            TAMANHO = B.find_by_css(
                '.modal-body .item.active .input-group a')[1].click()
        else:
            TAMANHO = SELETOR_TAMANHOS[random.randint(0, QUANTIDADE_TAMANHOS -
                                                      1)]  #Seleciona tamanho
            TAMANHO.find_by_css(
                'a')[1].click()  #Clica na tag A que adiciona itens
        time.sleep(SLEEP)
        i = i + 1
    B.find_by_css('div.modal-footer').click()
    B.find_by_css('button.btn.btn-primary').click()
    VARPEDIDOS = VARPEDIDOS + 1

# Select the shopping cart
time.sleep(SLEEP)
# 'span.badge.badge-primary' is a CSS selector (tag plus classes), not a bare
# tag name, so it has to be looked up with find_by_css.
B.find_by_css('span.badge.badge-primary').click()
# Alternative target: B.find_by_css('i.glyphicon.glyphicon-shopping-cart').click()
"""Caso queira acrescentar as quantidades depois de selecionar todos os produtos...
#Selecionar modal dos tamanhos
time.sleep(SLEEP)
B.find_by_css('i.glyphicon.glyphicon-edit').click()

#Selecionar quantidade
i = 0
while i < QTD:
    SELETOR_TAMANHOS = B.find_by_css('.modal-body .item.active .input-group')
    QUANTIDADE_TAMANHOS = len(SELETOR_TAMANHOS)
    if QUANTIDADE_TAMANHOS == 1:
        TAMANHO = SELETOR_TAMANHOS.first
    else:
        TAMANHO = SELETOR_TAMANHOS[random.randint(0, QUANTIDADE_TAMANHOS-1)] #seleciona tamanho
Esempio n. 6
0
class DouYin(object):
    """Download the videos published by a Douyin user.

    Video metadata is fetched over HTTP with ``requests``; a headless Chrome
    browser is used only to obtain watermark-free download links.
    """

    # How many times the (identical) user search is repeated before giving
    # up, instead of looping forever when the user cannot be found.
    _MAX_SEARCH_ATTEMPTS = 5

    def __init__(self, width=500, height=300):
        """Start the headless browser used for watermark removal.

        Parameters:
            width: not used by this class; kept for backward compatibility.
            height: not used by this class; kept for backward compatibility.
        """
        # Headless browser
        chrome_options = Options()
        chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"'
        )

        self.driver = Browser(driver_name='chrome',
                              executable_path='F:/chromedriver_win32',
                              options=chrome_options,
                              headless=True)

    def get_video_urls(self, user_id):
        """Look up a user and list the share pages of their videos.

        Parameters:
            user_id: the user's Douyin ID (matched against ``unique_id``).

        Returns:
            video_names: list of file names, one per video
            video_urls: list of share-page URLs, one per video
            nickname: the user's nickname

        Raises:
            ValueError: no search result carried the requested ``unique_id``.
        """
        video_names = []
        video_urls = []

        search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id

        # Retry a bounded number of times and inspect every returned user,
        # not only the first hit.
        user_info = None
        for _ in range(self._MAX_SEARCH_ATTEMPTS):
            req = requests.get(url=search_url, verify=False)
            html = json.loads(req.text)
            for entry in html.get('user_list') or []:
                if entry['user_info']['unique_id'] == user_id:
                    user_info = entry['user_info']
                    break
            if user_info is not None:
                break
        if user_info is None:
            raise ValueError(
                'no Douyin user with unique_id %r was found' % (user_id,))

        aweme_count = user_info['aweme_count']
        uid = user_info['uid']
        nickname = user_info['nickname']

        user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (
            uid, aweme_count)
        req = requests.get(url=user_url, verify=False)
        html = json.loads(req.text)

        # Videos that only carry the default share text get a running number
        # as their file name.
        i = 1
        for each in html['aweme_list']:
            share_desc = each['share_info']['share_desc']
            if '抖音-原创音乐短视频社区' == share_desc:
                video_names.append(str(i) + '.mp4')
                i += 1
            else:
                video_names.append(share_desc + '.mp4')
            video_urls.append(each['share_info']['share_url'])

        return video_names, video_urls, nickname

    def get_download_url(self, video_url):
        """Extract the (watermarked) download address from a share page.

        Parameters:
            video_url: URL of the video's share page.

        Returns:
            download_url: the first entry of the video's ``url_list``.
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, 'lxml')
        # The video data is embedded as ``var data = [...]`` in the last script.
        script = bf.find_all('script')[-1]
        video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
        video_html = json.loads(video_url_js)
        download_url = video_html['video']['play_addr']['url_list'][0]
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to ``video_name`` while printing progress.

        Parameters:
            video_url: URL of the video's share page.
            video_name: path of the file to write.
            watermark_flag: when true, download the watermark-free version.

        Returns:
            None. Nothing is written when the server does not answer 200.
        """
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)

        with closing(requests.get(video_url, stream=True,
                                  verify=False)) as response:
            # Check the status before touching headers that an error
            # response may not carry.
            if response.status_code != 200:
                return

            chunk_size = 1024
            # The header can be absent (e.g. chunked transfer); 0 then means
            # "size unknown" and the size/percentage output is skipped.
            content_size = int(response.headers.get('content-length', 0))
            if content_size:
                sys.stdout.write('  [文件大小]:%0.2f MB\n' %
                                 (content_size / chunk_size / 1024))

            size = 0
            with open(video_name, "wb") as out_file:
                for data in response.iter_content(chunk_size=chunk_size):
                    out_file.write(data)
                    size += len(data)
                    if content_size:
                        sys.stdout.write('  [下载进度]:%.2f%%' %
                                         float(size / content_size * 100) +
                                         '\r')
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Resolve a watermark-free download address through douyin.iiilab.com.

        Parameters:
            video_url: URL of the video's share page.

        Returns:
            The ``href`` of the first link in the result thumbnail.
        """
        self.driver.visit('http://douyin.iiilab.com/')
        self.driver.find_by_tag('input').fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath(
            '//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, 'lxml')
        return bf.find('a').get('href')

    @staticmethod
    def _clean_filename(name):
        """Return ``name`` with every backslash and slash removed."""
        return name.replace('\\', '').replace('/', '')

    def run(self):
        """Ask for a user ID and download all of that user's videos.

        The videos are stored in a directory named after the user's nickname
        inside the current working directory.
        """
        self.hello()
        user_id = input('请输入ID(例如40103580):')
        video_names, video_urls, nickname = self.get_video_urls(user_id)

        if not os.path.exists(nickname):
            os.mkdir(nickname)

        print('视频下载中:共有%d个作品!\n' % len(video_urls))

        for num, (video_url, raw_name) in enumerate(
                zip(video_urls, video_names), start=1):
            print('  解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, video_url))
            # Strip both kinds of path separator so the name cannot point
            # into another directory.
            video_name = self._clean_filename(raw_name)
            self.video_downloader(video_url,
                                  os.path.join(nickname, video_name))
            print('\n')

        print('下载完成!')

    def hello(self):
        """Print the welcome banner."""
        print('*' * 100)
        print('\t\t\t\t抖音App视频下载小助手')
        print('\t\t作者:Jack Cui')
        print('*' * 100)
Esempio n. 7
0
class CatchTicket(object):
    """Drive a Chrome browser through the 12306 ticket booking pages.

    All settings (train, date, stations, passengers, credentials) are
    hard-coded in ``__init__``; ``start_order`` runs the whole flow.
    """

    def __init__(self):
        """Store the booking settings and open the browser window."""
        # Station values in the escaped form the 12306 cookies expect.
        station_cookies = {
            "xian_bei": u"%u897F%u5B89%u5317%2CEAY",
            "beijing_xi": u"%u5317%u4EAC%u897F%2CBXP",
            "langfang": u"%u5ECA%u574A%2CLJP",
            "shanghai_hongqiao": u"%u4E0A%u6D77%2CSHH",
        }

        self.train = "G87"  # train to be ordered

        # 12306 login information
        self.username = u"xxx"
        self.passwd = u"xxx"

        self.date = u"2019-01-21"
        self.from_station = station_cookies["beijing_xi"]
        self.to_station = station_cookies["xian_bei"]

        self.person = [u"xxx", u"xxx"]  # passenger names

        self.login_url = "https://kyfw.12306.cn/otn/login/init"
        self.login_comp_url = "https://kyfw.12306.cn/otn/view/index.html"
        self.search_url = "https://kyfw.12306.cn/otn/leftTicket/init"
        self.order_submit_url = "https://kyfw.12306.cn/otn/confirmPassenger/initDc"

        self.driver = Browser(driver_name="chrome")
        self.driver.driver.set_window_size(1400, 1000)

    def login(self):
        """Fill in the credentials and wait until the user finishes the login.

        The verification code has to be solved by hand; the method polls once
        per second until the browser reaches ``login_comp_url``.
        """
        browser = self.driver
        browser.visit(self.login_url)
        browser.fill("loginUserDTO.user_name", self.username)
        browser.fill("userDTO.password", self.passwd)
        while True:
            if browser.url == self.login_comp_url:
                break
            print("fill in the certi code yourself...")
            sleep(1)
        print("login complete")

    def get_train_index(self):
        """Return the position of ``self.train`` among the listed G-trains.

        Only the first ``div`` whose text contains ``"G"`` is examined. Its
        lines that contain a ``G<digits>`` code are counted until a line equal
        to ``self.train`` is met; if none matches, the number of such lines is
        returned.
        """
        divs = self.driver.find_by_tag("div")
        position = 0
        for div in divs:
            block_text = div.text
            if "G" not in block_text:
                continue
            for line in block_text.split("\n"):
                if not re.search(r"G\d+", line):
                    continue
                if line == self.train:
                    break
                position += 1
            # Only the first matching block is relevant.
            break
        return position

    def start_order(self):
        """Log in, search repeatedly until booking succeeds, then submit.

        The search is repeated until the browser lands on
        ``order_submit_url``; any error during an attempt is printed and the
        next attempt starts. Afterwards every passenger in ``self.person`` is
        ticked and the order is submitted.
        """
        self.login()
        browser = self.driver
        browser.visit(self.search_url)

        # Preselect stations and date through the site's cookies.
        browser.cookies.add({"_jc_save_fromStation": self.from_station})
        browser.cookies.add({"_jc_save_toStation": self.to_station})
        browser.cookies.add({"_jc_save_fromDate": self.date})
        browser.reload()

        browser.find_by_text(u"GC-高铁/城际").click()
        # The duration header is clicked twice.
        browser.find_by_text(u"历时").click()
        browser.find_by_text(u"历时").click()

        attempt = 1
        while browser.url != self.order_submit_url:
            try:
                print("Searching for {} time".format(attempt))
                browser.find_by_text(u"查询").click()

                if not browser.find_by_text(u"网络繁忙"):
                    wanted = self.get_train_index()
                    browser.find_by_text(u"预订")[wanted].click()
                else:
                    # "Network busy" dialog: acknowledge it and retry.
                    browser.find_by_text(u"确认").click()
                sleep(1)
            except Exception as err:
                print(err)
            finally:
                attempt += 1

        print("enter order submit page")
        sleep(1)
        for passenger in self.person:
            browser.find_by_text(passenger).last.click()
        try:
            browser.find_by_text(u"提交订单").click()
        except Exception as err:
            print(err)
        print("order complete not tru")
Esempio n. 8
0
browser = Browser()

# As of March 27, 2016
# NOTE: the 'rb'/'wb' modes below are what the Python 2 csv module expects.
inp = csv.reader(open(fd + 'Complete_list.csv', 'rb'))
head = next(inp)
# Show the column layout of the input file.
for e, i in enumerate(head):
    print('%s %s' % (e, i))

fd2 = 'g:/health_data/provider_urls/'

for i in inp:
    # Column 2 holds the index URL; skip rows without a usable one.
    if not re.search("^None|NOT SUBMITTED", i[2]):
        print(i[1])
        try:
            # The context manager flushes and closes each provider file
            # instead of leaving one open handle per row.
            with open(os.path.join(fd2, i[1] + '.csv'), 'wb') as out_file:
                outp = csv.writer(out_file, delimiter='\t')
                browser.visit(i[2])
                sleep(1)
                try:
                    need = browser.find_by_css('pre')
                    proc = json.loads(need[0].text)
                except Exception:
                    # No usable <pre>: fall back to the body text, cutting
                    # off anything that follows the closing brace.
                    need = browser.find_by_tag('body')
                    proc = json.loads(re.sub('}.*?$', '}', need[0].text))
                for p in proc['provider_urls']:
                    outp.writerow([p])
            #call('taskkill /F /IM firefox.exe')
        except Exception:
            # Best effort: report the failure and continue with the next row.
            traceback.print_exc()
Esempio n. 9
0
class GetInfo():
    conn = ''
    browser = ''

    def __init__(self, browser, conn=None, to_sql=None):

        # 数据库存储初始化 表名称、登陆用户名
        self.exportSql = to_sql

        if browser:
            self.browser = browser
            self.conn = conn
        else:
            try:
                self.browser = Browser("chrome", headless=False)
                self.browser.driver.set_window_size(1600, 1000)
            except Exception as e:
                self.browser = None

    # 登陆
    def login(self, info):
        # 账号,密码,
        account = info.get('account')
        password = info.get('password')

        if self.browser.url == 'https://17dz.com/manage/index.html':
            if account == self.loginName():
                return '登陆成功'

        self.browser.visit('https://17dz.com/home/login.html')

        # 校验是否为空
        if not all([account, password]):
            return jsonify(errmsg='参数不全')

        with self.browser.get_iframe('loginIframe') as iframe:
            iframe.find_by_css('input[id="id__0"]').first.fill(account)
            iframe.find_by_css('input[id="id__1"]').first.fill(password)
            iframe.find_by_text('登录').first.click()

        time.sleep(2)

        if self.browser.url == 'https://17dz.com/manage/index.html':
            return '登陆成功'
        else:
            return '账号和密码不匹配,请重新输入'

    def loginName(self):
        js = '''getloginName=function(){

                $.ajax({
                    type:'GET',
                    url: 'https://17dz.com/xqy-portal-web/manage/login/getLoginSession?_=1544003601263',
                    contentType:'application/json;charset=utf-8',
                    success: function (result) {
                        if(result.success) {
                            top.Id = result;
                        } else {
                            top.Id = result;
                        }
                    }
                })
            }'''

        self.browser.evaluate_script(js)
        self.browser.evaluate_script('getloginName()')
        i = 1
        loginName = ''
        while True:
            if self.browser.evaluate_script("top.Id"):
                loginName = self.browser.evaluate_script('top.Id').get(
                    'body').get('loginName')
                break
            elif i > 5:
                break
            else:
                time.sleep(0.5)
                i += 1
                pass
        self.browser.evaluate_script('top.Id=""')

        return loginName

    # 登陆成功后获取账套列表
    def getAllzt(self):

        # self.browser.find_by_text('凭证查看').first.click()
        import datetime
        d = datetime.datetime.now()
        period = datetime.date(d.year - (d.month == 1), d.month - 1 or 12,
                               1).strftime('%Y')

        js = '''getCustomerId=function(){
                var data = {customerName:"", pageNo:1 , pageSize:"500" , searchType:"ALL"}
                $.ajax({
                    type:'POST',
                    url: 'https://17dz.com/xqy-portal-web/manage/customer/queryCustomers',
                    contentType:'application/json;charset=utf-8',
                    data : JSON.stringify(data),
                    success: function (result) {
                        if(result.success) {
                            top.customerId = result;
                        } else {
                            top.customerId = result;
                        }
                    }
                })
            }
            '''

        new_js = '''getCustomerId=function(period){
                    var data = {"pageNo":1,
                                "pageSize":"500",
                                "period":period,
                                "customerNoOrNameLike":"",
                                "accountCloseStatus":"",
                                "sortField":"",
                                "sortDirection":false
                        }
                    $.ajax({
                        type:'POST',
                        url: 'https://17dz.com/xqy-portal-web/manage/finance/queryCustomer',
                        contentType:'application/json;charset=utf-8',
                        data : JSON.stringify(data),
                        success: function (result) {
                            if(result.success) {
                                top.customerId = result;
                            } else {
                                top.customerId = result;
                            }
                        }
                    })
                }'''

        self.browser.evaluate_script(new_js)

        self.browser.evaluate_script('getCustomerId("%s")' % period)
        i = 1
        Id = []
        while True:
            if self.browser.evaluate_script("top.customerId"):
                Id = self.browser.evaluate_script(
                    'top.customerId')['body']['list']
                break
            elif i > 5:
                break
            else:
                time.sleep(0.5)
                i += 1
                pass
        self.browser.evaluate_script('top.customerId=""')

        customerId = Id[0]['customerId']

        js2 = '''getAllzt=function(customerId){
                var data = {key: "", customerId: customerId}
                $.ajax({
                    type:'POST',
                    url: 'https://17dz.com/xqy-portal-web/manage/workbench/getAccountCustomers',
                    contentType:'application/json;charset=utf-8',
                    data : JSON.stringify(data),
                    success: function (result) {
                        if(result.success) {
                            top.zt_data = result;
                        } else {
                            top.zt_data = result;
                        }
                    }
                })
            }'''

        self.browser.evaluate_script(js2)

        self.browser.evaluate_script('getAllzt("%s")' % customerId)
        i = 1
        ztData = {}
        while True:
            if self.browser.evaluate_script("top.zt_data"):
                ztData = self.browser.evaluate_script('top.zt_data').get(
                    'body', '')
                break
            elif i > 5:
                break
            else:
                time.sleep(0.5)
                i += 1
                pass
        self.browser.evaluate_script('top.zt_data=""')

        return ztData

    # 切换账套,得到账套的起始和结束日期
    def switchZt(self, params):

        customerId = params['customerId']
        accountSetId = params['accountSetId']
        customerName = params['customerName']
        customerShortName = params['customerShortName']
        # getKhxx('127059881','4320800','上海路卡服装有限公司','上海路卡服装有限公司')
        js = '''getKhxx=function(customerId,accountSetId,customerName,customerShortName){
                $.ajax({
                    type:'PUT',
                    url:'https://17dz.com/xqy-portal-web/finance/account/session/accountSet',
                    data : {customerId:customerId,accountSetId:accountSetId,customerName:customerName,customerShortName:customerShortName,platform:'yqdz'},
                    dataType: 'json',
                    success: function (result) {
                        if(result.success) {
                            top.khxx = result;
                        } else {
                            top.khxx = result;
                        }
                    }
                })
            }'''

        self.browser.evaluate_script(js)

        self.browser.evaluate_script(
            'getKhxx("%s","%s","%s","%s")' %
            (customerId, accountSetId, customerName, customerShortName))
        i = 1
        khxx = {}
        while True:
            if self.browser.evaluate_script("top.khxx"):
                khxx = self.browser.evaluate_script('top.khxx')
                break
            elif i > 5:
                break
            else:
                time.sleep(0.5)
                i += 1
                pass
        self.browser.evaluate_script('top.khxx=""')
        try:
            startQj = khxx.get('body').get('createPeriod')
            endQj = khxx.get('body').get('lastPeriod')
        except Exception as e:
            return '网络异常,请稍后重试'

        dateStart = datetime.datetime.strptime(startQj, '%Y%m')
        dateEnd = datetime.datetime.strptime(endQj, '%Y%m')

        dates = []
        dates.append(dateStart.strftime('%Y%m'))
        while dateStart <= dateEnd:
            dateStart += datetime.timedelta(weeks=4)
            dates.append(dateStart.strftime('%Y%m'))

        datesList = sorted(list(set(dates)))

        return datesList

    # 凭证
    def voucher(self, QjList, ztID, infoname):

        js = '''get_Voucher=function(kjqj_date){
                var data = {"beginPeriod":kjqj_date,
                            "endPeriod":kjqj_date,
                            "titleCode":"",
                            "beginNumber":"",
                            "endNumber":"",
                            "beginMoney":"",
                            "endMoney":"",
                            "summary":"",
                            "pageSize":"1000",
                            "pageNo":0
                }
                $.ajax({
                    type: "POST",
                    url: 'https://17dz.com/xqy-portal-web/finance/accDocs/list',
                    contentType:'application/json;charset=utf-8',
                    data: JSON.stringify(data),
                    success: function (result) {
                        if(result.success) {
                            top.voucher_data = result;
                        } else {
                            top.voucher_data = result;
                        }
                    }
                })
            }
            '''
        self.browser.evaluate_script(js)
        #创建数据库的infonameID
        infonameID = self.exportSql.init_infoname(infoname).id
        try:
            for Qj in QjList:

                self.browser.evaluate_script('get_Voucher("%s")' % Qj)
                i = 1
                voucher_data = {}
                while True:
                    if self.browser.evaluate_script("top.voucher_data"):
                        data = self.browser.evaluate_script('top.voucher_data')
                        if data:
                            voucher_data = data.get('body')

                        break
                    elif i > 5:
                        break
                    else:
                        time.sleep(0.5)
                        i += 1
                        pass
                self.browser.evaluate_script('top.voucher_data=""')

                voucherString = json.dumps(voucher_data)

                # 保存到数据库
                self.exportSql.insert_new(ztID, Qj, infonameID, voucherString)
        except Exception as e:
            msg = '凭证导出失败:{}'.format(str(e))
        else:
            msg = '凭证导出成功'
        return msg

    # 科目余额表
    def kmsheet(
        self,
        QjList,
        ztID,
        infoname,
    ):

        # 创建数据库的infonameID
        infonameID = self.exportSql.init_infoname(infoname).id
        try:
            for Qj in QjList:
                #获取科目余额
                km_data = self.getKMBody(Qj)
                '''第一版
                # #获取数量金额式
                # slje_data = self.getKMBody(Qj,"B,S")
                # #获取外币金额式
                # wbje_data = self.getKMBody(Qj,"B,W")
                # li = {}
                # li['kmye'] = km_data
                # li['slje'] = slje_data
                # li['wbje'] = wbje_data'''

                # 保存到数据库
                kmString = json.dumps(km_data)
                self.exportSql.insert_new(ztID, Qj, infonameID, kmString)

        except Exception as e:
            msg = '科目余额导出失败:{}'.format(str(e))
        else:
            msg = '科目余额导出成功'
        return msg

    def getKMBody(self, Qj):
        js = '''getKMBody=function(kjqj_date){
                        var data = {
                            "beginPeriod":kjqj_date,
                            "endPeriod":kjqj_date,
                            "beginTitleCode":"",
                            "endTitleCode":"",
                            "pageNo":0,
                            "pageSize":5000,
                            "showYearAccumulated":true,
                            "assistantId":"",
                            "assistantType":"",
                            "showAssistant":true,
                            "titleLevel":6,
                            "showEndBalance0":true,
                            "showQuantity":false,
                            "fcurCode":""
                            }
                        $.ajax({
                            type: "POST",
                            url: 'https://17dz.com/xqy-portal-web/finance/accountBalanceSheet/query',
                            contentType:'application/json;charset=utf-8',
                            data: JSON.stringify(data),
                            success: function (result) {
                                if(result.success) {
                                    top.KMBody = result;
                                } else {
                                    top.KMBody = result;
                                }
                            }
                        })
                    }
                    '''
        self.browser.evaluate_script(js)

        # 获取科目余额
        self.browser.evaluate_script('getKMBody("%s")' % Qj)
        data_km = {}
        i = 1
        while True:
            if self.browser.evaluate_script("top.KMBody"):
                data_km = self.browser.evaluate_script('top.KMBody')['body']
                break
            elif i > 5:
                break
            else:
                time.sleep(0.5)
                i += 1
                pass
        self.browser.evaluate_script('top.KMBody=""')

        if Qj == "201601":
            print(data_km)

        return data_km

    # 辅助核算余额表  说明:
    def fzhssheet(self, QjList, company):
        """Export the auxiliary-accounting balance sheet for every period.

        For each period in ``QjList`` the page-side helper ``getFzhssheet``
        is invoked, the browser is polled for ``top.fzhs_data`` and the
        quantity ("s") and foreign-currency ("w") variants are fetched via
        ``getFZBody``. The collected data is then handed to
        ``self.exportSql.update_fzsheet``.

        Args:
            QjList: iterable of accounting periods (formatted into the JS
                call as strings, e.g. "201601").
            company: passed through unchanged to ``update_fzsheet``.

        Returns:
            A status message (str) describing success or the failure reason.
        """
        js = '''getFzhssheet=function(kjqj_date){
                var data = {
                    "assistantType":"c",
                    "beginCode":"",
                    "endCode":"",
                    "beginPeriod":kjqj_date,
                    "endPeriod":kjqj_date,
                    "assistantId":"",
                    "bwsTypeList":"B",
                    "level":"6",
                    "showEmptyBalance":false,
                    "firstAccountTitle":false,
                    "accumulated":true
                }
                $.ajax({
                    type: "POST",
                    url: 'https://17dz.com/xqy-portal-web/finance/assistantBalanceBook/list',
                    contentType:'application/json;charset=utf-8',
                    data: JSON.stringify(data),
                    success: function (result) {
                        if(result.success) {
                            top.fzhs_data = result;
                        } else {
                            top.fzhs_data = result;
                        }
                    }
                })
            }'''

        self.browser.evaluate_script(js)

        fzhs_dict = {}

        for Qj in QjList:
            self.browser.evaluate_script('getFzhssheet("%s")' % Qj)

            # Reset for every period: without this a poll timeout would
            # either raise UnboundLocalError (first period) or silently
            # store the previous period's data under the current key.
            fzhsye = {}
            # Up to six checks, half a second apart, for the AJAX result.
            for attempt in range(6):
                result = self.browser.evaluate_script("top.fzhs_data")
                if result:
                    fzhsye = result['body']
                    break
                if attempt < 5:
                    time.sleep(0.5)
            # Clear the page-side slot so the next period starts clean.
            self.browser.evaluate_script('top.fzhs_data=""')

            fzhs_dict[str(Qj)] = {
                'fzhsye': fzhsye,
                'slje': self.getFZBody(Qj, "s", "B,S"),
                'wbje': self.getFZBody(Qj, "w", "B,W"),
            }

        if not fzhs_dict:
            return '获取辅助核算余额表失败'

        # Persist to the database.
        try:
            self.exportSql.update_fzsheet(company, fzhs_dict)
        except Exception as e:
            return '辅助核算余额表保存失败:%s' % e

        return '辅助核算余额表导出成功'

    def getFZBody(self, Qj, balanceType, bwsTypeList):
        """Fetch one variant of the auxiliary balance book for a period.

        Defines the page-side helper ``getFZBody``, calls it for ``Qj`` and
        polls ``top.FZBody`` for the AJAX response.

        Args:
            Qj: accounting period, used as both begin and end period.
            balanceType: value sent as ``balanceType`` in the request.
            bwsTypeList: value sent as ``bwsTypeList`` in the request.

        Returns:
            The ``body`` of the response, or an empty dict when no response
            arrived within the polling window.
        """
        # The request must use the period passed in; the previous version
        # hard-coded "201811" and therefore ignored kjqj_date entirely.
        js = '''getFZBody=function(kjqj_date,balanceType,bwsTypeList){
                var data = {"beginPeriod":kjqj_date,
                            "endPeriod":kjqj_date,
                            "beginCode":"",
                            "endCode":"",
                            "assistantType":"c",
                            "assistantId":"",
                            "balanceType":balanceType,
                            "ifCondition":false,
                            "bwsTypeList":bwsTypeList,
                            "firstAccountTitle":false,
                            "showEmptyBalance":false,
                            "level":"6",
                            "accumulated":true
                }
                $.ajax({
                    type: "POST",
                    url: 'https://17dz.com/xqy-portal-web/finance/assistantBalanceBook/list',
                    contentType:'application/json;charset=utf-8',
                    data: JSON.stringify(data),
                    success: function (result) {
                        if(result.success) {
                            top.FZBody = result;
                        } else {
                            top.FZBody = result;
                        }
                    }
                })
            }'''

        self.browser.evaluate_script(js)

        # Kick off the request for this period.
        self.browser.evaluate_script('getFZBody("%s","%s","%s")' %
                                     (Qj, balanceType, bwsTypeList))

        km_data = {}
        # Up to six checks, half a second apart, for the AJAX result.
        for attempt in range(6):
            result = self.browser.evaluate_script("top.FZBody")
            if result:
                km_data = result['body']
                break
            if attempt < 5:
                time.sleep(0.5)
        # Clear the page-side slot so the next call starts clean.
        self.browser.evaluate_script('top.FZBody=""')

        return km_data

    #现金流量
    def xjll(self, QjList, ztID, infoname):
        """Export the monthly and quarterly cash-flow statements.

        A second browser window is opened, each report URL is visited in
        it and the JSON text shown in the page's ``<pre>`` element is
        stored through ``self.exportSql.insert_new``. The helper window is
        closed again even when an error occurs.

        Args:
            QjList: iterable of period strings; the first four characters
                are read as the year and the rest as a two-digit month.
            ztID: passed as the first argument to ``insert_new``.
            infoname: base name; '-月报' / '-季报' are appended for the
                monthly / quarterly records.

        Returns:
            A status message (str) covering BOTH the monthly and the
            quarterly export, joined by ';'. (Previously the monthly result
            was overwritten and never reported.)
        """
        # Two-digit month -> quarter number, both as strings.
        quarter_of_month = dict(
            ('%02d' % m, str((m - 1) // 3 + 1)) for m in range(1, 13))

        self.browser.evaluate_script('window.open("about:blank")')
        self.browser.windows.current = self.browser.windows[1]

        try:
            # Query the initial period. The parsed value is not used further
            # in this method; the request and JSON parse are kept so the
            # call sequence and failure behaviour stay as before.
            jd_url = 'https://17dz.com/xqy-portal-web/finance/cashFlowInitial/queryInitialPeriod?_=1547174631618'
            self.browser.visit(jd_url)
            jd_jsonStr = self.browser.find_by_tag('pre').first.text
            json.loads(jd_jsonStr)

            # Monthly report: create the infoname record, then one row per
            # period.
            Y_infonameID = self.exportSql.init_infoname(infoname + '-月报').id
            try:
                for Qj in QjList:
                    time.sleep(0.5)
                    y_url = 'https://17dz.com/xqy-portal-web/finance/cashFlowSheet?accountPeriod={}&sheetType=2&_=1543301545878'.format(
                        Qj)
                    self.browser.visit(y_url)
                    Y_jsonStr = self.browser.find_by_tag('pre').first.text
                    self.exportSql.insert_new(ztID, Qj, Y_infonameID, Y_jsonStr)
            except Exception as e:
                y_msg = '现金流量月报导出失败:{}'.format(str(e))
            else:
                y_msg = '现金流量月报导出成功'

            # Quarterly report: create the infoname record, then one row per
            # period, keyed as "<year>-<quarter>".
            J_infonameID = self.exportSql.init_infoname(infoname + '-季报').id
            try:
                for Qj in QjList:
                    year, month = Qj[:4], Qj[4:]
                    if month not in quarter_of_month:
                        # Previously an unknown month left `jd` unbound or
                        # silently reused the quarter of the prior period.
                        raise ValueError('无效的会计期间:{}'.format(Qj))
                    jd = quarter_of_month[month]
                    time.sleep(0.5)
                    j_url = 'https://17dz.com/xqy-portal-web/finance/cashFlowSheet/quarterlyReport?year={}&season={}&_=1543301545880'.format(
                        year, jd)
                    self.browser.visit(j_url)
                    J_jsonStr = self.browser.find_by_tag('pre').first.text
                    qj = '%s-%s' % (year, jd)
                    self.exportSql.insert_new(ztID, qj, J_infonameID, J_jsonStr)
            except Exception as e:
                j_msg = '现金流量季报导出失败:{}'.format(str(e))
            else:
                j_msg = '现金流量季报导出成功'
        finally:
            # Always close the helper window and return to the main one.
            self.browser.windows.current.close()
            self.browser.windows.current = self.browser.windows[0]

        return '{};{}'.format(y_msg, j_msg)

    # 基础设置
    def settings(self, customerId, ztID, accountSetId, QjList, infoname):
        """Export the basic settings: account titles, auxiliary items, currencies.

        Installs the page-side ``getSettings`` helper, downloads each data
        set through ``self.get_settings`` and stores the results with
        ``self.exportSql.insert_new``.

        Args:
            customerId: formatted into the account-title and auxiliary URLs.
            ztID: passed as the first argument to ``insert_new``.
            accountSetId: formatted into the auxiliary-item URLs.
            QjList: iterable of periods for which exchange rates are fetched.
            infoname: base name; a suffix is appended per data set.

        Returns:
            A status message (str).
        """
        set_js = '''getSettings=function(url){
                        $.ajax({
                            type:'GET',
                            url: url,
                            success: function (result) {
                                if(result.success) {
                                    top.load_data = result;
                                } else {
                                    top.load_data = result;
                                }
                            }
                        })
                    }'''
        self.browser.evaluate_script(set_js)

        # --- Account titles -------------------------------------------
        kmID = self.exportSql.init_infoname(infoname + '-科目').id
        km_dict = {}
        # First fetch the list of account-title types (code + name) ...
        code_url = 'https://17dz.com/xqy-portal-web/finance/accountTitle/types?systemAccountId=1&_=1542955356592'
        km_url_tpl = ('https://17dz.com/xqy-portal-web/finance/customerAccountTitles/'
                      'listByType?customerId={}&subjectType={}&_=1542955356593')
        for entry in self.get_settings(code_url).get('body', []):
            code = entry['code']
            name = entry['name']
            # ... then the titles of each type, keyed by the type's name.
            km_dict[name] = self.get_settings(km_url_tpl.format(customerId, code))

        # --- Auxiliary accounting items -------------------------------
        fzID = self.exportSql.init_infoname(infoname + '-辅助核算').id
        fz_url_tpl = ('https://17dz.com/xqy-portal-web/finance/{}/list'
                      '/page?key=&accountSetId={}&customerId={}&pageNo=0&pageSize=10000')
        fz_dict = {}
        for kind in ('clients', 'suppliers', 'inventories', 'projects'):
            response = self.get_settings(fz_url_tpl.format(kind, accountSetId, customerId))
            fz_dict[kind] = response.get('body')

        # --- Currencies: one stored row per period --------------------
        bbID = self.exportSql.init_infoname(infoname + '-币别').id
        bb_url_tpl = 'https://17dz.com/xqy-portal-web/finance/exchangeRates/all?accountPeriod={}&_=1542955356686'
        for Qj in QjList:
            rates = self.get_settings(bb_url_tpl.format(Qj))
            self.exportSql.insert_new(ztID, Qj, bbID, json.dumps(rates))

        try:
            # Store account titles, then auxiliary items.
            self.exportSql.insert_new(ztID, '', kmID, json.dumps(km_dict))
            self.exportSql.insert_new(ztID, '', fzID, json.dumps(fz_dict))
        except Exception as err:
            return '基础设置导出成功保存失败:{}'.format(str(err))

        return '基础设置导出成功'

    def get_settings(self, url):
        """Run the page-side ``getSettings(url)`` helper and return its result.

        The helper stores its response in ``top.load_data``; that slot is
        polled up to six times, half a second apart, and cleared afterwards.

        Args:
            url: URL handed to the page-side helper.

        Returns:
            Whatever the page stored in ``top.load_data``, or an empty dict
            when nothing truthy showed up in time.
        """
        try:
            self.browser.evaluate_script('getSettings("%s")' % url)
        except Exception as err:
            # Best effort: report the problem and still poll below.
            print(err)

        settings_data = {}
        attempt = 1
        while not self.browser.evaluate_script("top.load_data"):
            if attempt > 5:
                break
            time.sleep(0.5)
            attempt += 1
        else:
            # Reached only when the slot became truthy (no break).
            settings_data = self.browser.evaluate_script('top.load_data')

        # Clear the slot for the next request.
        self.browser.evaluate_script('top.load_data=""')

        return settings_data

    #获取现金流量
    def get_xjll(self, url):
        """Run the page-side ``xjll(url)`` helper and return its result.

        After triggering the helper the method waits 5.5 seconds, then polls
        ``top.xjll_data`` up to six times, half a second apart, and finally
        clears that slot.

        Args:
            url: URL handed to the page-side helper.

        Returns:
            Whatever the page stored in ``top.xjll_data``, or an empty dict
            when nothing truthy showed up in time.
        """
        try:
            self.browser.evaluate_script('xjll("%s")' % url)
            time.sleep(5.5)
        except Exception as err:
            # Best effort: report the problem and still poll below.
            print(err)

        xjll_data = {}
        attempt = 1
        while not self.browser.evaluate_script("top.xjll_data"):
            if attempt > 5:
                break
            time.sleep(0.5)
            attempt += 1
        else:
            # Reached only when the slot became truthy (no break).
            xjll_data = self.browser.evaluate_script('top.xjll_data')

        # Clear the slot for the next request.
        self.browser.evaluate_script('top.xjll_data=""')

        return xjll_data
and add your preferred driver, Browser('webdriver.chrome') for example;
by default the driver is always webdriver.firefox

NOTE: choose a URL whose page contains broken links to see the
error responses

NOTE: this is a basic example; you can improve it and do whatever you want.
Believe it, you can do almost anything :)

More information, see the docs: http://splinter.cobrateam.info/docs/
"""
from splinter.browser import Browser
from splinter.request_handler.status_code import HttpResponseError

browser = Browser()
# Visit URL
url = "http://splinter.cobrateam.info/"
browser.visit(url)
# Get all links in this page
urls = [a["href"] for a in browser.find_by_tag("a")]
# Visit each one link and verify if is ok
for url in urls:
    try:
        browser.visit(url)
        if browser.status_code.is_success():
            print "(", browser.status_code.code, ") visit to", url, "was a success!"
    except HttpResponseError, e:
        print "(", e.status_code, ") visit to", url, "was fail! Error:", e.reason

browser.quit()
Esempio n. 11
0
class TestEngine(object):
    """Drive a splinter browser through a list of scripted test actions.

    Actions are executed on a background thread. Class-level settings
    (pause between form fields, whether to hover before clicking, and the
    hover pause) are shared by all instances and changed via ``set_config``.
    """

    __sleep_time = 2
    __mouse_over = True
    __mouse_over_sleep = 1

    def __init__(self, browser_name, execute_path=None):
        """Open a fullscreen browser.

        Args:
            browser_name: driver name handed to ``Browser``.
            execute_path: optional driver executable path; when given it is
                passed to ``Browser`` as ``executable_path``.
        """
        browser_kwargs = {"fullscreen": True}
        if execute_path is not None:
            browser_kwargs["executable_path"] = execute_path
        self.__browser = Browser(browser_name, **browser_kwargs)
        self.__quit = False

    @staticmethod
    def set_config(config):
        """Update the shared settings from ``config`` (a dict-like object).

        Keys read: ``sleep_time``, ``mouse_over``, ``mouse_over_sleep``.
        A missing or ``None`` value resets the setting to its default.
        """
        TestEngine.__sleep_time = 2 if config.get("sleep_time") is None else config.get("sleep_time")
        TestEngine.__mouse_over = True if config.get("mouse_over") is None else config.get("mouse_over")
        TestEngine.__mouse_over_sleep = 1 if config.get("mouse_over_sleep") is None else config.get("mouse_over_sleep")

    def test_list_acts(self, domain, action_list, back_fun=None, result_back=None):
        """Run every action object in ``action_list`` on a new thread."""
        thread_deal = threading.Thread(target=self.__test_list_thread, args=(domain, action_list, back_fun, result_back), name="TestEngine deal tester")
        thread_deal.start()

    def test_deal(self, domain, action_obj, back_fun=None, result_back=None):
        """Run a single action object on a new thread."""
        thread_deal = threading.Thread(target=self.__test_do_thread, args=(domain, action_obj, back_fun, result_back), name="TestEngine deal tester")
        thread_deal.start()

    def quit(self):
        """Mark the engine as quit and close the browser."""
        self.__quit = True
        self.__browser.quit()

    def is_quited(self):
        """Return True once ``quit`` has been called."""
        return self.__quit

    def __finish(self, wait_close, back_fun):
        """Shared epilogue: when ``wait_close`` is non-zero, wait that long
        and then either quit the browser or call ``back_fun``."""
        if wait_close != 0:
            sleep(wait_close)

            if back_fun is None:
                self.quit()
            else:
                back_fun()

    def __test_list_thread(self, domain, action_list, back_fun=None, result_back=None):
        """Thread body for ``test_list_acts``."""
        try:
            for action in action_list:
                self.__test_do(domain, action, result_back)
        except Exception as e:
            # Format the message; passing `e` as a second argument produced
            # a tuple-style message instead of readable text.
            raise Exception("[Error code] deal test list failed, error code=%s" % e)
        finally:
            # The first action object decides the close delay; an empty
            # list has nothing to wait for (and must not raise IndexError).
            if action_list:
                self.__finish(action_list[0].waitClose, back_fun)

    def __test_do_thread(self, domain, action_obj, back_fun=None, result_back=None):
        """Thread body for ``test_deal``."""
        try:
            self.__test_do(domain, action_obj, result_back)
        except Exception as e:
            raise Exception("[Error code] deal test failed, error code=%s" % e)
        finally:
            self.__finish(action_obj.waitClose, back_fun)

    def __test_do(self, domain, action_obj, result_back=None):
        """Visit ``domain + action_obj.urlPath`` and execute its actions."""
        test_url = domain+action_obj.urlPath
        self.__browser.visit(test_url)

        # With forms present, the first action is repeated once per form;
        # the remaining actions then run in order.
        action_list = TesterActionData().dict_to_list(action_obj.actionList)
        if action_obj.forms is not None:
            form_action = action_list[0] if action_list else None

            forms = TesterForms().dict_to_list(action_obj.forms)
            for form in forms:
                params = TesterFormData().dict_to_list(form.params)
                for param in params:
                    value = param.formElValue
                    # Only byte strings need decoding; text passes through.
                    if isinstance(value, bytes):
                        value = value.decode("utf-8")
                    self.__set_value(int(param.formType), param.formElName, value, int(param.index))
                    sleep(TestEngine.__sleep_time)

                if form_action is not None:
                    self.__deal_action(form_action, result_back)

                sleep(action_obj.sleepTime)

            for action_deal in action_list[1:]:
                self.__deal_action(action_deal, result_back)
                sleep(action_obj.sleepTime)
        else:
            for action_deal in action_list:
                self.__deal_action(action_deal, result_back)
                sleep(action_obj.sleepTime)

    def __set_value(self, form_type, el_name, el_value, index):
        """Fill, check, click or select the ``index``-th matching element."""
        elements = self.__event_element(form_type, el_name)
        element = elements[index]
        if element['type'] in ['text', 'password', 'tel'] or element.tag_name == 'textarea':
            element.value = el_value
        elif element['type'] == 'checkbox':
            if el_value:
                element.check()
            else:
                element.uncheck()
        elif element['type'] == 'radio':
            element.click()
        elif element._element.tag_name == 'select':
            element.find_by_value(el_value).first._element.click()
        else:
            element.value = el_value

    def __event_element(self, el_type, el_value):
        """Look up elements using the finder selected by ``el_type``.

        Raises:
            ValueError: when ``el_type`` maps to an unknown locator kind.
        """
        ele_type = EL_TYPE.value(el_type)

        finders = {
            "id": "find_by_id",
            "name": "find_by_name",
            "tag": "find_by_tag",
            "value": "find_by_value",
            "selector": "find_by_xpath",
            "css": "find_by_css",
        }
        if ele_type not in finders:
            raise ValueError("Test Engine can't deal the element type:%s, el_type:%s" % (ele_type, el_type))
        return getattr(self.__browser, finders[ele_type])(el_value)

    def __deal_action(self, action_data, result_back=None):
        """Perform one action and, if requested, report the text check.

        Raises:
            Exception: when the action type is not supported.
        """
        action_type = ACTION_TYPE.value(action_data.action)

        # After a navigation, work in the most recently opened window.
        self.__browser.windows.current = self.__browser.windows[-1]

        mouse_handlers = {
            "click": self.__mouse_of_click,
            "double click": self.__mouse_of_double_click,
            "right click": self.__mouse_of_right_click,
        }
        element_methods = {
            "mouse over": "mouse_over",
            "mouse out": "mouse_out",
            "select": "select",
        }
        if action_type not in mouse_handlers and action_type not in element_methods:
            raise Exception("don't find action for action:%s" % action_data.action)

        element = self.__event_element(action_data.elType, action_data.elValue)[int(action_data.index)]
        if action_type in mouse_handlers:
            mouse_handlers[action_type](element)
        else:
            getattr(element, element_methods[action_type])()

        # Nothing to report without an expected text or a callback; this
        # also keeps the error path from calling a None callback.
        if action_data.testerResult is None or result_back is None:
            return
        try:
            sleep(3)
            result_back(TesterResult(action_data.testerResult, self.__browser.is_text_present(action_data.testerResult)))
        except Exception:
            result_back(TesterResult(action_data.testerResult, False))

    def __hover_then(self, event_deal_obj, action):
        """Optionally hover over the element (per config), then run ``action``."""
        if TestEngine.__mouse_over:
            event_deal_obj.mouse_over()
            sleep(TestEngine.__mouse_over_sleep)
        action()

    def __mouse_of_click(self, event_deal_obj):
        """Left-click the element."""
        self.__hover_then(event_deal_obj, event_deal_obj.click)

    def __mouse_of_right_click(self, event_deal_obj):
        """Right-click the element (also when hovering is disabled; the
        previous fallback performed a plain left click)."""
        self.__hover_then(event_deal_obj, event_deal_obj.right_click)

    def __mouse_of_double_click(self, event_deal_obj):
        """Double-click the element (also when hovering is disabled; the
        previous fallback performed a plain left click)."""
        self.__hover_then(event_deal_obj, event_deal_obj.double_click)
class LemonLemon_douyin(object):
    """Download Douyin videos whose share links are listed in a text file."""

    def __init__(self, width=500, height=300):
        """
        Start the headless Chrome browser used to resolve download links.

        ``width`` and ``height`` are accepted but not used.
        """
        chrome_options = Options()
        chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"'
        )
        self.driver = Browser(driver_name="chrome",
                              options=chrome_options,
                              headless=True)

    def get_video_urls(self, input_f):
        """
        Read the share links from a text file.

        Also sets ``self.date_today`` (date, hour and minute of the call),
        which prefixes the generated file names.

        Parameters:
            input_f: path of a text file; every line starting with "http"
                (after trimming whitespace) is taken as one video link
        Returns:
            video_names: generated file names, "<date_today>_<n>.mp4"
            video_urls: the links, without surrounding whitespace
        """
        video_names = []
        video_urls = []
        now_date = datetime.datetime.now()
        self.date_today = (str(now_date.date()) + "_" + str(now_date.hour) +
                           ":" + str(now_date.minute))
        with open(input_f) as f:
            for line in f:
                # Strip the line ending: it used to be stored as part of
                # the URL and was then typed into the resolver page.
                link = line.strip()
                if link.startswith("http"):
                    video_urls.append(link)
                    video_names.append(
                        self.date_today + "_" + str(len(video_urls)) + ".mp4")

        return video_names, video_urls

    def get_download_url(self, video_url):
        """
        Resolve the (watermarked) download address of a video.

        Parameters:
            video_url: share-page address of the video
        Returns:
            download_url: first entry of the page's play-address list
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, "lxml")
        script = bf.find_all("script")[-1]
        # Raw string so the escaped brackets reach the regex engine intact.
        video_url_js = re.findall(r"var data = \[(.+)\];", str(script))[0]
        video_html = json.loads(video_url_js)
        download_url = video_html["video"]["play_addr"]["url_list"][0]
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """
        Download one video to ``video_name``, printing progress to stdout.

        Parameters:
            video_url: share-page address of the video
            video_name: destination file path
            watermark_flag: when true, resolve the link via
                ``remove_watermark``; otherwise via ``get_download_url``
        Raises:
            requests.HTTPError: when the server answers with an error
                status, so the caller can record the failed link
        """
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)

        chunk_size = 1024
        size = 0
        with closing(requests.get(video_url, stream=True,
                                  verify=False)) as response:
            # Surface 4xx/5xx instead of silently writing nothing.
            response.raise_for_status()
            if response.status_code != 200:
                return
            # The header may be absent; 0 means "size unknown".
            content_size = int(response.headers.get("content-length", 0))
            sys.stdout.write("  [文件大小]:%0.2f MB\n" %
                             (content_size / chunk_size / 1024))

            with open(video_name, "wb") as out:
                for data in response.iter_content(chunk_size=chunk_size):
                    out.write(data)
                    size += len(data)
                    out.flush()

                    if content_size:
                        sys.stdout.write("  [下载进度]:%.2f%%" %
                                         float(size / content_size * 100) +
                                         "\r")
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """
        Resolve the watermark-free download address through the
        douyin.iiilab.com page.

        Parameters:
            video_url: share-page address of the video
        Returns:
            the ``href`` of the first link in the result block
        """
        self.driver.visit("http://douyin.iiilab.com/")
        self.driver.find_by_tag("input").fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath(
            '//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, "lxml")
        return bf.find("a").get("href")

    def run(self, input_f):
        """
        Download every video listed in ``input_f``.

        Files go to "douyin_download<date_today>" in the current directory;
        links that failed are written to "error_url.txt", one per line.

        Parameters:
            input_f: path of the text file with the share links
        Returns:
            None
        """
        error_url = []
        video_names, video_urls = self.get_video_urls(input_f)
        download_dir = "douyin_download" + self.date_today
        os.makedirs(download_dir, exist_ok=True)
        print("视频下载中:共有%d个作品!\n" % len(video_urls))
        for num, (raw_name, link) in enumerate(zip(video_names, video_urls)):
            print("  解析第%d个视频链接 [%s] 中,请稍后!\n" % (num + 1, link))
            # Random pause between requests.
            random_wait = random.uniform(3, 5)
            print("waiting...", random_wait)
            time.sleep(random_wait)
            # Drop both kinds of path separator from the file name.
            video_name = raw_name.replace("\\", "").replace("/", "")
            try:
                self.video_downloader(
                    link,
                    os.path.join(download_dir, video_name),
                )
            except Exception:
                # Record the failure and continue with the next link.
                print("**************************")
                print("ERROR", link)
                error_url.append(link)

            print("\n")
        self.driver.close()
        with open("error_url.txt", "w") as f:
            # Actually write the failed links (the file used to stay empty).
            f.writelines(link + "\n" for link in error_url)
        print("下载完成!")
        print("出错数量:", len(error_url))
Esempio n. 13
0
"""
Simply visit a URL and print some information about the page.
This is a starting point to be improved in the future by adding more
ways of getting data from a page with the splinter API.
"""
from splinter.browser import Browser

browser = Browser()
# Visit URL
url = "http://splinter.cobrateam.info/"
browser.visit(url)

# by property
print 'URL:', browser.url
print 'Page Title:', browser.title
# method
print 'H1:', browser.find_by_tag('h1').first.value
print 'Total Links:', len(browser.find_by_tag('a'))

browser.quit()
Esempio n. 14
0
class LemonLemon_douyin(object):
    """Download Douyin videos described by dict-literal lines in a file."""

    def __init__(self, width=500, height=300):
        """
        Start the headless Chrome browser used to resolve download links
        and create the ``Tools`` helper.

        ``width`` and ``height`` are accepted but not used.
        """
        chrome_options = Options()
        chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"'
        )
        self.driver = Browser(driver_name="chrome",
                              options=chrome_options,
                              headless=True)

        self.tool = Tools()

    def get_video_urls(self, input_f):
        """
        Read the video descriptions from a text file.

        Also sets ``self.date_today`` (date, hour and minute of the call).

        Parameters:
            input_f: path of a text file; each non-blank line is a Python
                dict literal with the keys "ID", "des_md5" and "link"
        Returns:
            video_names: file names, "<ID>_<des_md5>.mp4"
            video_urls: the "link" values
        """
        video_names = []
        video_urls = []
        now_date = datetime.datetime.now()
        self.date_today = (str(now_date.date()) + "_" + str(now_date.hour) +
                           ":" + str(now_date.minute))

        with open(input_f) as f:
            for line in f:
                # Blank lines (e.g. a trailing newline) used to crash eval.
                if not line.strip():
                    continue
                # SECURITY: eval executes arbitrary code found in the input
                # file. Only use trusted files; if the lines are plain
                # literals, ast.literal_eval would be a safe replacement.
                info = eval(line)
                video_urls.append(info["link"])
                video_names.append(info["ID"] + "_" + info["des_md5"] + ".mp4")

        return video_names, video_urls

    def get_download_url(self, video_url):
        """
        Resolve the (watermarked) download address of a video.

        Parameters:
            video_url: share-page address of the video
        Returns:
            download_url: first entry of the page's play-address list
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, "lxml")
        script = bf.find_all("script")[-1]
        # Raw string so the escaped brackets reach the regex engine intact.
        video_url_js = re.findall(r"var data = \[(.+)\];", str(script))[0]
        video_html = json.loads(video_url_js)
        download_url = video_html["video"]["play_addr"]["url_list"][0]
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """
        Download one video to ``video_name``, printing progress to stdout.

        Parameters:
            video_url: share-page address of the video
            video_name: destination file path
            watermark_flag: when true, resolve the link via
                ``remove_watermark``; otherwise via ``get_download_url``
        Raises:
            requests.HTTPError: when the server answers with an error
                status, so the caller can record the failed link
        """
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)

        chunk_size = 1024
        size = 0
        with closing(requests.get(video_url, stream=True,
                                  verify=False)) as response:
            # Surface 4xx/5xx instead of silently writing nothing.
            response.raise_for_status()
            if response.status_code != 200:
                return
            # The header may be absent; 0 means "size unknown".
            content_size = int(response.headers.get("content-length", 0))
            sys.stdout.write("  [文件大小]:%0.2f MB\n" %
                             (content_size / chunk_size / 1024))

            with open(video_name, "wb") as out:
                for data in response.iter_content(chunk_size=chunk_size):
                    out.write(data)
                    size += len(data)
                    out.flush()

                    if content_size:
                        sys.stdout.write("  [下载进度]:%.2f%%" %
                                         float(size / content_size * 100) +
                                         "\r")
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """
        Resolve the watermark-free download address through the
        douyin.iiilab.com page.

        Parameters:
            video_url: share-page address of the video
        Returns:
            the ``href`` of the first link in the result block
        """
        self.driver.visit("http://douyin.iiilab.com/")
        self.driver.find_by_tag("input").fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath(
            '//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, "lxml")
        return bf.find("a").get("href")

    def run(self, input_f):
        """
        Download every video listed in ``input_f``.

        Files are stored as "<parent of cwd>/DOWNLOAD/<ID>/<name>"; files
        that already exist are skipped. Links that failed are written to
        "error_url.txt", one per line.

        Parameters:
            input_f: path of the text file with the video descriptions
        Returns:
            None
        """
        error_url = []
        video_names, video_urls = self.get_video_urls(input_f)

        download_root = os.path.join(
            os.path.abspath(os.path.dirname(os.getcwd())), "DOWNLOAD")
        if not os.path.exists(download_root):
            os.mkdir(download_root)

        print("视频下载中:共有%d个作品!\n" % len(video_urls))
        for num, (video_name, link) in enumerate(zip(video_names, video_urls)):
            print("  解析第%d个视频链接 [%s] 中,请稍后!\n" % (num + 1, link))
            # Random pause between requests.
            random_wait = random.uniform(3, 5)
            print("waiting...", random_wait)
            time.sleep(random_wait)

            # The part of the name before the first "_" selects the folder.
            ID = video_name.split("_")[0]
            os.makedirs(os.path.join(download_root, ID), exist_ok=True)

            video_file = os.path.join(download_root, ID, video_name)
            if os.path.exists(video_file):
                print(video_name + "文件已存在")
                continue

            try:
                self.video_downloader(link, video_file)
                self.tool.writeToFile(video_name, "SuccessDownload")
            except Exception:
                # Record the failure and continue with the next link.
                print("**************************")
                print("ERROR", link)
                error_url.append(link)
            print("\n")

        with open("error_url.txt", "w") as f:
            # Actually write the failed links (the file used to stay empty).
            f.writelines(link + "\n" for link in error_url)
        print("下载完成!")

        print("出错数量:", len(error_url))
Esempio n. 15
0
class DouYin(object):
	"""Download the videos posted by a Douyin user, by default through a watermark-removal site."""

	def __init__(self, width = 500, height = 300, executable_path = 'D:/chromedriver'):
		"""
		Start the headless Chrome session used by remove_watermark.
		Parameters:
			width: not used by this class; kept so existing callers keep working
			height: not used by this class; kept so existing callers keep working
			executable_path: location of the chromedriver binary
		"""
		# Headless browser with a desktop user agent.
		chrome_options = Options()
		chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"')
		self.driver = Browser(driver_name='chrome', executable_path=executable_path, options=chrome_options, headless=True)

	@staticmethod
	def _clean_name(name):
		"""Return name with every backslash and forward slash removed, so it is a single path component."""
		return name.replace('\\', '').replace('/', '')

	def get_video_urls(self, user_id):
		"""
		Collect the share URLs of every video posted by a user.
		Parameters:
			user_id: the ID typed by the user; compared against the 'unique_id' of the first search hit
		Returns:
			video_names: list of file names (share description + '.mp4', or a running number for untitled videos)
			video_urls: list of share URLs, parallel to video_names
			nickname: the user's nickname
		"""
		video_names = []
		video_urls = []
		unique_id = ''
		search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
		# NOTE(review): the same search is repeated until the first hit's unique_id
		# equals user_id; if it never matches this loop does not terminate, and an
		# empty 'user_list' raises IndexError. Confirm the intended retry policy.
		while unique_id != user_id:
			# verify=False disables TLS certificate verification for this request.
			req = requests.get(url = search_url, verify = False)
			user_info = json.loads(req.text)['user_list'][0]['user_info']
			aweme_count = user_info['aweme_count']
			uid = user_info['uid']
			nickname = user_info['nickname']
			unique_id = user_info['unique_id']
		# Ask for all of the user's posts in a single page (count = aweme_count).
		user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
		req = requests.get(url = user_url, verify = False)
		html = json.loads(req.text)
		untitled = 1
		for each in html['aweme_list']:
			share_desc = each['share_info']['share_desc']
			# This exact description is treated as "no title"; such videos are numbered instead.
			if '抖音-原创音乐短视频社区' == share_desc:
				video_names.append(str(untitled) + '.mp4')
				untitled += 1
			else:
				video_names.append(share_desc + '.mp4')
			video_urls.append(each['share_info']['share_url'])

		return video_names, video_urls, nickname

	def get_download_url(self, video_url):
		"""
		Resolve a share page to the watermarked video download address.
		Parameters:
			video_url: share page URL of the video
		Returns:
			download_url: first entry of video.play_addr.url_list found in the page data
		"""
		req = requests.get(url = video_url, verify = False)
		bf = BeautifulSoup(req.text, 'lxml')
		# The payload is read from the last <script> tag, in a `var data = [...]` assignment.
		script = bf.find_all('script')[-1]
		video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
		video_html = json.loads(video_url_js)
		download_url = video_html['video']['play_addr']['url_list'][0]
		return download_url

	def video_downloader(self, video_url, video_name, watermark_flag=True):
		"""
		Download one video to disk, printing progress to stdout.
		Parameters:
			video_url: share page URL of the video
			video_name: path of the file to write
			watermark_flag: when true, fetch the watermark-free address via
				remove_watermark; otherwise use get_download_url
		Returns:
			None
		"""
		if watermark_flag:
			video_url = self.remove_watermark(video_url)
		else:
			video_url = self.get_download_url(video_url)
		chunk_size = 1024
		with closing(requests.get(video_url, stream=True, verify = False)) as response:
			# Check the status before touching headers or creating the file.
			if response.status_code != 200:
				sys.stdout.write('  [下载失败]:HTTP %d\n' % response.status_code)
				return
			# The header may be absent (e.g. chunked transfer); 0 means "size unknown".
			content_size = int(response.headers.get('content-length', 0))
			if content_size:
				sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

			size = 0
			with open(video_name, "wb") as out:
				for data in response.iter_content(chunk_size = chunk_size):
					out.write(data)
					size += len(data)
					# A percentage can only be shown when the total size is known.
					if content_size:
						sys.stdout.write('  [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
						sys.stdout.flush()

	def remove_watermark(self, video_url):
		"""
		Obtain a watermark-free download address through douyin.iiilab.com.
		Parameters:
			video_url: share page URL of the video
		Returns:
			href of the first link in the site's result paragraph
		"""
		self.driver.visit('http://douyin.iiilab.com/')
		self.driver.find_by_tag('input').fill(video_url)
		self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
		html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html
		bf = BeautifulSoup(html, 'lxml')
		return bf.find('a').get('href')

	def run(self):
		"""
		Prompt for a user ID and download every video into a directory named after the user's nickname.
		Parameters:
			None
		Returns:
			None
		"""
		self.hello()
		user_id = input('请输入ID(例如40103580):')
		video_names, video_urls, nickname = self.get_video_urls(user_id)
		os.makedirs(nickname, exist_ok=True)
		print('视频下载中:共有%d个作品!\n' % len(video_urls))
		for num, (raw_name, video_url) in enumerate(zip(video_names, video_urls), start=1):
			print('  解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, video_url))
			# Strip both kinds of path separator so the title cannot escape the target directory.
			video_name = self._clean_name(raw_name)
			self.video_downloader(video_url, os.path.join(nickname, video_name))
			print('\n')

		print('下载完成!')

	def hello(self):
		"""
		Print the welcome banner.
		Parameters:
			None
		Returns:
			None
		"""
		print('*' * 100)
		print('\t\t\t\t抖音App视频下载小助手')
		print('\t\t作者:Jack Cui')
		print('*' * 100)
Esempio n. 16
0
br.set_cookiejar(cj)

browser = Browser()
            
#As of March 27, 2016
inp = csv.reader(file(fd+'Complete_list.csv','rb'))
head = inp.next()
for e,i in enumerate(head):
    print e,i

fd2 = 'g:/health_data/provider_urls/'

for i in inp:
    if not re.search("^None|NOT SUBMITTED",i[2]):
        print i[1]
        try:
            outp = csv.writer(open(os.path.join(fd2,i[1]+'.csv'),'wb'),delimiter='\t')
            browser.visit(i[2])
            sleep(1)
            try:
                need = browser.find_by_css('pre')
                proc = json.loads(need[0].text)
            except:
                need = browser.find_by_tag('body')
                proc = json.loads(re.sub('}.*?$','}',need[0].text))
            for p in proc['provider_urls']:
                outp.writerow([p])
            #call('taskkill /F /IM firefox.exe')
        except:
            traceback.print_exc()