Ejemplo n.º 1
0
def scrap(url, fail_time=0):
    timeout = config.TIMEOUT

    print u'正在请求', url, u', 请稍后...'

    try:
        driver = config.DRIVER
        driver.get(url)
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.ID, "J_TabRecommends"))
        )
        result = get_recommends(driver, config.MAX_TRY)
        if result:
            print u'查找成功'
            html = driver.page_source
            parse_content(html)
        else:
            print u'请求超时, 获取失败, 此页面不存在相应内容'
    except TimeoutException:
        if fail_time >=2 :
            print u'请求超时, 正在切换代理, 继续重试'
            update_proxy_pool()
            new_proxy_driver()
        else:
            print u'请求超时,正在切换会话重试'
            new_driver()
        fail_time = fail_time + 1
        if config.CONSOLE_OUTPUT:
            print u'当前页面请求失败数', fail_time
        if fail_time == config.MAX_FAIL:
            update_proxy_pool()
            if config.CONSOLE_OUTPUT:
                print u'失败次数过多, 跳过此请求'
            return False
        scrap(url, fail_time)
    except (socket.error, urllib2.URLError):
        print u'请求页面过于频繁, 请求被中断, 正在切换会话重试'
        new_driver()
        fail_time = fail_time + 1
        if config.CONSOLE_OUTPUT:
            print u'当前页面请求失败数', fail_time
        if fail_time == config.MAX_FAIL:
            if config.CONSOLE_OUTPUT:
                print u'失败次数过多, 跳过此请求'
            return False
        scrap(url, fail_time)
    except (WindowsError, OSError, Exception):
        print u'未知错误, 跳过继续运行'
Ejemplo n.º 2
0
def scrap(url, fail_time=0):
    timeout = config.TIMEOUT

    print u'正在请求', url, u', 请稍后...'

    try:
        driver = config.DRIVER
        driver.get(url)
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.ID, "J_TabRecommends")))
        result = get_recommends(driver, config.MAX_TRY)
        if result:
            print u'查找成功'
            html = driver.page_source
            parse_content(html)
        else:
            print u'请求超时, 获取失败, 此页面不存在相应内容'
    except TimeoutException:
        if fail_time >= 2:
            print u'请求超时, 正在切换代理, 继续重试'
            update_proxy_pool()
            new_proxy_driver()
        else:
            print u'请求超时,正在切换会话重试'
            new_driver()
        fail_time = fail_time + 1
        if config.CONSOLE_OUTPUT:
            print u'当前页面请求失败数', fail_time
        if fail_time == config.MAX_FAIL:
            update_proxy_pool()
            if config.CONSOLE_OUTPUT:
                print u'失败次数过多, 跳过此请求'
            return False
        scrap(url, fail_time)
    except (socket.error, urllib2.URLError):
        print u'请求页面过于频繁, 请求被中断, 正在切换会话重试'
        new_driver()
        fail_time = fail_time + 1
        if config.CONSOLE_OUTPUT:
            print u'当前页面请求失败数', fail_time
        if fail_time == config.MAX_FAIL:
            if config.CONSOLE_OUTPUT:
                print u'失败次数过多, 跳过此请求'
            return False
        scrap(url, fail_time)
    except (WindowsError, OSError, Exception):
        print u'未知错误, 跳过继续运行'
Ejemplo n.º 3
0
def index():
    """Render the index page and accept uploads of a header file.

    Non-POST requests render ``index.html`` with whatever is stored under
    ``'data'`` in the session.  A POST must carry a ``header_file`` part
    with a non-empty filename; its content is parsed with ``parse_content``,
    the result is stored in the session, and the client is redirected back
    to this view.  Invalid uploads flash a message and redirect to the
    requesting URL.
    """
    if request.method != 'POST':
        return render_template('index.html', data=session.get('data'))

    # The POST request must contain the file part at all.
    if 'header_file' not in request.files:
        flash('No file')
        return redirect(request.url)

    uploaded = request.files['header_file']
    # When the user selects no file, the browser still submits an empty
    # part without a filename.
    if uploaded.filename == '':
        flash('No selected file')
        return redirect(request.url)

    raw = uploaded.read()
    session['data'] = parse_content(raw)
    # Post/Redirect/Get: answer the POST with a redirect to the GET view.
    return redirect(url_for('index'))
Ejemplo n.º 4
0
def get_entries(service, calendar_id, time_min):
    """Collect the events of a calendar starting from ``time_min``.

    Pages through ``service.events().list(...)``, fetches every listed
    event individually, and stores it under the key that ``parse_content``
    derives from the event's description (per the original documentation:
    the LMD calendar and event ID as a 2-tuple of integers).

    Returns the resulting dictionary of key -> event.
    """
    events_api = service.events()
    entries = {}
    next_token = ""
    while True:
        query = {"calendarId": calendar_id, "timeMin": time_min.strftime(TIME_FORMAT)}
        # Only send a page token once the previous response supplied one.
        if next_token:
            query["pageToken"] = next_token
        page = events_api.list(**query).execute()
        # A response without an "items" entry contributes nothing.
        for listed in page.get("items", ()):
            fetched = events_api.get(calendarId=calendar_id, eventId=listed["id"]).execute()
            entries[parse_content(fetched["description"])] = fetched
        next_token = page.get("nextPageToken")
        if not next_token:
            return entries
Ejemplo n.º 5
0
 def test(self):
     """Check that parsing CONTENT yields exactly two questions."""
     question_count = len(parse_content(CONTENT))
     self.assertEqual(question_count, 2)
Ejemplo n.º 6
0
import parse as parser
import disk


trellis_url = 'https://rippleneuro.com/support/software-downloads-updates/'


if __name__ == '__main__':
    current_version = disk.read_version() # Current version of trellis

    try:
        # Get web html
        html = network.get_content(trellis_url) 

        # Parse web html to get new version
        new_version = parser.parse_content(html) 
        if current_version != new_version:
            # New version available
            disk.write_version(new_version)
            ctypes.windll.user32.MessageBoxW(
                0, f'Trellis Version {new_version} is available', 
                'New Version', 1)
        else:
            # No new version
            ctypes.windll.user32.MessageBoxW(
                0, f'Trellis Version {current_version} is up to date', 
                'No New Version', 1)

    except (network.NetworkException, parser.ParseException) as e:
        print(e)
        ctypes.windll.user32.MessageBoxW(
Ejemplo n.º 7
0
def save(url):
    """Download the page at ``url``, parse it, and save the result as text.

    The page is fetched with ``html_download`` using ``'utf-8'``, split into
    a title and contents by ``parse_content``, and passed to ``save_to_txt``.
    """
    page = html_download(url, 'utf-8')
    parsed = parse_content(page)
    # parse_content must yield exactly two values: the title and the contents.
    title, contents = parsed
    save_to_txt(title, contents)
Ejemplo n.º 8
0
 def test(self):
     """Check that parsing CONTENT yields exactly two questions."""
     question_count = len(parse_content(CONTENT))
     self.assertEqual(question_count, 2)