Ejemplo n.º 1
0
def main():
    """Download the configured URLs through a WorkShop and wait for them.

    One VUrlTask is created per entry of ``urls``. The WorkShop is started,
    handed the tasks, and the function then polls once per second until every
    task reports ``isArchived()`` or ``isError()``. Ctrl-C ends the wait
    quietly, any other exception is logged, and in every case the WorkShop is
    asked to stop and joined before returning.
    """
    urls = [
        # 'http://v.youku.com/v_show/id_XNzUyNDE4MTQw.html'
        # 'http://i.youku.com/u/UNTc4NzI3MjY0',
        # 'http://v.youku.com/v_show/id_XNzQ5NDAwMDIw.html?from=y1.1-2.10001-0.1-1',
        # 'http://v.youku.com/v_show/id_XNzUwMTE2MDQw.html?f=22611771',
        # 'http://v.youku.com/v_show/id_XNzQ3MjMxMTYw.html',
        'http://video.sina.com.cn/p/ent/v/m/2014-08-14/102164094039.html'
    ]
    log = util.get_logger()
    bar = ProgressBar()
    ws = WorkShop(tmin=1, tmax=2, log=log)
    dlvs = [VUrlTask(url, 0, 3, './tmp', bar=bar, log=log) for url in urls]
    try:
        ws.serve()
        ws.addTasks(dlvs)
        while dlvs:
            # Filter instead of ``del dlvs[i]`` inside ``enumerate(dlvs)``:
            # deleting while iterating shifts the remaining items left, so the
            # task right after each removed one was skipped for that pass.
            # Slice assignment keeps the same list object that was passed to
            # ws.addTasks, as the in-place delete did.
            dlvs[:] = [dlv for dlv in dlvs
                       if not (dlv.isArchived() or dlv.isError())]
            _sleep(1)
    except KeyboardInterrupt:
        pass
    except Exception as e:
        log.exception(e)
    finally:
        ws.setToStop()
        ws.join()
Ejemplo n.º 2
0
def main():
    """Download the configured URLs through a WorkShop and wait for them.

    One VUrlTask is created per entry of ``urls``. The WorkShop is started,
    handed the tasks, and the function then polls once per second until every
    task reports ``isArchived()`` or ``isError()``. Ctrl-C ends the wait
    quietly, any other exception is logged, and in every case the WorkShop is
    asked to stop and joined before returning.
    """
    urls = [
        # 'http://v.youku.com/v_show/id_XNzUyNDE4MTQw.html'
        # 'http://i.youku.com/u/UNTc4NzI3MjY0',
        # 'http://v.youku.com/v_show/id_XNzQ5NDAwMDIw.html?from=y1.1-2.10001-0.1-1',
        # 'http://v.youku.com/v_show/id_XNzUwMTE2MDQw.html?f=22611771',
        # 'http://v.youku.com/v_show/id_XNzQ3MjMxMTYw.html',
        'http://video.sina.com.cn/p/ent/v/m/2014-08-14/102164094039.html'
    ]
    log = util.get_logger()
    bar = ProgressBar()
    ws = WorkShop(tmin=1, tmax=2, log=log)
    dlvs = [VUrlTask(url, 0, 3, './tmp', bar=bar, log=log) for url in urls]
    try:
        ws.serve()
        ws.addTasks(dlvs)
        while dlvs:
            # Filter instead of ``del dlvs[i]`` inside ``enumerate(dlvs)``:
            # deleting while iterating shifts the remaining items left, so the
            # task right after each removed one was skipped for that pass.
            # Slice assignment keeps the same list object that was passed to
            # ws.addTasks, as the in-place delete did.
            dlvs[:] = [dlv for dlv in dlvs
                       if not (dlv.isArchived() or dlv.isError())]
            _sleep(1)
    except KeyboardInterrupt:
        pass
    except Exception as e:
        log.exception(e)
    finally:
        ws.setToStop()
        ws.join()
Ejemplo n.º 3
0
 def __init__(self, name="<?thread>", log=None):
     """Record the name and logger and create the two state events.

     No thread object is created here: ``_thread`` starts as None. Both
     ``_is_paused`` and ``_is_running`` are threading.Event objects left in
     the cleared state.
     """
     self._name, self._thread = name, None
     self.log = get_logger(log)
     for flag_attr in ("_is_paused", "_is_running"):
         flag = threading.Event()
         # A new Event is already unset; clear() just states it explicitly.
         flag.clear()
         setattr(self, flag_attr, flag)
Ejemplo n.º 4
0
def main():
    """Start an IPScanner and idle until the user interrupts.

    The scanner's ``info_duration`` is set to 5 and the module-level
    ``coutries_filter`` is set to ``{'Korea'}`` before the scanner is started.
    Half a second later ``allip`` is printed if the scanner reports
    ``isAvailable()``. The function then sleeps in one-second steps until
    Ctrl-C, and finally stops and joins the scanner if it is still alive.
    """
    global coutries_filter

    ip_scanner = IPScanner(util.get_logger())
    ip_scanner.info_duration = 5
    coutries_filter = {'Korea'}
    ip_scanner.start()
    _sleep(0.5)
    if ip_scanner.isAvailable():
        print(ip_scanner.allip)
    try:
        _sleep(1)
        while True:
            _sleep(1)
    except KeyboardInterrupt:
        print('stop by user')
    finally:
        if ip_scanner.isAlive():
            ip_scanner.setToStop()
            ip_scanner.join()
Ejemplo n.º 5
0
def main():
    """Start an IPScanner and idle until the user interrupts.

    The scanner's ``info_duration`` is set to 5 and the module-level
    ``coutries_filter`` is set to ``{'Korea'}`` before the scanner is started.
    Half a second later ``allip`` is printed if the scanner reports
    ``isAvailable()``. The function then sleeps in one-second steps until
    Ctrl-C, and finally stops and joins the scanner if it is still alive.
    """
    global coutries_filter

    ip_scanner = IPScanner(util.get_logger())
    ip_scanner.info_duration = 5
    coutries_filter = {'Korea'}
    ip_scanner.start()
    _sleep(0.5)
    if ip_scanner.isAvailable():
        print(ip_scanner.allip)
    try:
        _sleep(1)
        while True:
            _sleep(1)
    except KeyboardInterrupt:
        print('stop by user')
    finally:
        if ip_scanner.isAlive():
            ip_scanner.setToStop()
            ip_scanner.join()
Ejemplo n.º 6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import time
import hashlib
import json
from vavava.httputil import HttpUtil
from vavava.httputil import DownloadStreamHandler
from vavava import util

# Import-time setup; runs before the module logger is created.
# NOTE(review): presumably switches the interpreter's default string encoding
# to UTF-8 -- confirm in vavava.util.
util.set_default_utf8()
# Module-level logger obtained from the project's util helper.
LOG = util.get_logger()
# Charset name constant for this module.
CHARSET = "utf-8"

class Spider:
    """Bundles a pre-configured HttpUtil client with empty crawl state."""

    def __init__(self):
        """Build the HTTP client, set its headers, and reset all state fields.

        The client gets a fixed referer and user-agent plus four extra
        headers, added in the order listed below. ``uuid`` and
        ``flv_location`` start as empty strings, ``channels`` as an empty
        dict, and ``schedule_json`` / ``down_handle`` as None.
        """
        client = HttpUtil(charset="utf-8")
        client.header_refer_ = "http://v.ifeng.com/include/ifengLivePlayer_v1.40.4.swf"
        client.header_user_agent_ = r"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
        extra_headers = (
            ("x-flash-version", "11,5,502,146"),
            ("Accept-Language", "zh-CN"),
            ("Accept", "*/*"),
            ("Proxy-Connection", "Keep-Alive"),
        )
        for header_name, header_value in extra_headers:
            client.add_header(header_name, header_value)
        self.http = client

        self.uuid = ""
        self.flv_location = ""
        self.schedule_json = None
        self.channels = {}
        self.down_handle = None
Ejemplo n.º 7
0
#!/usr/bin/env python
# coding=utf-8

import sys
import json
import StringIO
import BaseHTTPServer
from time import sleep as _sleep
from SimpleHTTPServer import SimpleHTTPRequestHandler
from ipscanner import IPScanner
from vavava import util

# Alias for the HTTP server class from the standard BaseHTTPServer module.
ServerClass = BaseHTTPServer.HTTPServer
# HTTP protocol version string.
Protocol = "HTTP/1.0"
# Module-level logger obtained from the project's util helper.
log = util.get_logger()
# Scanner instance shared at module level; MyRequestHandler.send_head reads
# its currBuff. It is given its own get_logger() result rather than `log`.
gIpScanner = IPScanner(log=util.get_logger())


class MyRequestHandler(SimpleHTTPRequestHandler):
    """Request handler that routes '/' and '/curr' itself."""

    def send_head(self):
        """Dispatch on the request path with any query string removed.

        ``/`` is rewritten to ``/www/index.html`` and delegated to
        SimpleHTTPRequestHandler.send_head. ``/curr`` collects one
        ``[duration, ip, country, timeString]`` row per entry of
        ``gIpScanner.currBuff``.
        """
        content_type = 'text/html; charset=utf-8'
        param = ''
        req_path = self.path
        if req_path.find('?') > 0:
            # Split on the first '?' only: a query string may contain another
            # '?', and an unbounded split would break the two-name unpacking.
            req_path, param = req_path.split('?', 1)
        # Compare with ==. ``('/')`` and ``('/curr')`` are plain strings, not
        # one-element tuples, so ``in`` was a substring test that also let
        # paths such as '/c' or 'curr' into the '/curr' branch.
        if req_path == '/':
            self.path = '/www/index.html'
            return SimpleHTTPRequestHandler.send_head(self)
        elif req_path == '/curr':
            iplist = [[ip.duration, ip.ip, ip.country, ip.timeString]
                      for ip in gIpScanner.currBuff]
Ejemplo n.º 8
0
def getmatches(string):
    """Run the institution-record pattern over *string* via util.reg_helper.

    The pattern has eight capture groups, one per labelled field, in this
    order: institution name, address, district, phone, basic medical-insurance
    site, insurance code, rural cooperative-insurance site, postal code.
    Returns whatever util.reg_helper returns for that pattern.
    """
    pattern = """机构名称:\s*([^\<]*)[^\<]*\</p>[^\>]+>机构地址:\s*([^\<]*)[^\<]*\</p>[^\>]+>区县:\s*([^\<]*)[^\<]*\</p>[^\>]+>机构电话:\s*([^\<]*)[^\<]*\</p>[^\>]+>基本医保点:\s*([^\<]*)[^\<]*\</p>[^\>]+>医保编码:\s*([^\<]*)[^\<]*\</p>[^\>]+>新农合定点:\s*([^\<]*)[^\<]*\</p>[^\>]+>邮政编码:\s*([^\<]*)[^\<]*\</p>[^\>]+>"""
    return util.reg_helper(string, pattern)

def save(matches, num):
    """Write *matches* to ``<num>.txt`` as comma-terminated rows.

    The file starts with a header row of the eight column titles. Each element
    of *matches* is %-formatted into a row of eight fields, so it has to be an
    8-tuple. The module-level counter ``total`` is incremented once per row
    written.
    """
    global total
    row_template = "%s,%s,%s,%s,%s,%s,%s,%s,\n"
    column_titles = ("机构名称", "机构地址", "区县", "机构电话", "基本医保点", "医保编码", "新农合定点", "邮政编码")
    with open("%d.txt" % num, "w") as out:
        out.write(row_template % column_titles)
        for record in matches:
            out.write(row_template % record)
            total += 1

if __name__ == "__main__":
    # Script entry point: fetch every URL from geturls(), extract the records
    # and save them to one numbered file per URL, then print the running
    # total. Ctrl-C exits with status 0; any other error is logged.
    log = util.get_logger()
    try:
        for i, url in enumerate(geturls()):
            save(getmatches(httputil.http_get(url)), i)
        print(total)
    except KeyboardInterrupt:
        print('stop by user')
        exit(0)
    except Exception as e:
        log.exception(e)

Ejemplo n.º 9
0

def mainTest(axel, bar, log):
    """Call fTestFunc once per ``test_urls`` entry for each requested round.

    The rounds come from a hard-coded comma-separated string (currently just
    "1"). Each token is converted to int and passed to fTestFunc as ``n``,
    and a debug line is logged after every call.
    """
    rounds = "1"  # '1,2,3,4,5,6'  # raw_input('n=')
    for token in rounds.split(','):
        n = int(token)
        for md5, url in test_urls.items():
            fTestFunc(axel, bar, url, md5, n, log)
            # mTestFunc(axel, url, md5, n, log)
            log.debug('add a test work: %s,%s,%d', url, md5, n)


from vavava.threadutil import WorkShop
from vavava.util import get_logger
if __name__ == '__main__':
    # Script entry point: start a WorkShop, queue the test downloads, and poll
    # once per second. Whenever all tasks are done the user is asked whether
    # to run the tests again. Ctrl-C ends the loop quietly.
    log = get_logger()
    bar = ProgressBar()
    axel = WorkShop(tmin=2, tmax=5, log=log)
    try:
        if not axel.serve(timeout=3):
            raise ValueError('server not started')
        mainTest(axel, bar, log)
        while True:
            _sleep(1)
            if axel.allTasksDone():
                # Compare with ==. ``('y')`` is the string 'y', not a tuple,
                # so ``in`` was a substring test and an empty answer (just
                # pressing Enter) counted as "yes".
                if raw_input('again ??') == 'y':
                    mainTest(axel, bar, log)
                else:
                    break
    except KeyboardInterrupt:
        pass
Ejemplo n.º 10
0
#!/usr/bin/env python
# coding=utf-8

import sys
import json
import StringIO
import BaseHTTPServer
from time import sleep as _sleep
from SimpleHTTPServer import SimpleHTTPRequestHandler
from ipscanner import IPScanner
from vavava import util

# Alias for the HTTP server class from the standard BaseHTTPServer module.
ServerClass  = BaseHTTPServer.HTTPServer
# HTTP protocol version string.
Protocol     = "HTTP/1.0"
# Module-level logger obtained from the project's util helper.
log = util.get_logger()
# Scanner instance shared at module level; MyRequestHandler.send_head reads
# its currBuff. It is given its own get_logger() result rather than `log`.
gIpScanner = IPScanner(log=util.get_logger())

class MyRequestHandler(SimpleHTTPRequestHandler):
    """Request handler that routes '/' and '/curr' itself."""

    def send_head(self):
        """Dispatch on the request path with any query string removed.

        ``/`` is rewritten to ``/www/index.html`` and delegated to
        SimpleHTTPRequestHandler.send_head. ``/curr`` builds a JSON document
        named 'curr' holding one ``[duration, ip, country, timeString]`` row
        per entry of ``gIpScanner.currBuff``.
        """
        content_type = 'text/html; charset=utf-8'
        param = ''
        req_path = self.path
        if req_path.find('?') > 0:
            # Split on the first '?' only: a query string may contain another
            # '?', and an unbounded split would break the two-name unpacking.
            req_path, param = req_path.split('?', 1)
        # Compare with ==. ``('/')`` and ``('/curr')`` are plain strings, not
        # one-element tuples, so ``in`` was a substring test that also let
        # paths such as '/c' or 'curr' into the '/curr' branch.
        if req_path == '/':
            self.path = '/www/index.html'
            return SimpleHTTPRequestHandler.send_head(self)
        elif req_path == '/curr':
            iplist = [[ip.duration, ip.ip, ip.country, ip.timeString] for ip in gIpScanner.currBuff]
            html = json.dumps({'name': 'curr', 'data': iplist, 'columns': ['duration', 'ip', 'country', 'time']})
Ejemplo n.º 11
0
 def __init__(self, db_path):
     """Create the logger and the database pool; start with empty state.

     The logger level comes from ``CONFIG.log_level``. The pool is built by
     ``sqliteutil.dbpool`` for *db_path* with ``dbutil.DBUrl`` as its class.
     ``categries`` starts as an empty dict and ``pool`` as None.
     """
     logger = util.get_logger(level=CONFIG.log_level)
     url_db_pool = sqliteutil.dbpool(path=db_path, cls=dbutil.DBUrl)
     self.log = logger
     self.categries = {}
     self.pool = None
     self.dbpool = url_db_pool
Ejemplo n.º 12
0
 def __init__(self, path):
     """Initialise from the config file at *path* and normalise the proxy.

     Delegates to SimpleJsonConfig.__init__ with *path*, logs the path, and
     then makes sure ``http_proxy`` exists: a missing, empty, or None value
     ends up as None.
     """
     json_config.SimpleJsonConfig.__init__(self, path)
     util.get_logger().info("load config file:%s", path)
     # getattr plus truthiness covers a missing attribute and an empty value,
     # and also an explicit None, on which len() raised TypeError.
     if not getattr(self, "http_proxy", None):
         self.http_proxy = None
Ejemplo n.º 13
0
                      retrans=True, callback=archive_callback)
    axel.addTask(urltask)

def mainTest(axel, bar, log):
    """Call fTestFunc once per ``test_urls`` entry for each requested round.

    The rounds come from a hard-coded comma-separated string (currently just
    "1"). Each token is converted to int and passed to fTestFunc as ``n``,
    and a debug line is logged after every call.
    """
    rounds = "1"  # '1,2,3,4,5,6'  # raw_input('n=')
    for token in rounds.split(','):
        n = int(token)
        for md5, url in test_urls.items():
            fTestFunc(axel, bar, url, md5, n, log)
            # mTestFunc(axel, url, md5, n, log)
            log.debug('add a test work: %s,%s,%d', url, md5, n)

from vavava.threadutil import WorkShop
from vavava.util import get_logger
if __name__ == '__main__':
    # Script entry point: start a WorkShop, queue the test downloads, and poll
    # once per second. Whenever all tasks are done the user is asked whether
    # to run the tests again. Ctrl-C ends the loop quietly.
    log = get_logger()
    bar = ProgressBar()
    axel = WorkShop(tmin=2, tmax=5, log=log)
    try:
        if not axel.serve(timeout=3):
            raise ValueError('server not started')
        mainTest(axel, bar, log)
        while True:
            _sleep(1)
            if axel.allTasksDone():
                # Compare with ==. ``('y')`` is the string 'y', not a tuple,
                # so ``in`` was a substring test and an empty answer (just
                # pressing Enter) counted as "yes".
                if raw_input('again ??') == 'y':
                    mainTest(axel, bar, log)
                else:
                    break
    except KeyboardInterrupt:
        pass