Example #1
0
def _preload():
    """Warm the proxy cache for every feed URL known to the feed database.

    Each failure is logged and skipped so a single bad feed cannot abort
    the preload pass.  Returns a ("Done", 204, {}) response tuple.
    """
    # Iterating the mapping yields its keys directly; no need for .keys().
    for url in feed_db():
        try:
            proxy(url)
        except Exception:
            log.warning("Couldn't preload %s", url)
    return ("Done", 204, {})
Example #2
0
def hashFile(event, context):
    """Lambda handler: compute MD5 and SHA1 of the S3 object named in the
    event and store them next to it as ``<key>.md5`` and ``<key>.sha1``.

    Keys matching the PROXY_PATTERN env var are forwarded to the
    ``hashProxy`` function instead; ``.md5``/``.sha1`` keys are skipped to
    avoid hashing our own output.  Returns ``{"md5": ..., "sha1": ...}``.
    """
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key'].encode('utf8'))
    print("File: %s:%s" % (bucket, key))

    proxyPattern = os.environ.get("PROXY_PATTERN")
    if proxyPattern and re.match(proxyPattern, key):
        return proxy.proxy(event, context, funcName="hashProxy")

    # endswith accepts a tuple of suffixes -- one call instead of an or-chain.
    if key.endswith((".md5", ".sha1")):
        print("Nothing to do")
        return

    try:
        response = s3.get_object(Bucket=bucket, Key=key)

        md5 = hash_stream(response['Body'], hashlib.md5())
        s3.put_object(Bucket=bucket, Key=key + ".md5", Body=md5)

        # BUG FIX: the S3 StreamingBody is not seekable, so the first
        # hash_stream() call exhausts it and the SHA1 pass would digest an
        # empty stream.  Fetch a fresh stream for the second hash.
        response = s3.get_object(Bucket=bucket, Key=key)
        sha1 = hash_stream(response['Body'], hashlib.sha1())
        s3.put_object(Bucket=bucket, Key=key + ".sha1", Body=sha1)

        print("MD5: %s SHA1: %s" % (md5, sha1))

        return {"md5": md5, "sha1": sha1}

    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e
Example #3
0
    def distribute_request(self, http_req_handler):
        '''
        Route a request to the appropriate handler by URL matching rules.

        Proxy rules (when enabled) take precedence and forward the request
        immediately; otherwise registered handlers are tried in order,
        matching on HTTP method and URL pattern.  A request no handler
        accepts is answered with 405.
        '''
        path = urlparse(http_req_handler.path).path
        handled = False

        # Proxy support: the first rule that rewrites the URL wins.
        if C('enable_proxy') and utils.isDict(C('proxy')):
            for reg, target in C('proxy').items():
                target_path = get_proxy_url(http_req_handler.path, reg, target)
                if target_path:
                    log.info('[proxy](%s) to (%s)' % (http_req_handler.path, target_path))
                    return proxy(target_path, Request(http_req_handler), Response(http_req_handler))

        for h in self.handlers:
            # BUG FIX: the original relied on `or`/`and` precedence, so an
            # 'ALL'-method handler matched every path regardless of its
            # pattern.  Group the method test and the pattern test explicitly.
            method_ok = 'ALL' == h.get('method') or h.get('method') == http_req_handler.command
            if method_ok and re.findall(h.get('pattern'), path):
                handled = True
                ret = (h.get('handler'))(Request(http_req_handler), Response(http_req_handler))
                # A handler returning True passes the request on to the
                # next matching handler; anything else ends the chain.
                if True == ret:
                    continue
                else:
                    break
        # If not handled by any handler, answer 405.
        if not handled:
            log.error('%s is not handled' % path)
            # BUG FIX: send_header() writes a "Keyword: value" header line,
            # not a status line; send_response() is the call that emits the
            # HTTP status.
            http_req_handler.send_response(405, '%s not supported' % path)
            http_req_handler.end_headers()
            # BUG FIX: `self.http_req_handler` does not exist -- the
            # request handler is the method parameter.
            http_req_handler.wfile.close()
Example #4
0
    def requesetGet(self, url):
        """
        Centralised wrapper for proxied GET requests.

        Lazily creates the session and proxy objects on first use, then
        retries: indefinitely on HTTP 429 (rate limited), and up to three
        attempts on any other non-200 status, sleeping 1-3 s between tries.

        :param url: URL to fetch through the current proxy.
        :return: the ``requests`` response object, or None if the request
                 raised an exception.
        """

        # Lazily create the shared session on first call.
        if not hasattr(self, 'ses'):
            self.ses = requests.session()

        ret = None

        try:
            # Lazily create the proxy helper on first call.
            if not hasattr(self, 'proxy'):
                self.proxy = proxy.proxy()

            retry_count = 1
            while True:
                ret = self.ses.get(url, proxies=self.proxy.getProxy())

                if ret.status_code == 429:
                    # Rate limited: back off a random 1-3 s and retry
                    # (unbounded -- 429 is expected to be transient).
                    wait = random.randint(1, 3)
                    time.sleep(wait)
                    self.log.info(u'requesetGet, 请求代理超过5个,返回429{0},随机等待{1}秒'.format(url, wait))
                    continue
                elif ret.status_code != 200 and retry_count < 3:
                    # Other failures: retry at most twice more.
                    wait = random.randint(1, 3)
                    time.sleep(wait)
                    self.log.info(u'requesetGet, 请求失败,返回code:{2}, url:{0},随机等待{1}秒'.format(url, wait, ret.status_code))
                    retry_count += 1
                    continue
                else:
                    break
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # still propagate; the failure itself is logged, not re-raised,
            # so callers get None as the best-effort result.
            # ip = self.proxy.getCurIp()
            # self.log.error(u'代理请求数据异常, url:{0}, ip:{1}'.format(url, ip))
            self.log.error(u'代理请求数据异常, url:{0}'.format(url))
            self.log.error(traceback.format_exc())
        # NOTE(review): this "completed" log line also runs after an
        # exception -- kept for behavioural compatibility.
        self.log.info(u'代理请求数据完成,url:{0}'.format(url))

        return ret
Example #5
0
 def __init__(self, world_module, port, arg):
     """Wire a world instance to a local proxy and connect to the MUD.

     Starts the proxy on ``::1:<port>`` in a background thread, then
     connects to the host/port the world reports.  On any failure the
     proxy is stopped and the world is shut down before re-raising.
     """
     # Encodings used when translating between the MUD and the client.
     self.mud_encoding = 'iso-8859-1'
     self.client_encoding = 'utf-8'
     self.world_module = world_module
     self.arg = arg
     # getClass() returns the world class; instantiate it bound to us.
     self.world = world_module.getClass()(self, self.arg)
     try:
         # proxy() returns a read fd, a write fd, a stop event, and the
         # callable that runs the proxy loop.
         self.socketToPipeR, self.pipeToSocketW, self.stopFlag, runProxy = proxy(
             '::1', port)
         # Wrap the raw write fd in a binary file object for writes.
         self.pipeToSocketW = os.fdopen(self.pipeToSocketW, 'wb')
         self.proxyThread = threading.Thread(target=runProxy)
         self.proxyThread.start()
         host_port = self.world.getHostPort()
         self.log("Connecting")
         self.telnet = self.connect(*host_port)
         self.log("Connected")
     except:
         # Best-effort teardown: signal the proxy thread to stop and let
         # the world clean up, then propagate the original error.
         self.log("Shutting down")
         self.stopFlag.set()
         self.world.quit()
         raise
 def __init__(self, browser_type='chrome', path=''):
     """Set up the AliCompany scraper: named logger plus a chrome proxy.

     NOTE(review): ``browser_type`` and ``path`` are accepted but unused
     in this constructor -- the proxy is always created for 'chrome'.
     """
     self.logger = logging.getLogger('AliCompany')
     self.proxy = proxy('chrome')
Example #7
0
 def __init__(self, browser_type='chrome'):
     """Set up the PcCompany scraper: logger, Mongo, Redis, proxy, browser.

     :param browser_type: passed through to init_browser; the proxy itself
         is always created for 'chrome'.
     """
     self.logger = logging.getLogger('PcCompany')
     self.init_mongo()
     self.init_redis()
     self.proxy = proxy('chrome')
     self.init_browser(browser_type)
import logging
import traceback
import selenium.webdriver.support.ui as ui
import pymongo
import datetime
from pic_recognise import PicRecognise
from proxy import proxy

# Python 2 only: re-expose setdefaultencoding and force utf-8 globally.
reload(sys)
sys.setdefaultencoding('utf-8')

# Image files used by the captcha recogniser.
pic_vcode_name = os.path.join(os.path.dirname(__file__), 'image',
                              'vcode.jpg')  # test image file
pic_ab_name = os.path.join(os.path.dirname(__file__), 'image', 'ab.jpg')

# NOTE(review): this rebinds the imported name ``proxy`` to an instance,
# shadowing the class for all later code in this module.
proxy = proxy('chrome')


def init_browser(browser_type):
    """Placeholder browser initialiser; intentionally does nothing."""
    return None


def browser_quit(browser):
    """Forcefully terminate a Selenium browser's driver process.

    Best effort: any failure (browser already gone, no process, kill
    denied) is swallowed.  The local rebind to None in ``finally`` only
    drops this function's reference; callers keep their own.
    """
    try:
        if browser:
            pid = browser.service.process.pid
            os.kill(pid, 9)  # SIGKILL the webdriver service process
    except Exception:
        # Fixed the Python-2-only `except Exception, e` syntax; the bound
        # exception was never used, so it is simply dropped.
        pass
    finally:
        browser = None
Example #9
0
 def __init__(self):
     """Set up the MBaiduKeyword scraper: logger, Oracle, phantomjs proxy."""
     self.logger = logging.getLogger('MBaiduKeyword')
     self.init_oracle()
     self.proxy = proxy('phantomjs')
Example #10
0
from proxy import proxy
from sys import argv, exit

try:
    # argv layout: <Server IP> <Server Port> <Client IP> <Client Port>
    p = proxy(argv[1], int(argv[2]), argv[3], int(argv[4]))
except:
    # Any failure (missing or non-numeric arguments) is treated as bad
    # usage: print help and exit non-zero.  (Python 2 print statement.)
    print 'Use: main.py <Server IP> <Server Port> <Client IP> <Client Port>'
    exit(1)

p.initialize()
p.start()
Example #11
0
def urls(environ):
    """Dispatch a WSGI request by path.

    ``/fun/page`` is served by the page handler; every other path falls
    through to the generic proxy handler.
    """
    path = environ['PATH_INFO']
    if path == '/fun/page':
        return proxy.page(environ)
    return proxy.proxy(environ)
Example #12
0
 def proxy_run(self):
     """Build a proxy from the stored query settings and run it."""
     from proxy import proxy
     proxy(self._proxy_qs).on_run()
Example #13
0
 def setUp(self):
     """Create a fresh target object and a proxy wrapping it per test."""
     self.object = X()
     self.proxy = proxy.proxy(self.object)
Example #14
0
from proxy import proxy, HttpResponse


def func(req):
    """Echo the ``s`` request argument back; 400 when it is missing."""
    if 's' in req.args:
        return HttpResponse(req.args['s'])
    return HttpResponse('', code=400)


main = proxy(func)
Example #15
0
 def proxy_run(self):
     """Instantiate the proxy with the stored query settings and run it."""
     from proxy import proxy
     runner = proxy(self._proxy_qs)
     runner.on_run()
Example #16
0
class auto_brush():
    """Traffic bot: drives proxied browser visits to a list of URLs,
    spreading a daily quota of visits across the hours of the day.

    Python 2 code (print statements, integer division semantics).
    """

    # Shared proxy-IP provider.
    p = proxy.proxy()

    # Target sites and the number of in-flight tasks per site.
    urls = ['https://www.woquba.cn']
    url_weight = []

    # Pool of "ip:port" proxy strings currently believed usable.
    ips = []

    threads = []
    thread_num = 10

    # Relative visit weight for each of the 24 hours of the day.
    time_distribution = [
        2, 1, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 9, 7, 6, 5, 5, 7, 8, 9, 9, 8, 7, 5
    ]
    total_fb = sum(time_distribution)
    # Total daily visit budget.
    base_num = 10000

    last_hour = 0
    last_min = 0
    # +/-10% jitter applied to each hour's quota.
    rand = random.randint(-10, 10)
    last_task_num = int(
        (base_num / total_fb) * (time_distribution[last_hour] + rand / 10.0))

    # Seconds left in the current hour and visits left to schedule in it.
    residue_sec = 3600
    residue_task = last_task_num * len(urls)

    #########################################################################
    #
    #                   Main auto-traffic loop
    #
    #########################################################################
    def run(self):
        self.__init_url_weight__()
        #self.__update_ips__()

        while True:

            #print 'residue task num =',self.residue_task,'residue_sec =',self.residue_sec

            self.__update_task__()
            if len(self.ips) <= 0:
                self.__update_ips__()

            if self.residue_task > 0:
                #if len(self.threads) < self.residue_task:
                self.__add_thread_task__()
                self.residue_task = self.residue_task - 1
                #print "add thread task,residue task is",self.residue_task
            else:
                time.sleep(self.residue_sec)

            # Pace the remaining tasks evenly across the remaining seconds.
            # NOTE(review): if residue_task hits 0 while residue_sec > 0
            # this divides by zero -- verify __update_task__ always resets
            # the counters first.
            if self.residue_sec > self.residue_task:
                st = self.residue_sec / self.residue_task
                #print "sleep time is",st,"residue_sec is ",self.residue_sec
                time.sleep(st)
                self.residue_sec = self.residue_sec - st

    #########################################################################
    #
    #            Refresh the task quota and the proxy-IP list
    #
    #########################################################################
    def __update_task__(self):
        now = datetime.datetime.now()
        hour = now.hour
        minite = now.minute  # NOTE(review): typo for "minute", kept as-is

        # NOTE(review): `is not` compares identity, not equality; it only
        # works here because CPython caches small ints -- should be `!=`.
        if hour is not self.last_hour:
            self.rand = random.randint(-10, 10)
            self.last_task_num = int(
                (self.base_num / self.total_fb) *
                (self.time_distribution[hour] + self.rand / 10.0))
            self.last_hour = hour
            self.residue_sec = 3600
            self.residue_task = self.last_task_num * len(self.urls)

        # NOTE(review): `|` is bitwise-or; works on bools but `or` is meant.
        # The right operand is always False here because last_hour was just
        # updated above -- confirm intent.
        if ((minite - self.last_min) > 5) | (hour is not self.last_hour):
            self.__update_ips__()
            self.last_min = minite

    #########################################################################
    #
    #                   Start one worker thread
    #
    #########################################################################
    def __add_thread_task__(self):
        # Reap finished threads first.
        # NOTE(review): isAlive() was removed in Python 3.9 (use is_alive());
        # also mutating self.threads while iterating it can skip entries.
        for t in self.threads:
            if not t.isAlive():
                self.threads.remove(t)
                #print "thread release,thread num ",t

        #if len(self.threads)<self.thread_num:
        index = self.__get_url_index__()
        t = threading.Thread(target=self.__task__, args=(index, ))
        self.threads.append(t)
        #t.setDaemon(True)
        t.start()

    #########################################################################
    #
    #                   Worker thread body
    #
    #########################################################################
    def __task__(self, index):

        b = browser.SeleniumBrowser()
        self.url_weight[index] = self.url_weight[index] + 1
        flag = False

        # Probe random proxies against baidu until one responds; dead
        # proxies are dropped from the pool.
        while flag is False:
            try:
                proxyIp = random.choice(self.ips)
                requests.get('http://www.baidu.com',
                             timeout=5,
                             proxies={"http": "http://" + proxyIp})
            except:
                print 'connect failed', proxyIp
                self.ips.remove(proxyIp)
            else:
                flag = True

        # Split "ip:port" and consume the chosen proxy.
        proxyL = proxyIp.split(':')
        ht = 'http'
        ip = proxyL[0]
        port = proxyL[1]
        self.ips.remove(proxyIp)
        print 'start task.....proxy is', proxyL, 'residue_sec is', self.residue_sec, 'residue_task is', self.residue_task, 'residue_ips is', len(
            self.ips)

        b.open_url_proxy(self.urls[index], ht, ip, port)
        #b.open_url(self.urls[index])
        self.url_weight[index] = self.url_weight[index] - 1
        #print "thread start,thread num is",len(self.threads),index

    #########################################################################
    #
    #        Initialise the per-site task counters to zero
    #
    #########################################################################
    def __init_url_weight__(self):
        for i in range(len(self.urls)):
            # self.url_weight[i]=0
            self.url_weight.append(0)

    #########################################################################
    #
    #                   Refresh the proxy-IP pool
    #
    #########################################################################
    def __update_ips__(self):
        self.ips = self.p.get_proxy_ips()

    #########################################################################
    #
    #     Pick the index of the site with the fewest in-flight tasks
    #
    #########################################################################
    def __get_url_index__(self):
        minNum = self.url_weight[0]
        index = 0
        for i in range(len(self.url_weight)):
            if minNum > self.url_weight[i]:
                minNum = self.url_weight[i]
                index = i

        return index
Example #17
0
def _twitter(id, date=lambda: (date.today() - timedelta(days=10)).strftime("%Y-%m-%d")):
    """Proxy a twitrss.me RSS search for tweets from *id* since *date*.

    NOTE(review): the default is a lambda so "10 days ago" is computed at
    call time, not import time.  Inside the lambda, ``date`` resolves at
    call time to the module-level name (presumably datetime.date) -- the
    parameter only shadows it inside this function's body.  Confirm the
    module does ``from datetime import date, timedelta``.
    """
    # Calling the (possibly caller-supplied) factory yields the date string.
    date = date()
    return proxy(
        "https://twitrss.me/twitter_search_to_rss/"
        + "?term=from%3A{id}+since%3A{date}".format(id=id, date=date)
    )
Example #18
0
def _medium(id):
    """Return the proxied RSS feed for the Medium author *id*."""
    feed_url = "https://medium.com/feed/@" + id
    return proxy(feed_url)
Example #19
0
def _feed(url):
    """Pass an arbitrary feed URL straight through to the proxy."""
    return proxy(url)
Example #20
0
def init():
    """Fetch the Baidu homepage through the proxy; print the body on 200."""
    resp = proxy.proxy('https://www.baidu.com')
    if resp.status_code == 200:
        print(resp.text)
Example #21
0
 def __init__(self, browser_type='chrome'):
     """Set up the MBaiduKeyword scraper: logger, Oracle, chrome proxy.

     NOTE(review): ``browser_type`` is accepted but not used here.
     """
     self.logger = logging.getLogger('MBaiduKeyword')
     self.init_oracle()
     self.proxy = proxy('chrome')