コード例 #1
0
    def run(self):
        """Drain the work queue and dispatch each download job.

        Every queue item is expected to be a list laid out as
        ``[url, download_type, method, filename]``.  ``"picture"`` jobs are
        handed to ``download_picture``, ``"anything"`` jobs to
        ``download_anything`` and ``"custom"`` jobs are currently ignored.

        A malformed item (not a list, or missing url / download_type) logs a
        warning and stops this worker.
        """
        while not self.workQueue.empty():
            item = self.workQueue.get()
            if not isinstance(item, list):
                WARNING("[Download] error type, return!!")
                return

            # `item` is a list here, so a missing slot can only be IndexError.
            try:
                url = item[0]
            except IndexError:
                WARNING("[Download] url isn't exists, return !!")
                return

            try:
                download_type = item[1]
            except IndexError:
                WARNING("[Download] error in download_type!!, return!!")
                return

            # item[2] holds the request method; dispatching does not need it.
            filename = item[3]

            if download_type == "picture":
                self.download_picture(image_url=url, filename=filename)
            elif download_type == "anything":
                # Passed positionally: download_anything names its first
                # parameter `down_url`, so the former `image_url=` keyword
                # raised TypeError.
                self.download_anything(url, filename=filename)
            elif download_type == "custom":
                # No handler for custom downloads yet.
                pass
コード例 #2
0
ファイル: Schedule.py プロジェクト: sorcerer0001/-scrapy-
 def Putresult_Get(self, response):
     """Append one GET result list to ``self.result_get``.

     ``response`` must be a list; anything else logs a warning and is
     dropped.
     """
     if not isinstance(response, list):
         WARNING("[Schedule] [result] the wrong params!!")
         return
     # Debug output kept as-is.  The parenthesised form prints the same text
     # under Python 2 and is also valid syntax under Python 3.
     print(len(response))
     print("mm")
     self.result_get.append(response)
コード例 #3
0
ファイル: Schedule.py プロジェクト: sorcerer0001/-scrapy-
    def AddTodo_Get(self, object):
        """Queue a GET job built from a ``Request``.

        Rejects (with a warning) anything that is not a ``Request`` or that
        has no url.  Otherwise appends ``[url, method, headers, callback]``
        to ``self.Works_get``.
        """
        if not isinstance(object, Request):
            WARNING("[Schedule] [GET]the wrong params!!")
            return

        request = object
        if request.url is None:
            WARNING("[Schedule] [GET] without url ??")
            return

        work = [
            request.url,
            request.method,
            request.headers,
            request.callback,
        ]
        summary = "[Schedule] [GET] url = {}, method = {}, hearders = {}".format(
            request.url, request.method, request.headers)
        INFO(summary)
        self.Works_get.append(work)
コード例 #4
0
ファイル: Schedule.py プロジェクト: sorcerer0001/-scrapy-
    def AddTodo_Post(self, object):
        """Queue a POST job built from a ``Request``.

        Rejects (with a warning) anything that is not a ``Request`` or that
        lacks a url or a request body.  Otherwise appends
        ``[url, method, request, headers, func]`` to ``self.Works_post``.
        """
        # The parameter shadows the builtin `object`, so the former
        # `isinstance(object, object)` compared the argument with itself and
        # raised TypeError for every instance.  Check against Request, as the
        # sibling scheduling methods do.
        # NOTE(review): assumes POST jobs are Request instances exposing
        # `.request` and `.func` -- confirm against the Request class.
        if not isinstance(object, Request):
            WARNING("[Schedule] [POST] [POST]the wrong params!!")
            return

        obj = object

        if obj.url is None or obj.request is None:
            WARNING("[Schedule] [POST] without url or request??")
            return

        item = [obj.url, obj.method, obj.request, obj.headers, obj.func]
        # Format eagerly so INFO receives a single string, matching how it
        # is called for GET jobs.
        INFO(
            "[Schedule] [POST] url = %s, method = %s, request = %s, headers = %s"
            % (obj.url, obj.method, obj.request, obj.headers))
        self.Works_post.append(item)
コード例 #5
0
ファイル: Schedule.py プロジェクト: sorcerer0001/-scrapy-
    def PutToDownload(self, object):
        """Validate a download ``Request`` and queue it.

        Appends ``[url, download_type, method, filename]`` to
        ``self.download_list``.  The job is dropped with a warning when the
        argument is not a ``Request``, when it has no url, or when it has no
        download_type.  A method other than ``"DOWNLOAD"`` is overridden.
        """
        if not isinstance(object, Request):
            WARNING("[Schedule] input incorrect download params, stop!!")
            # Stop here: the attribute reads below assume a Request.
            return
        obj = object

        url = obj.url
        if url is None:
            WARNING("[Schedule] url is empty ??   stop schedule!!!")
            return
        INFO("[Schedule] The url to download is {}".format(url))

        download_type = obj.download_type
        if download_type is None:
            WARNING("[Schedule] download_type is None, stop schedule")
            # Without a type the download worker cannot dispatch the job.
            return

        method = obj.method
        if method != "DOWNLOAD":
            WARNING(
                "[Schedule] method is not 'DOWNLOAD' , switch method to download"
            )
            # Assignment, not comparison: actually switch the method.
            method = "DOWNLOAD"

        filename = obj.filename
        if filename is None:
            INFO("[Schedule] there is not exists filename")
        else:
            INFO("[Schedule] [download] filename is {}".format(filename))

        self.download_list.append([url, download_type, method, filename])
コード例 #6
0
    def _callback_func(self, lis):
        """Run a callback on a response and schedule whatever it returns.

        ``lis`` is a ``[response, callback]`` list; non-list input is ignored.
        The callback's return value is buffered in ``self.request_buf``, and
        once ``schedule.Judge_empty_get()`` is true the buffer is drained
        (newest first) through ``self.engine_start``.
        """
        if not isinstance(lis, list):
            return

        response, callback = lis[0], lis[1]

        follow_up = callback(response)
        if follow_up is None:
            WARNING("[engine] [callback_func] nothing to call back")
            return

        self.request_buf.append(follow_up)

        if not schedule.Judge_empty_get():
            return

        while len(self.request_buf) != 0:
            self.engine_start(self.request_buf.pop())
コード例 #7
0
    def download_picture(self, image_url, path="", filename=""):
        """Download ``image_url`` and save it as a ``.jpg`` file.

        Args:
            image_url: URL of the picture to fetch.
            path: Target directory.  When empty, ``Config_paser.locate`` is
                used, or the current working directory if that is None.
            filename: Base name without extension; ``.jpg`` is appended.
                When empty, the name is derived from the last segment of the
                URL.

        Any error while fetching or writing is logged via ``WARNING`` and
        swallowed (best-effort download).
        """
        # The default is "", so test for emptiness rather than `is None`.
        if not path:
            if Config_paser.locate is None:
                path = os.getcwd()
            else:
                path = Config_paser.locate

        INFO("[Download] the picture will be downloaded in {}".format(path))

        if filename:
            name = '{}.jpg'.format(filename)
        else:
            # A raw URL contains '/' and cannot be a file name; keep only its
            # last path segment, ignoring any query string.
            base = os.path.basename(image_url.split('?', 1)[0].rstrip('/'))
            base = base or 'download'
            if os.path.splitext(base)[1]:
                name = base
            else:
                name = '{}.jpg'.format(base)

        # Write to the file inside `path`, not to the directory itself.
        target = os.path.join(path, name)

        try:
            image = requests.get(image_url, stream=True)
            with open(target, 'wb') as img:
                img.write(image.content)
        except Exception as e:
            WARNING(e)
コード例 #8
0
    def download_anything(self, down_url, path="", filename=""):
        """Download ``down_url`` and save it to a file.

        Args:
            down_url: URL of the resource to fetch.
            path: Target directory.  When empty, ``Config_paser.locate`` is
                used, or the current working directory if that is None.
            filename: File name to save as.  When empty, the name is derived
                from the last segment of the URL.  A name without an
                extension gets ``.dat`` appended.

        Any error while fetching or writing is logged via ``WARNING`` and
        swallowed (best-effort download).
        """
        # The default is "", so test for emptiness rather than `is None`.
        if not path:
            if Config_paser.locate is None:
                path = os.getcwd()
            else:
                path = Config_paser.locate

        INFO("[Download] [download_anthing] the file will be downloaded in {}".format(path))

        if not filename:
            # A raw URL contains '/' and cannot be a file name; keep only its
            # last path segment, ignoring any query string.
            filename = os.path.basename(down_url.split('?', 1)[0].rstrip('/'))
            filename = filename or 'download'

        # splitext copes with names that contain no dot, where indexing the
        # result of split('.') raised IndexError.
        if not os.path.splitext(filename)[1]:
            filename += '.dat'

        # Write to the file inside `path`, not to the directory itself.
        target = os.path.join(path, filename)

        try:
            result = requests.get(down_url, stream=True)
            with open(target, 'wb') as things:
                things.write(result.content)
        except Exception as e:
            WARNING(e)
コード例 #9
0
ファイル: __Threading.py プロジェクト: sonlia/-scrapy-
    def run(self):
        """Drain the work queue, fetching each request and caching the result.

        For every queued request object this reads its url, method (default
        ``'GET'``), headers, cookie jar and optional form data, performs the
        fetch through ``self.get_url``, and stores a ``response_obj`` holding
        the returned cookie and the xpath-processed body in
        ``item_obj.cache.Response_Cache``.  Methods other than GET and POST
        are skipped.
        """
        while not self.workQueue.empty():
            item_obj = self.workQueue.get()
            url = item_obj.url
            method = item_obj.method if item_obj.method else 'GET'
            cache = item_obj.cache
            headers = item_obj.headers or {}
            meta = item_obj.cookieJar or None
            cookie = None

            if isinstance(meta, dict):
                cookie = meta.get('cookieJar', None)
            elif meta is not None:
                # Only complain about a value of the wrong type; a request
                # without any cookie jar is not an error.
                WARNING('meta should be a dict')

            # Requests without form data may lack the attribute entirely.
            formdata = getattr(item_obj, 'formdata', None) or None

            if method == 'GET':
                fetched = self.get_url(url, headers, cookie)
            elif method == 'POST':
                # NOTE(review): this calls get_url with an extra positional
                # argument compared to the GET branch -- confirm get_url
                # accepts form data, or whether a dedicated POST helper was
                # intended.
                fetched = self.get_url(url, formdata, headers, cookie)
            else:
                continue

            http_response, http_cookie = fetched
            response = response_obj()
            response.cookie = http_cookie
            response.response_string = self.deal_response_with_xpath(
                http_response)
            cache.Response_Cache = response
コード例 #10
0
ファイル: Schedule.py プロジェクト: sorcerer0001/-scrapy-
    def Putresult_Post(self, response):
        """Append one POST result string to ``self.result_post``.

        ``response`` must be a ``str``; anything else logs a warning and is
        dropped.
        """
        # Validate the argument itself.  The former check tested the builtin
        # `object`, which is never a str, so every response was rejected.
        if not isinstance(response, str):
            WARNING("[Schedule] [result] the wrong params!!")
            return

        self.result_post.append(response)
コード例 #11
0
ファイル: request.py プロジェクト: sonlia/-scrapy-
    def __init__(self, *args, **kwargs):
        """Build a request description from keyword arguments.

        Recognised keywords: ``url``, ``method``, ``formdata``, ``headers``,
        ``callback``, ``filename``, ``download_type`` and ``meta``.  Every
        matching attribute always exists after construction and is None when
        not supplied.

        Behaviour worth knowing:
          * Without ``url`` a warning is logged and nothing else is parsed.
          * A missing ``method`` defaults to ``'GET'``.
          * Supplying ``formdata`` turns ``'GET'`` into ``'POST'``; omitting
            it turns ``'POST'`` into ``'GET'``.
          * Non-empty ``formdata`` (dict or iterable of pairs) is stored as a
            list of ``(key, value)`` tuples passed through
            ``str_to_unicode``.
          * Method ``"DOWNLOAD"`` without ``download_type`` logs a warning
            and stops parsing.
        """
        # Defaults first, so an early return never leaves the instance
        # without one of its attributes.
        self.url = None
        self.method = None
        self.formdata = None
        self.headers = None
        self.callback = None
        self.filename = None
        self.download_type = None
        self.meta = None

        if 'url' not in kwargs:
            WARNING("[request] without url???, this request will be stopped!!")
            return
        self.url = kwargs.pop('url')
        INFO("[request] url to Crawer = {}".format(self.url))

        if 'method' in kwargs:
            self.method = kwargs.pop('method')
        else:
            WARNING("[request] without method auto set method to 'GET'")
            self.method = 'GET'

        if 'formdata' in kwargs:
            formdata = kwargs.pop('formdata')
            if self.method == "GET":
                WARNING(
                    "[request] ...there exists dict of formdata turn 'GET' to 'POST'"
                )
                self.method = 'POST'
        else:
            formdata = None
            if self.method == 'POST':
                WARNING(
                    "[request]...formdata is empty auto turn 'POST' to 'GET'")
                self.method = 'GET'

        if formdata:
            # .items() exists on both Python 2 and Python 3 dicts.
            items = formdata.items() if isinstance(formdata, dict) else formdata
            self.formdata = [
                (str_to_unicode(k, encoding='utf-8'),
                 str_to_unicode(v, encoding='utf-8'))
                for k, v in items
            ]

        # Pop 'headers' exactly once: a second pop always missed and reset
        # self.headers to None, discarding the caller's headers.
        if 'headers' in kwargs:
            self.headers = kwargs.pop('headers')
        else:
            INFO("[request] without header, set header to None")

        if 'callback' in kwargs:
            self.callback = kwargs.pop('callback')
        else:
            INFO("[request] nothing to callback!!")

        if 'filename' in kwargs:
            self.filename = kwargs.pop('filename')
        else:
            INFO("[request] without filename.......")

        if 'download_type' in kwargs:
            self.download_type = kwargs.pop('download_type')
        elif self.method == "DOWNLOAD":
            WARNING(
                "[request] method is download,but there isn't exists and method??, stop it!!"
            )
            return

        self.meta = kwargs.pop('meta', None)