Example #1
0
    def run(self):
        """Download the not-yet-fetched part of this object's byte range.

        Reads ``self.url``, ``self.filename``, ``self.ranges`` (a start/end
        pair) and ``self.once_buffer`` (the read size per chunk).  The current
        size of ``self.filename`` is taken as the number of bytes already
        downloaded, so a partially written file is resumed rather than
        restarted.  ``self.downloaded`` and ``self.start_pointer`` are updated
        as a side effect; received data is appended to ``self.filename``.
        """
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No partial file yet: start from the beginning of the range.
            self.downloaded = 0

        self.start_pointer = self.ranges[0] + self.downloaded
        # NOTE(review): the Range header below treats ranges[1] as an
        # inclusive end; if that is the intent, ">=" skips a final single
        # byte when start_pointer == ranges[1] -- confirm against the caller.
        if self.start_pointer >= self.ranges[1]:
            print('%s has been over.' % self.filename)
            return

        types = urllib.splittype(self.url)
        host, res = urllib.splithost(types[1])
        h = HTTP()
        try:
            h.connect(host)
            h.putrequest('GET', res)
            h.putheader('Host', host)
            h.putheader('Range',
                        "bytes=%d-%d" % (self.start_pointer, self.ranges[1]))
            h.endheaders()
            # The wrapped connection's response object is read in
            # once_buffer-sized chunks instead of all at once.
            response = h._conn.getresponse()
            data = response.read(self.once_buffer)
            if data:
                # Open only once data has arrived, so an empty reply still
                # leaves no file behind.
                with open(self.filename, 'ab') as out:
                    while data:
                        out.write(data)
                        # Flush per chunk so the on-disk size keeps pace
                        # with self.downloaded.
                        out.flush()
                        self.downloaded += len(data)
                        data = response.read(self.once_buffer)
        finally:
            # Release the socket even when the transfer fails part-way.
            h.close()
Example #2
0
def get_redirect_url(url):
    """Return the ``location`` header of the reply to a HEAD request for *url*.

    The host and resource path are split out of *url*, a single HEAD request
    is sent, and the connection is closed before returning.  The reply status
    is not inspected; the lookup of ``location`` fails if the reply does not
    carry that header.
    """
    types = urllib.splittype(url)
    host, res = urllib.splithost(types[1])
    h = HTTP()
    try:
        h.connect(host)
        h.putrequest('HEAD', res)
        # Send Host so servers using name-based virtual hosting answer for
        # the requested site.
        h.putheader('Host', host)
        h.endheaders()
        status, reason, headers = h.getreply()
        return headers['location']
    finally:
        h.close()
Example #3
0
def get_file_size(url):
    """Return the ``Content-Length`` reported for *url*, as a float.

    A single HEAD request is sent to the host named in *url* and the
    connection is closed before returning.  The reply status is not
    inspected; the lookup fails if the reply has no ``Content-Length``
    header, and ``float()`` raises ``ValueError`` if it is not numeric.
    """
    types = urllib.splittype(url)
    host, res = urllib.splithost(types[1])
    h = HTTP()
    try:
        h.connect(host)
        h.putrequest('HEAD', res)
        h.putheader('Host', host)
        h.endheaders()
        status, reason, headers = h.getreply()
        return float(headers['Content-Length'])
    finally:
        # Release the socket whether or not the request succeeded.
        h.close()
Example #4
0
def main():

    cur_url = "http://zh.wikipedia.org/wiki/Category:%E9%A0%81%E9%9D%A2%E5%88%86%E9%A1%9E" #"http://zh.wikipedia.org/wiki/Wikipedia:%E5%88%86%E9%A1%9E%E7%B4%A2%E5%BC%95"
    cur_alias = "分类"
    cts = {}
    its = {} # ItemName(Wiki Item URL) : ([Alias1, Alias2, Alias3, ...], [InItemName1, InItemName, ..], [OutItemName1, OutItemName2, ..])
    h = HTTP()
    h.connect("zh.wikipedia.org")
    qcs = Queue.Queue()
    qns = Queue.Queue()
    qcs.put({"url": cur_url, "alias": cur_alias})
    #tp = ThreadPool(10)
    tds = [Thread(target=worker, args=(qcs if i % 2 else qns, qcs, qns, cts, its))
            for i in xrange(10)]

    t_start = time.time()
    for i in tds:
        i.start()
        time.sleep(5)

    for i in tds:
        i.join()
    
    #worker_c(qcs, qns, cts, its)
    #rqs = makeRequests(worker_c, args)
    #[tp.putRequest(req) for req in rqs]

    #try:
    #    tp.joinAllDismissedWorkers()
    #except KeyboardInterrupt:
    #    tp.joinAllDismissedWorkers()
    
    with open("CTS.db", "wb") as f:
        pickle.dump(cts, f)
    with open("ITS.db", "wb") as f:
        pickle.dump(its, f)

    print "CTS:", cts
    print "___________________________"
    print "ITEMS:", its
    print "Coust Time: %s" % (time.time() - t_start)
Example #5
0
    def __call__(self, *args, **kw):
        method = self.method
        if method == 'PUT' and len(args) == 1 and not kw:
            query = [args[0]]
            args = ()
        else:
            query = []
        for i in range(len(args)):
            try:
                k = self.args[i]
                if kw.has_key(k): raise TypeError, 'Keyword arg redefined'
                kw[k] = args[i]
            except IndexError:
                raise TypeError, 'Too many arguments'

        headers = {}
        for k, v in self.headers.items():
            headers[translate(k, dashtrans)] = v
        method = self.method
        if headers.has_key('Content-Type'):
            content_type = headers['Content-Type']
            if content_type == 'multipart/form-data':
                return self._mp_call(kw)
        else:
            content_type = None
            if not method or method == 'POST':
                for v in kw.values():
                    if hasattr(v, 'read'): return self._mp_call(kw)

        can_marshal = type2marshal.has_key
        for k, v in kw.items():
            t = type(v)
            if can_marshal(t): q = type2marshal[t](k, v)
            else: q = '%s=%s' % (k, quote(v))
            query.append(q)

        url = self.rurl
        if query:
            query = join(query, '&')
            method = method or 'POST'
            if method == 'PUT':
                headers['Content-Length'] = str(len(query))
            if method != 'POST':
                url = "%s?%s" % (url, query)
                query = ''
            elif not content_type:
                headers['Content-Type'] = 'application/x-www-form-urlencoded'
                headers['Content-Length'] = str(len(query))
        else:
            method = method or 'GET'

        if (self.username and self.password
                and not headers.has_key('Authorization')):
            headers['Authorization'] = ("Basic %s" % replace(
                encodestring('%s:%s' %
                             (self.username, self.password)), '\012', ''))

        try:
            h = HTTP()
            h.connect(self.host, self.port)
            h.putrequest(method, self.rurl)
            for hn, hv in headers.items():
                h.putheader(translate(hn, dashtrans), hv)
            h.endheaders()
            if query: h.send(query)
            ec, em, headers = h.getreply()
            response = h.getfile().read()
        except:
            raise NotAvailable, RemoteException(NotAvailable,
                                                sys.exc_info()[1], self.url,
                                                query)
        if (ec - (ec % 100)) == 200:
            return (headers, response)
        self.handleError(query, ec, em, headers, response)
Example #6
0
    def __call__(self,*args,**kw):
        method=self.method
        if method=='PUT' and len(args)==1 and not kw:
            query=[args[0]]
            args=()
        else:
            query=[]
        for i in range(len(args)):
            try:
                k=self.args[i]
                if kw.has_key(k): raise TypeError, 'Keyword arg redefined'
                kw[k]=args[i]
            except IndexError:    raise TypeError, 'Too many arguments'

        headers={}
        for k, v in self.headers.items(): headers[translate(k,dashtrans)]=v
        method=self.method
        if headers.has_key('Content-Type'):
            content_type=headers['Content-Type']
            if content_type=='multipart/form-data':
                return self._mp_call(kw)
        else:
            content_type=None
            if not method or method=='POST':
                for v in kw.values():
                    if hasattr(v,'read'): return self._mp_call(kw)

        can_marshal=type2marshal.has_key
        for k,v in kw.items():
            t=type(v)
            if can_marshal(t): q=type2marshal[t](k,v)
            else: q='%s=%s' % (k,quote(v))
            query.append(q)

        url=self.rurl
        if query:
            query=join(query,'&')
            method=method or 'POST'
            if method == 'PUT':
                headers['Content-Length']=str(len(query))
            if method != 'POST':
                url="%s?%s" % (url,query)
                query=''
            elif not content_type:
                headers['Content-Type']='application/x-www-form-urlencoded'
                headers['Content-Length']=str(len(query))
        else: method=method or 'GET'

        if (self.username and self.password and
            not headers.has_key('Authorization')):
            headers['Authorization']=(
                "Basic %s" %
                replace(encodestring('%s:%s' % (self.username,self.password)),
                                     '\012',''))

        try:
            h=HTTP()
            h.connect(self.host, self.port)
            h.putrequest(method, self.rurl)
            for hn,hv in headers.items():
                h.putheader(translate(hn,dashtrans),hv)
            h.endheaders()
            if query: h.send(query)
            ec,em,headers=h.getreply()
            response     =h.getfile().read()
        except:
            raise NotAvailable, RemoteException(
                NotAvailable,sys.exc_info()[1],self.url,query)
        if (ec - (ec % 100)) == 200:
            return (headers,response)
        self.handleError(query, ec, em, headers, response)
Example #7
0
    def __call__(self, *args, **kw):
        method = self.method
        if method == "PUT" and len(args) == 1 and not kw:
            query = [args[0]]
            args = ()
        else:
            query = []
        for i in range(len(args)):
            try:
                k = self.args[i]
                if kw.has_key(k):
                    raise TypeError, "Keyword arg redefined"
                kw[k] = args[i]
            except IndexError:
                raise TypeError, "Too many arguments"

        headers = {}
        for k, v in self.headers.items():
            headers[translate(k, dashtrans)] = v
        method = self.method
        if headers.has_key("Content-Type"):
            content_type = headers["Content-Type"]
            if content_type == "multipart/form-data":
                return self._mp_call(kw)
        else:
            content_type = None
            if not method or method == "POST":
                for v in kw.values():
                    if hasattr(v, "read"):
                        return self._mp_call(kw)

        can_marshal = type2marshal.has_key
        for k, v in kw.items():
            t = type(v)
            if can_marshal(t):
                q = type2marshal[t](k, v)
            else:
                q = "%s=%s" % (k, quote(v))
            query.append(q)

        url = self.rurl
        if query:
            query = join(query, "&")
            method = method or "POST"
            if method == "PUT":
                headers["Content-Length"] = str(len(query))
            if method != "POST":
                url = "%s?%s" % (url, query)
                query = ""
            elif not content_type:
                headers["Content-Type"] = "application/x-www-form-urlencoded"
                headers["Content-Length"] = str(len(query))
        else:
            method = method or "GET"

        if self.username and self.password and not headers.has_key("Authorization"):
            headers["Authorization"] = "Basic %s" % gsub(
                "\012", "", encodestring("%s:%s" % (self.username, self.password))
            )

        try:
            h = HTTP()
            h.connect(self.host, self.port)
            h.putrequest(method, self.rurl)
            for hn, hv in headers.items():
                h.putheader(translate(hn, dashtrans), hv)
            h.endheaders()
            if query:
                h.send(query)
            ec, em, headers = h.getreply()
            response = h.getfile().read()
        except:
            raise NotAvailable, RemoteException(NotAvailable, sys.exc_value, self.url, query)

        if ec == 200:
            return (headers, response)
        self.handleError(query, ec, em, headers, response)