Ejemplo n.º 1
0
 def testParseUrl(self):
     """Check parse_url() against every expected result stored in ``urls``.

     ``urls`` maps a URL string to the expected
     ``(proto, user, passwd, host, query)`` tuple.  Each URL is parsed once
     and compared field by field; the query part is compared with the
     ``equals`` helper instead of ``==``.
     """
     for url, expected in urls.items():
         proto1, user1, passwd1, host1, query1 = parse_url(url)
         proto2, user2, passwd2, host2, query2 = expected
         self.assertEqual(proto1, proto2, proto1)
         self.assertEqual(user1, user2, user1)
         self.assertEqual(passwd1, passwd2, passwd1)
         self.assertEqual(host1, host2, host1)
         self.assertTrue(equals(query1, query2),
                         '%s is not %s' % (url, query1))
Ejemplo n.º 2
0
def download(args):
    """Download every share link given to the ``download`` sub-command.

    Recognised options are ``-L/--limit``, ``-D/--dir`` and ``-S/--secret``;
    all remaining arguments are treated as links.  Options that are not
    supplied fall back to ``global_config.limit`` and ``global_config.dir``.

    The function does not return: it prints the help text and exits with
    status 1 when *args* is empty, and exits with status 0 once every link
    has been handled.

    Args:
        args: list of command-line tokens for this sub-command.

    Raises:
        NotImplementedError: for album (type 2) and home (type 3) links.
    """
    limit = global_config.limit
    output_dir = global_config.dir
    parser = argparse.ArgumentParser(description="download command arg parser")
    parser.add_argument('-L',
                        '--limit',
                        action="store",
                        dest='limit',
                        help="Max download speed limit.")
    parser.add_argument('-D',
                        '--dir',
                        action="store",
                        dest='output_dir',
                        help="Download task to dir.")
    parser.add_argument('-S',
                        '--secret',
                        action="store",
                        dest='secret',
                        help="Retrieval password.",
                        default="")
    if not args:
        parser.print_help()
        # sys.exit() rather than the site-provided exit(), which is intended
        # for the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    namespace, links = parser.parse_known_args(args)
    secret = namespace.secret
    # Command-line values override the configured defaults.
    limit = namespace.limit or limit
    output_dir = namespace.output_dir or output_dir

    # Rewrite WAP links to the regular share form, then add 'http://'.
    links = [link.replace("wap/link", "share/link") for link in links]
    links = map(add_http, links)
    for url in links:
        link_type = parse_url(url).get('type')
        if link_type == 1:
            # normal share link
            pan = Pan()
            info = pan.get_dlink(url, secret)
            cookies = 'BDUSS={0}'.format(pan.bduss)
            download_command(info.filename,
                             info.dlink,
                             cookies=cookies,
                             limit=limit,
                             output_dir=output_dir)
        elif link_type in (2, 3):
            # album (2) and home (3) links are not supported yet
            raise NotImplementedError('This function has not implemented.')
        elif link_type == 0:
            logger.debug(url, extra={"type": "wrong link", "method": "None"})
        # any other type is skipped silently

    sys.exit(0)
Ejemplo n.º 3
0
def download(args):
    """Download every share link given to the ``download`` sub-command.

    Recognised options are ``-L/--limit``, ``-D/--dir`` and ``-S/--secret``;
    all remaining arguments are treated as links.  Options that are not
    supplied fall back to ``global_config.limit`` and ``global_config.dir``.

    Link types reported by parse_url(): 1 is a normal share link, 4 is a
    link carrying its own ``fsid`` and rewritten ``url``, 2 (album) and
    3 (home) are unsupported, 0 is logged as a wrong link, anything else is
    skipped.

    The function does not return: it exits with status 1 (after printing
    help) when *args* is empty and with status 0 when all links are done.

    Args:
        args: list of command-line tokens for this sub-command.

    Raises:
        NotImplementedError: for album (type 2) and home (type 3) links.
    """
    limit = global_config.limit
    output_dir = global_config.dir
    parser = argparse.ArgumentParser(description="download command arg parser")
    parser.add_argument('-L', '--limit', action="store", dest='limit', help="Max download speed limit.")
    parser.add_argument('-D', '--dir', action="store", dest='output_dir', help="Download task to dir.")
    parser.add_argument('-S', '--secret', action="store", dest='secret', help="Retrieval password.", default="")
    if not args:
        parser.print_help()
        # sys.exit() rather than the site-provided exit(), which is intended
        # for the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    namespace, links = parser.parse_known_args(args)
    secret = namespace.secret
    # Command-line values override the configured defaults.
    limit = namespace.limit or limit
    output_dir = namespace.output_dir or output_dir

    def build_cookies(pan):
        """Return the cookie string for *pan*, or '' when it has no BDUSS."""
        if not pan.bduss:
            return ''
        cookies = 'BDUSS={0}'.format(pan.bduss)
        if pan.pcsett:
            cookies += ';pcsett={0}'.format(pan.pcsett)
        # NOTE(review): the trailing double quote is kept from the original;
        # presumably download_command() supplies the opening one - confirm.
        return cookies + '"'

    # Rewrite WAP links to the regular share form, then add 'http://'.
    links = [link.replace("wap/link", "share/link") for link in links]
    links = map(add_http, links)
    for url in links:
        res = parse_url(url)
        link_type = res.get('type')
        if link_type == 1:
            # normal share link
            pan = Pan()
            info = pan.get_dlink(url, secret)
            download_command(info.filename, info.dlink, cookies=build_cookies(pan),
                             limit=limit, output_dir=output_dir)
        elif link_type == 4:
            pan = Pan()
            fsid = res.get('fsid')
            new_url = res.get('url')
            info = pan.get_dlink(new_url, secret, fsid)
            download_command(info.filename, info.dlink, cookies=build_cookies(pan),
                             limit=limit, output_dir=output_dir)
        elif link_type in (2, 3):
            # album (2) and home (3) links are not supported yet
            raise NotImplementedError('This function has not implemented.')
        elif link_type == 0:
            logger.debug(url, extra={"type": "wrong link", "method": "None"})
        # any other type is skipped silently

    sys.exit(0)
Ejemplo n.º 4
0
def get_list(html,url):
    """Extract the listing rows and the next-page URL from a list page.

    Args:
        html: markup of the list page.
        url: URL the page came from; passed to get_comm() as ``info["url"]``
            and used as the base when resolving the next-page link.

    Returns:
        ``(next_url, data)`` where *data* is whatever get_comm() produces for
        the ``div_houselist`` rows and *next_url* is None unless exactly one
        non-blank "next" href is present.
    """
    document = etree.HTML(html)
    rows = document.xpath("//div[@name=\"div_houselist\"]/dl")
    data = get_comm(rows, list_mp, {"url": url})

    next_hrefs = document.xpath("//a[@id=\"PageControl1_hlk_next\"]/@href")
    next_url = None
    if len(next_hrefs) == 1 and next_hrefs[0].strip() != "":
        next_url = util.parse_url(url, next_hrefs[0])
    return next_url, data
Ejemplo n.º 5
0
def cb():
    """Walk entry -> sub-area -> listing pages, mostly from saved HTML files.

    Reads ``data/esf.html`` for the entry links, then for every entry reads
    the sub-area links and runs get_list() on ``data/list.html`` for each
    sub-area whose URL differs from the entry URL.  ``data/result.txt`` is
    opened for appending for the duration of the walk (nothing is written to
    it here).  All files are now closed deterministically, also on error.
    """
    url = "http://esf.cq.fang.com"
    #url,html = util.get_page({"url":url})
    with open("data/esf.html") as page:
        html = page.read()
    entries = get_entry(html)
    with open("data/result.txt", "a") as fp:
        for entry in entries:
            if entry[0] is None:
                continue
            eurl = util.parse_url(url, entry[0])
            with open("data/yubei.html") as page:
                html = page.read()
            # NOTE(review): this call discards both values computed just
            # above and passes no request dict, unlike every other
            # util.get_page() call here - confirm whether
            # util.get_page({"url": eurl}) was intended.
            eurl, html = util.get_page()
            subs = get_sub(html)
            for sub in subs:
                surl = util.parse_url(eurl, sub[0])
                if surl == eurl:
                    continue
                with open("data/list.html") as page:
                    html = page.read()
                nurl, items = get_list(html, surl)
Ejemplo n.º 6
0
def get_user_info(u,fp):
    """Fetch up to three pages of articles for toutiao.com user *u*.

    The profile page is fetched first to obtain the user id; request
    parameters come from ``get_params("func.js")``.  Each page is POSTed to
    ``/c/user/article/`` and every entry in its ``data`` list is handed to
    get_article().  Paging continues while the response reports
    ``has_more``, merging the response's ``next`` mapping into a fresh
    parameter set.

    Args:
        u: user identifier interpolated into the profile URL.
        fp: writable file object or None.  With None, each page of results
            goes to insert_into_db(); otherwise one tab-separated line
            (timestamp, user, JSON) per article is written to *fp*.
    """
    max_pages = 3
    ourl = "http://www.toutiao.com/c/user/%s/" % u
    url,html = util.get_page({"url":ourl})
    p,h = get_urlinfo(url)
    params = get_params("func.js")
    uid = get_userinfo(html)
    if params is None or uid is None:
        # The original message also interpolated an undefined name
        # (``referer``), so this branch raised NameError instead of logging.
        util.log_msg("could not parse data from html file,need to check this out.url:%s." % ourl)
        return

    params = json.loads(params)
    params["user_id"] = uid
    nurl = "%s//%s%s" % (p,h,"/c/user/article/")
    for _ in range(max_pages):
        url,html = util.get_page({"url":nurl,"data":params,"method":"post"})
        if html is None or len(html) == 0:
            util.log_msg("could not get data from url:%s,data:%s,uid:%s" % (nurl,str(params),u))
            break
        mp = json.loads(html)
        if "data" in mp and isinstance(mp["data"],list):
            if len(mp["data"]) == 0:
                util.log_msg("no data from response.url:%s" % nurl)
            result = []
            for item in mp["data"]:
                turl = util.parse_url(url,item["source_url"])
                try:
                    get_article(turl,url,item,fp,result)
                except Exception as e:
                    # One broken article must not abort the whole page.
                    util.log_msg("get article(url:%s) info error:%s" % (turl,str(e)))
            if len(result) > 0:
                if fp is None:
                    insert_into_db(result)
                else:
                    for article in result:
                        fp.write("[%s]\t%s\t%s\n" % (time.ctime(),u,json.dumps(article)))
        else:
            util.log_msg("no data in content.url:%s" % nurl)
        # A response without "has_more" ends paging instead of raising KeyError.
        if not mp.get("has_more"):
            break
        # Fresh parameters for the next page, plus the cursor fields the
        # server returned under "next".
        params = json.loads(get_params("func.js"))
        params["user_id"] = uid
        params.update(mp["next"])
Ejemplo n.º 7
0
def download(args):
    """Download every share link given to the ``download`` sub-command.

    Recognised options are ``-L/--limit``, ``-D/--dir`` and ``-S/--secret``;
    all remaining arguments are treated as links.  Options that are not
    supplied fall back to ``global_config.limit`` and ``global_config.dir``.

    Normal links (type 1) are served by ``Pan`` and album links (type 2) by
    ``Album``; both expose an ``info`` attribute that yields
    ``(link, filename, count)`` and is read until ``count`` is 0.

    The function does not return: it exits with status 1 (after printing
    help) when *args* is empty and with status 0 when all links are done.

    Args:
        args: list of command-line tokens for this sub-command.

    Raises:
        NotImplementedError: for home (type 3) links.
    """
    limit = global_config.limit
    output_dir = global_config.dir
    parser = argparse.ArgumentParser(description="download command arg parser")
    parser.add_argument('-L', '--limit', action="store", dest='limit', help="Max download speed limit.")
    parser.add_argument('-D', '--dir', action="store", dest='output_dir', help="Download task to dir.")
    parser.add_argument('-S', '--secret', action="store", dest='secret', help="Retrieval password.", default="")
    if not args:
        parser.print_help()
        # sys.exit() rather than the site-provided exit(), which is intended
        # for the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    namespace, links = parser.parse_known_args(args)
    secret = namespace.secret
    # Command-line values override the configured defaults.
    limit = namespace.limit or limit
    output_dir = namespace.output_dir or output_dir

    def fetch_all(source):
        """Download files from *source* until its ``info`` reports count 0."""
        count = 1
        while count != 0:
            link, filename, count = source.info
            download_command(filename, link, limit=limit, output_dir=output_dir)

    # Rewrite WAP links to the regular share form, then add 'http://'.
    links = [link.replace("wap/link", "share/link") for link in links]
    links = map(add_http, links)
    for url in links:
        res = parse_url(url)
        link_type = res.get('type')
        if link_type == 1:
            # normal share link
            fetch_all(Pan(url, secret=secret))
        elif link_type == 2:
            # album
            album_id = res.get('album_id')
            uk = res.get('uk')
            fetch_all(Album(album_id, uk))
        elif link_type == 3:
            # home
            raise NotImplementedError('This function has not implemented.')
        elif link_type == 0:
            logging.debug(url)
        # any other type is skipped silently

    sys.exit(0)
Ejemplo n.º 8
0
    def set_url(self, url):
        """Populate the send form, or start a payment request, from *url*.

        The URL is parsed with ``electrum.util.parse_url``.  A parse failure
        shows an "Invalid bitcoin URL" warning and stops.  An amount is
        converted to the main window's base unit (x1000 for ``mBTC``); an
        unparsable amount shows a warning and becomes ``"0.0"``.

        Without a request URL (or when the ``paymentrequest`` module cannot
        be imported) the address/amount/label/message are pushed to the main
        and lite windows.  Otherwise the payment request is fetched and
        verified on a background thread, which emits ``payment_request_ok``
        or ``payment_request_error`` on the main window.
        """
        from electrum import util
        from decimal import Decimal

        try:
            address, amount, label, message, request_url, url = util.parse_url(
                url)
        except Exception:
            QMessageBox.warning(self.main_window, _('Error'),
                                _('Invalid bitcoin URL'), _('OK'))
            return

        if amount:
            try:
                if self.main_window.base_unit() == 'mBTC':
                    amount = str(1000 * Decimal(amount))
                else:
                    amount = str(Decimal(amount))
            except Exception:
                amount = "0.0"
                QMessageBox.warning(self.main_window, _('Error'),
                                    _('Invalid Amount'), _('OK'))

        if request_url:
            try:
                from electrum import paymentrequest
            except Exception:
                # Best effort: fall back to a plain send when the optional
                # module is unavailable.  ``except Exception`` (not a bare
                # except) so Ctrl-C / SystemExit are not swallowed.
                print("cannot import paymentrequest")
                request_url = None

        if not request_url:
            self.main_window.set_send(address, amount, label, message)
            self.lite_window.set_payment_fields(address, amount)
            return

        def payment_request():
            # Runs on the worker thread; the result is reported via signals.
            self.payment_request = paymentrequest.PaymentRequest(request_url)
            if self.payment_request.verify():
                self.main_window.emit(SIGNAL('payment_request_ok'))
            else:
                self.main_window.emit(SIGNAL('payment_request_error'))

        threading.Thread(target=payment_request).start()
        self.main_window.prepare_for_payment_request()
Ejemplo n.º 9
0
def replace_image(content,referer):
    """Rewrite the image references in an HTML fragment.

    For every ``<img>`` the ``src`` attribute (or ``href`` when there is no
    ``src``) is resolved against *referer*, passed through imge_transfer()
    and replaced by the location it returns.  Images whose resolved URL
    contains a ``?`` are logged and left untouched.

    Returns:
        ``(markup, names)``: the serialised children of ``<body>`` joined
        together, and the transferred image names joined with ``|``.
    """
    tree = etree.HTML(content)
    transferred = []
    for img in tree.xpath("//img"):
        attrs = img.keys()
        if "src" in attrs:
            attr = "src"
        elif "href" in attrs:
            attr = "href"
        else:
            continue
        src = util.parse_url(referer, img.get(attr))
        if src.find("?") > -1:
            # not good here
            util.log_msg("expecting image url with no params%s" % src)
            continue
        _, name, new_src = imge_transfer(src, referer)
        img.set(attr, new_src)
        transferred.append((src, name))
    body_markup = "".join(etree.tounicode(child) for child in tree.xpath("/html/body/*"))
    names = "|".join(name for _, name in transferred)
    return body_markup, names
Ejemplo n.º 10
0
    def set_url(self, url):
        """Populate the send form, or start a payment request, from *url*.

        The URL is parsed with ``electrum_vior.util.parse_url``.  A parse
        failure shows an "Invalid viorcoin URL" warning and stops.  An amount
        is converted to the main window's base unit (x1000 for ``mVIOR``); an
        unparsable amount shows a warning and becomes ``"0.0"``.

        Without a request URL (or when the ``paymentrequest`` module cannot
        be imported) the address/amount/label/message are pushed to the main
        and lite windows.  Otherwise the payment request is fetched and
        verified on a background thread, which emits ``payment_request_ok``
        or ``payment_request_error`` on the main window.
        """
        from electrum_vior import util
        from decimal import Decimal

        try:
            address, amount, label, message, request_url, url = util.parse_url(url)
        except Exception:
            QMessageBox.warning(self.main_window, _("Error"), _("Invalid viorcoin URL"), _("OK"))
            return

        if amount:
            try:
                if self.main_window.base_unit() == "mVIOR":
                    amount = str(1000 * Decimal(amount))
                else:
                    amount = str(Decimal(amount))
            except Exception:
                amount = "0.0"
                QMessageBox.warning(self.main_window, _("Error"), _("Invalid Amount"), _("OK"))

        if request_url:
            try:
                from electrum_vior import paymentrequest
            except Exception:
                # Best effort: fall back to a plain send when the optional
                # module is unavailable.  ``except Exception`` (not a bare
                # except) so Ctrl-C / SystemExit are not swallowed.
                print("cannot import paymentrequest")
                request_url = None

        if not request_url:
            self.main_window.set_send(address, amount, label, message)
            self.lite_window.set_payment_fields(address, amount)
            return

        def payment_request():
            # Runs on the worker thread; the result is reported via signals.
            self.payment_request = paymentrequest.PaymentRequest(request_url)
            if self.payment_request.verify():
                self.main_window.emit(SIGNAL("payment_request_ok"))
            else:
                self.main_window.emit(SIGNAL("payment_request_error"))

        threading.Thread(target=payment_request).start()
        self.main_window.prepare_for_payment_request()
Ejemplo n.º 11
0
def get_article(url,referer,data,fp,result2):
    """Fetch one article page and append its extracted fields to *result2*.

    The inline scripts mentioning ``BASE_DATA`` or ``__pgcInfo`` are written
    to ``data/tmp.js`` together with a line that prints ``BASE_DATA`` as
    JSON; get_params() is then run on that file and its output parsed.
    Fields are looked up with util.get_jpath() in ``{"entry": data,
    "data": <parsed BASE_DATA>}`` according to the ``conf`` table below.

    Args:
        url: article URL.
        referer: value sent as the ``Referer`` header.
        data: the list-entry dict this article came from.
        fp: unused here.
        result2: list that receives the result dict; nothing is appended
            when the page data or the article content cannot be extracted
            (a message is logged instead).
    """
    url, h = util.get_page({"url":url,"headers":{"Referer":referer}})
    tree = etree.HTML(h)
    scripts = [o for o in tree.xpath("//script/text()") if o.find("BASE_DATA") > -1 or o.find("__pgcInfo")>-1]
    scripts.append("console.log(JSON.stringify(BASE_DATA))")
    # Close (and thereby flush) the script file before get_params() reads it.
    with open("data/tmp.js","w") as script_file:
        script_file.write("\n".join(scripts))
    r = get_params("data/tmp.js")
    if r is None:
        util.log_msg("could not parse data from html file,need to check this out.url:%s,referer:%s." % (url,referer))
        return

    mp = json.loads(r)
    obj = {"entry":data,"data":mp}
    # Each row: (result key, path in obj[, default[, converter]]).
    conf = [("title",["data","artilceInfo","title"]),
            ("content",["data","artilceInfo","content"],None,html.unescape),
            ("comments",["data","commentInfo","comments_count"],0),
            ("isOriginal",["data","artilceInfo","subInfo","isOriginal"],False),
            ("url",["__const",url]),
            ("views",["entry","go_detail_count"], 0),
            ("cover",["entry","image_url"],""),
            ("abstract",["entry","abstract"], ""),
            ("source",["data","artilceInfo","subInfo","source"],""),
            ("publishtime",["data","artilceInfo","subInfo","time"]),
            ("tags",["data","artilceInfo","tagInfo","tags"],"",lambda o:",".join([so["name"] for so in o])),
            ("category",["data","headerInfo","chineseTag"],""),
        ]
    result = {}
    for cf in conf:
        v = util.get_jpath(obj,cf[1],cf[2] if len(cf)>2 else None,cf[3] if len(cf)>3 else None)
        if v is not None:
            result[cf[0]] = v
    result["id"] = hashlib.md5(url.encode("utf-8")).hexdigest()
    if "content" not in result:
        util.log_msg("could not parse content from html file,need to check this out.url:%s,referer:%s." % (url,referer))
        return

    result["content"],result["images"] = replace_image(result["content"],url)
    if "cover" in result and len(result["cover"])>0:
        result["cover"] = imge_transfer(util.parse_url(url,result["cover"]),url)[1]
    if len(result) > 0:
        result2.append(result)
Ejemplo n.º 12
0
    def set_url(self, url):
        """Populate the send form, or start a payment request, from *url*.

        The URL is parsed with ``electrum.util.parse_url``; a parse failure
        shows an "Invalid bitcoin URL" warning and stops.  An amount is
        converted to the main window's base unit (x1000 for ``mBTC``); an
        unparsable amount shows a warning and becomes ``"0.0"``.

        Without a request URL the parsed fields are pushed to the main and
        lite windows.  With one, the payment request is read and verified on
        a background thread, which emits ``payment_request_ok`` or
        ``payment_request_error`` on the main window.
        """
        from electrum import util
        from decimal import Decimal

        try:
            parsed = util.parse_url(url)
            address, amount, label, message, request_url, url = parsed
        except Exception:
            QMessageBox.warning(self.main_window, _('Error'), _('Invalid bitcoin URL'), _('OK'))
            return

        if amount:
            try:
                in_milli = self.main_window.base_unit() == 'mBTC'
                value = Decimal(amount)
                amount = str(1000 * value) if in_milli else str(value)
            except Exception:
                amount = "0.0"
                QMessageBox.warning(self.main_window, _('Error'), _('Invalid Amount'), _('OK'))

        if not request_url:
            # Plain address URL: just fill in the form fields.
            self.main_window.set_send(address, amount, label, message)
            self.lite_window.set_payment_fields(address, amount)
            return

        from electrum import paymentrequest

        def payment_request():
            # Runs on the worker thread; the result is reported via signals.
            self.payment_request = paymentrequest.PaymentRequest(self.config)
            self.payment_request.read(request_url)
            signal_name = 'payment_request_ok' if self.payment_request.verify() else 'payment_request_error'
            self.main_window.emit(SIGNAL(signal_name))

        worker = threading.Thread(target=payment_request)
        worker.start()
        self.main_window.prepare_for_payment_request()
Ejemplo n.º 13
0
# Extraction rules keyed by section name.  Each value is a tuple of
# (xpath of the section root, list of (field name, extraction spec)
# [, post-processing callable]).
sina_entry = {
    "TopNews": (
        '//div[@class="top_newslist"]/ul',
        [
            ("urlTitle", ["mxpath", ("./li/a/text()", "./li/a/@href")]),
            ("header", ["future", "Header", "name"]),
            ("date", ["future", "Header", "date"]),
        ],
        split_dataurls,
    ),
    "Header": (
        '//div[@id="wwwidx_imp_con"]',
        [
            ("name", ["xpath", "./div[1]/div/span/a/text()"]),
            ("date", ["xpath", "./div[1]/span/text()"]),
        ],
    ),
}

def detect_by_key(name,value):
    """Build a predicate that checks whether ``entry[name] == value``.

    The returned callable takes ``(p, cfg, h, entry)``, ignores the first
    three arguments and returns True only when *name* is a key of *entry*
    and the stored value equals *value*; otherwise False.
    """
    def matches(p, cfg, h, entry):
        if name not in entry:
            return False
        return bool(entry[name] == value)
    return matches

def add_level2(o1, o2):
    """Turn the ``(url, name)`` pairs in ``o2["nameUrl"]`` into level-2 tasks.

    *o1* is ignored.  Returns ``([], tasks)`` where each task is a dict with
    the keys ``name``, ``url``, ``level`` (always 2) and ``force`` (always
    True).
    """
    tasks = []
    for pair in o2["nameUrl"]:
        tasks.append({"name": pair[1], "url": pair[0], "level": 2, "force": True})
    return ([], tasks)

def _district_name_url(o, info):
    """Return ``(util.parse_url(info["url"], href), stripped text)`` for anchor *o*."""
    return (util.parse_url(info["url"], o.get("href")), o.text.strip())


# Level-1 rules: collect the district anchors under ``list_D02_10`` and hand
# the resulting pairs to add_level2.
esf_l1_entry = {
    "District": (
        '//div[@id="list_D02_10"]',
        [
            ("nameUrl", ["xpath_lambda", './div[@class="qxName"]/a[@href]', _district_name_url]),
        ],
        add_level2,
    ),
}

def add_level3(o1, o2):
    """Turn ``o2["nameUrl"]`` into level-3 tasks tagged with ``o2["source"]``.

    ``o2["nameUrl"]`` may be a list of ``(url, name)`` pairs or a single
    pair; a single pair (anything that is not a list, or a list whose first
    element is a string) is wrapped into a one-element list.  An empty list
    yields no tasks instead of raising IndexError.

    *o1* is ignored.  Returns ``([], tasks)`` where each task is a dict with
    the keys ``name``, ``url``, ``level`` (always 3), ``force`` (always
    True) and ``path``.
    """
    pairs = o2["nameUrl"]
    if not isinstance(pairs, list) or (pairs and isinstance(pairs[0], str)):
        pairs = [pairs]
    tasks = [
        {"name": o[1], "url": o[0], "level": 3, "force": True, "path": o2["source"]}
        for o in pairs
    ]
    return ([], tasks)

esf_l2_entry = {
    "District":(
        "//div[@id=\"div_shangQuan\"]",
        [
            ("nameUrl",["xpath_lambda",".//p[@id=\"shangQuancontain\"]/a[@href]",lambda o,info:(util.parse_url(info["url"],o.get("href")),o.text.strip()) if o.text.strip()!="不限" else None]),
            ("source",["jpath",["entrance","name"]])
        ],
        add_level3
Ejemplo n.º 14
0
        try:
            paymntack = paymentrequest_pb2.PaymentACK()
            paymntack.ParseFromString(r.content)
        except Exception:
            return False, "PaymentACK could not be processed. Payment was sent; please manually verify that payment was received."

        print "PaymentACK message received: %s" % paymntack.memo
        return True, paymntack.memo


if __name__ == "__main__":

    try:
        uri = sys.argv[1]
    except:
        print "usage: %s url" % sys.argv[0]
        print "example url: \"bitcoin:mpu3yTLdqA1BgGtFUwkVJmhnU3q5afaFkf?r=https%3A%2F%2Fbitcoincore.org%2F%7Egavin%2Ff.php%3Fh%3D2a828c05b8b80dc440c80a5d58890298&amount=1\""
        sys.exit(1)

    address, amount, label, message, request_url, url = util.parse_url(uri)
    pr = PaymentRequest(request_url)
    if not pr.verify():
        sys.exit(1)

    print 'Payment Request Verified Domain: ', pr.domain
    print 'outputs', pr.outputs
    print 'Payment Memo: ', pr.payment_details.memo

    tx = "blah"
    pr.send_ack(tx, refund_addr="1vXAXUnGitimzinpXrqDWVU4tyAAQ34RA")
            paymntack = paymentrequest_pb2.PaymentACK()
            paymntack.ParseFromString(r.content)
        except Exception:
            print "PaymentACK could not be processed. Payment was sent; please manually verify that payment was received."
            return

        print "PaymentACK message received: %s" % paymntack.memo
        return paymntack.memo


if __name__ == "__main__":

    try:
        uri = sys.argv[1]
    except:
        print "usage: %s url" % sys.argv[0]
        print 'example url: "viorcoin:mpu3yTLdqA1BgGtFUwkVJmhnU3q5afaFkf?r=https%3A%2F%2Fbitcoincore.org%2F%7Egavin%2Ff.php%3Fh%3D2a828c05b8b80dc440c80a5d58890298&amount=1"'
        sys.exit(1)

    address, amount, label, message, request_url, url = util.parse_url(uri)
    pr = PaymentRequest(request_url)
    if not pr.verify():
        sys.exit(1)

    print "Payment Request Verified Domain: ", pr.domain
    print "outputs", pr.outputs
    print "Payment Memo: ", pr.payment_details.memo

    tx = "blah"
    pr.send_ack(tx, refund_addr="1vXAXUnGitimzinpXrqDWVU4tyAAQ34RA")
Ejemplo n.º 16
0
    def setup_play(self, url):
        """Connect to the media server for *url* and start playback.

        Connects the RTSP socket to the host/port parsed from *url*, then
        sends OPTIONS, DESCRIBE, SETUP and PLAY in that order.  After each
        request the response must have status 200 and a ``CSeq`` equal to
        the one sent; otherwise the connection is destroyed, a warning box
        is shown and the method aborts.  Between DESCRIBE and SETUP the RTP
        and RTCP sockets are bound on 127.0.0.1 to the port matched from the
        DESCRIBE response and the port after it.

        Returns:
            0 on success, -1 on any failure.
        """
        url_tup = util.parse_url(url)
        if not url_tup:
            QMessageBox.warning(self, 'Warning', 'Invalid URL.')
            return -1
        ip = url_tup[0]
        port = int(url_tup[1])
        try:
            self.client_rtsp_socket.connect((ip, port))
        except Exception:
            QMessageBox.warning(self, 'Warning', 'Error: connect to media server failed.')
            return -1

        def exchange(method, headers=None):
            """Send one request; return (response, header dict) or None on failure."""
            request_dict = {'CSeq': str(self.seq)}
            if headers:
                request_dict.update(headers)
            request = rtsp.generate_request(method, url, request_dict)
            self.client_rtsp_socket.send(request.encode())
            response = self.client_rtsp_socket.recv(1024).decode()
            if rtsp.get_status_code(response) != 200:
                self.destroy_connection()
                QMessageBox.warning(self, 'Warning', 'Error: unexpected server response code.')
                return None
            response_dict = rtsp.get_response_dict(response)
            try:
                cseq = int(response_dict.get('CSeq'))
            except (TypeError, ValueError):
                # A missing or non-numeric CSeq is reported like a mismatch
                # instead of crashing the caller.
                cseq = None
            if cseq != self.seq:
                self.destroy_connection()
                QMessageBox.warning(self, 'Warning', 'Error: unexpected server response SN.')
                return None
            return response, response_dict

        # send OPTIONS
        if exchange('OPTIONS') is None:
            return -1
        self.seq += 1

        # send DESCRIBE
        described = exchange('DESCRIBE', {'Accept': 'application/sdp'})
        if described is None:
            return -1
        self.client_rtp_port = util.match_rtp_port(described[0])
        if not self.client_rtp_port:
            self.destroy_connection()
            QMessageBox.warning(self, 'Warning', 'Error: can not specify RTP port.')
            return -1
        self.client_rtcp_port = self.client_rtp_port+1
        self.seq += 1

        # setup RTP and RTCP socket
        self.client_rtp_socket.bind(('127.0.0.1', self.client_rtp_port))
        self.client_rtcp_socket.bind(('127.0.0.1', self.client_rtcp_port))
        self.status = self.READY

        # send SETUP
        transport = 'RTP/AVP;unicast;client_port=%d-%d' % (self.client_rtp_port,
                                                           self.client_rtcp_port)
        setup = exchange('SETUP', {'Transport': transport})
        if setup is None:
            return -1
        self.client_session_id = int(setup[1].get('Session'))
        self.seq += 1
        self.status = self.READY

        # send PLAY
        played = exchange('PLAY', {'Session': self.client_session_id, 'Range': 'npt=0.000-'})
        if played is None:
            return -1
        self.seq += 1
        self.current_time, self.media_duration = util.match_media_time(played[0])
        return 0
Ejemplo n.º 17
0
def download(args):
    """Download every share link given to the ``download`` sub-command.

    Recognised options are ``-L/--limit``, ``-D/--dir`` and ``-S/--secret``;
    all remaining arguments are treated as links.  Options that are not
    supplied fall back to ``global_config.limit`` and ``global_config.dir``.

    Normal links (type 1) are served by ``Pan`` and album links (type 2) by
    ``Album``; both expose an ``info`` attribute that yields
    ``(link, filename, count)`` and is read until ``count`` is 0.

    The function does not return: it exits with status 1 (after printing
    help) when *args* is empty and with status 0 when all links are done.

    Args:
        args: list of command-line tokens for this sub-command.

    Raises:
        NotImplementedError: for home (type 3) links.
    """
    limit = global_config.limit
    output_dir = global_config.dir
    parser = argparse.ArgumentParser(description="download command arg parser")
    parser.add_argument('-L',
                        '--limit',
                        action="store",
                        dest='limit',
                        help="Max download speed limit.")
    parser.add_argument('-D',
                        '--dir',
                        action="store",
                        dest='output_dir',
                        help="Download task to dir.")
    parser.add_argument('-S',
                        '--secret',
                        action="store",
                        dest='secret',
                        help="Retrieval password.",
                        default="")
    if not args:
        parser.print_help()
        # sys.exit() rather than the site-provided exit(), which is intended
        # for the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    namespace, links = parser.parse_known_args(args)
    secret = namespace.secret
    # Command-line values override the configured defaults.
    limit = namespace.limit or limit
    output_dir = namespace.output_dir or output_dir

    def fetch_all(source):
        """Download files from *source* until its ``info`` reports count 0."""
        count = 1
        while count != 0:
            link, filename, count = source.info
            download_command(filename,
                             link,
                             limit=limit,
                             output_dir=output_dir)

    # Rewrite WAP links to the regular share form, then add 'http://'.
    links = [link.replace("wap/link", "share/link") for link in links]
    links = map(add_http, links)
    for url in links:
        res = parse_url(url)
        link_type = res.get('type')
        if link_type == 1:
            # normal share link
            fetch_all(Pan(url, secret=secret))
        elif link_type == 2:
            # album
            album_id = res.get('album_id')
            uk = res.get('uk')
            fetch_all(Album(album_id, uk))
        elif link_type == 3:
            # home
            raise NotImplementedError('This function has not implemented.')
        elif link_type == 0:
            logging.debug(url)
        # any other type is skipped silently

    sys.exit(0)
Ejemplo n.º 18
0
import fmt_json

def get(url,fname):
    """Fetch *url* with util.get_page() and write the returned body to *fname*.

    The output file is opened in text write mode (replacing any existing
    content) and is closed before the function returns.
    """
    url, html = util.get_page({"url": url})
    with open(fname, "w") as out:
        out.write(html)

def get_entry(html):
    """Return ``(href, text)`` for every anchor under ``div#list_D02_10/div``."""
    anchors = etree.HTML(html).xpath("//div[@id=\"list_D02_10\"]/div/a")
    entries = []
    for anchor in anchors:
        entries.append((anchor.get("href"), anchor.text))
    return entries

def get_sub(html):
    """Return ``(href, text)`` for every anchor anywhere inside ``div#div_shangQuan``."""
    anchors = etree.HTML(html).xpath("//div[@id=\"div_shangQuan\"]//a")
    subs = []
    for anchor in anchors:
        subs.append((anchor.get("href"), anchor.text))
    return subs

def all_text_lbd(o, info):
    """Return all text inside element *o* with whitespace runs collapsed.

    Every run of spaces, tabs, newlines and carriage returns becomes a
    single space and the result is stripped.  *info* is ignored.
    """
    joined = "".join(o.itertext())
    collapsed = re.sub("[ \t\n\r]+", " ", joined)
    return collapsed.strip()
def url_parse_lbd(o, info):
    """Pass *o* to util.parse_url() with ``info["url"]`` as the first argument."""
    base = info["url"]
    return util.parse_url(base, o)

# Field map for one listing row: each value is either an XPath relative to
# the row, or an ``(xpath, converter)`` pair whose converter is called as
# ``converter(value, info)``.
list_mp = {
        "cover": "./dt/a/img/@src",
        "url": ("./dt/a/@href", url_parse_lbd),
        "title": './dd/p[@class="title"]/a/@title',
        "info": ('./dd/p[@class="mt12"]', all_text_lbd),
        "brand": './dd/p[@class="mt10"]/a/@title',
        "brand_url": ('./dd/p[@class="mt10"]/a/@href', url_parse_lbd),
        "addr": './dd/p[@class="mt10"]/span/@title',
        "tags": './dd//div[@class="pt4 floatl"]/span/text()',
        "price": ('./dd/div[@class="moreInfo"]/p[1]', all_text_lbd),
        "average_price": ('./dd/div[@class="moreInfo"]/p[2]', all_text_lbd),
        # drop the last character of the size text
        "size": ('./dd/div[@class="area alignR"]/p[1]/text()', lambda o, info: o[:-1]),
    }
Ejemplo n.º 19
0
def download(args):
    """Download every share link given to the ``download`` sub-command.

    Recognised options are ``-L/--limit``, ``-D/--dir``, ``-S/--secret``,
    ``-P/--partial`` (choose files through select_download()) and
    ``-E/--extension`` (skip files that matchExtension() rejects); all
    remaining arguments are treated as links.  Options that are not supplied
    fall back to ``global_config.limit`` and ``global_config.dir``.

    Link types reported by parse_url(): 1 is a normal share link, 4 is a
    link carrying its own ``fsid`` and rewritten ``url``, 2 (album) and
    3 (home) are unsupported, 0 is logged as a wrong link, anything else is
    skipped.

    The function does not return: it exits with status 1 (after printing
    help) when *args* is empty and with status 0 when all links are done.

    Args:
        args: list of command-line tokens for this sub-command.

    Raises:
        NotImplementedError: for album (type 2) and home (type 3) links.
    """
    limit = global_config.limit
    output_dir = global_config.dir
    parser = argparse.ArgumentParser(description="download command arg parser")
    parser.add_argument('-L', '--limit', action="store", dest='limit', help="Max download speed limit.")
    parser.add_argument('-D', '--dir', action="store", dest='output_dir', help="Download task to dir.")
    parser.add_argument('-S', '--secret', action="store", dest='secret', help="Retrieval password.", default="")
    parser.add_argument('-P', '--partial', action="count", help="Partial download.")
    parser.add_argument('-E', '--extension', action="store", dest='extension', help="Download only specified by the extension. e.g. aw3")

    if not args:
        parser.print_help()
        # sys.exit() rather than the site-provided exit(), which is intended
        # for the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    namespace, links = parser.parse_known_args(args)
    secret = namespace.secret
    # Command-line values override the configured defaults.
    limit = namespace.limit or limit
    output_dir = namespace.output_dir or output_dir
    extension = namespace.extension

    def build_cookies(pan):
        """Return the cookie string for *pan*, or '' when it has no BDUSS."""
        if not pan.bduss:
            return ''
        cookies = 'BDUSS={0}'.format(pan.bduss)
        if pan.pcsett:
            cookies += ';pcsett={0}'.format(pan.pcsett)
        # NOTE(review): the trailing double quote is kept from the original;
        # presumably download_command() supplies the opening one - confirm.
        return cookies + '"'

    def wanted(filename):
        """Return False (and say so) when ``--extension`` excludes *filename*."""
        if extension and not matchExtension(filename, extension):
            print('{filename} is ignored'.format(filename=filename))
            return False
        return True

    # Rewrite WAP links to the regular share form, then add 'http://'.
    links = [link.replace("wap/link", "share/link") for link in links]
    links = map(add_http, links)
    for url in links:
        res = parse_url(url)
        link_type = res.get('type')
        if link_type == 1:
            # normal share link
            pan = Pan()
            fis = pan.get_file_infos(url, secret)

            if namespace.partial:
                # Ask again with the full list until a selection is made;
                # the original fed the None result back into
                # select_download() on retry.
                selection = None
                while selection is None:
                    selection = select_download(fis)
                fis = selection

            cookies = build_cookies(pan)
            for fi in fis:
                if not wanted(fi.filename):
                    continue
                # Path of the file relative to the shared parent directory.
                savedir = fi.path.replace(fi.parent_path, '', 1)[1:]
                download_command(fi.filename, savedir, fi.dlink, cookies=cookies, limit=limit, output_dir=output_dir)

        elif link_type == 4:
            pan = Pan()
            fsid = res.get('fsid')
            new_url = res.get('url')
            infos = pan.get_file_infos(new_url, secret, fsid)
            cookies = build_cookies(pan)
            for info in infos:
                if not wanted(info.filename):
                    continue
                # NOTE(review): unlike the type-1 branch this call passes no
                # ``savedir`` argument - confirm against download_command().
                download_command(info.filename, info.dlink, cookies=cookies, limit=limit, output_dir=output_dir)

        elif link_type in (2, 3):
            # album (2) and home (3) links are not supported yet
            raise NotImplementedError('This function has not implemented.')
        elif link_type == 0:
            logger.debug(url, extra={"type": "wrong link", "method": "None"})
        # any other type is skipped silently

    sys.exit(0)
Ejemplo n.º 20
0
def download(args):
    """Parse the ``download`` sub-command arguments and download each link.

    Defaults for the speed limit and the output directory come from
    ``global_config`` and are overridden by ``-L/--limit`` and ``-D/--dir``.
    Every argument the parser does not recognise is treated as a share link.

    Args:
        args: Command-line argument strings for the sub-command.  When
            empty, the help text is printed and the process exits with
            status 1.

    Raises:
        NotImplementedError: For links that ``parse_url`` classifies as
            type 2 (album) or type 3 (home).
        SystemExit: Always -- status 0 once every link has been handled.
    """

    def build_cookies(pan):
        # Cookie string handed to download_command; empty without a BDUSS.
        # NOTE(review): the trailing '"' presumably closes a quote that
        # download_command opens -- confirm before changing it.
        if not pan.bduss:
            return ''
        cookies = 'BDUSS={0}'.format(pan.bduss)
        if pan.pcsett:
            cookies += ';pcsett={0}'.format(pan.pcsett)
        return cookies + '"'

    limit = global_config.limit
    output_dir = global_config.dir
    parser = argparse.ArgumentParser(description="download command arg parser")
    parser.add_argument('-L',
                        '--limit',
                        action="store",
                        dest='limit',
                        help="Max download speed limit.")
    parser.add_argument('-D',
                        '--dir',
                        action="store",
                        dest='output_dir',
                        help="Download task to dir.")
    parser.add_argument('-S',
                        '--secret',
                        action="store",
                        dest='secret',
                        help="Retrieval password.",
                        default="")
    parser.add_argument('-P',
                        '--partial',
                        action="count",
                        help="Partial download.")
    if not args:
        parser.print_help()
        # sys.exit instead of the site-provided exit(), matching the
        # sys.exit(0) used at the end of this function.
        sys.exit(1)
    namespace, links = parser.parse_known_args(args)
    secret = namespace.secret
    if namespace.limit:
        limit = namespace.limit
    if namespace.output_dir:
        output_dir = namespace.output_dir

    # Rewrite wap links to their share/link form.
    links = [link.replace("wap/link", "share/link") for link in links]
    # add 'http://'
    links = map(add_http, links)
    for url in links:
        res = parse_url(url)
        link_type = res.get('type')

        # normal
        if link_type == 1:
            pan = Pan()
            fis = pan.get_file_infos(url, secret)

            if namespace.partial:
                # Always re-prompt against the full listing: assigning the
                # result straight back to `fis` would pass None to
                # select_download after a rejected selection.
                selected = select_download(fis)
                while selected is None:
                    selected = select_download(fis)
                fis = selected

            # The cookie string does not depend on the file, so build it once.
            cookies = build_cookies(pan)
            for fi in fis:
                # Path relative to the shared root, without the leading
                # separator character.
                savedir = fi.path.replace(fi.parent_path, '', 1)[1:]
                download_command(fi.filename,
                                 savedir,
                                 fi.dlink,
                                 cookies=cookies,
                                 limit=limit,
                                 output_dir=output_dir)

        elif link_type == 4:
            pan = Pan()
            fsid = res.get('fsid')
            newUrl = res.get('url')
            infos = pan.get_file_infos(newUrl, secret, fsid)
            cookies = build_cookies(pan)
            for info in infos:
                # NOTE(review): the type-1 branch passes `savedir` as the
                # second positional argument while this call omits it --
                # confirm against download_command's signature.
                download_command(info.filename,
                                 info.dlink,
                                 cookies=cookies,
                                 limit=limit,
                                 output_dir=output_dir)

        # album
        elif link_type == 2:
            raise NotImplementedError('This function has not implemented.')
        # home
        elif link_type == 3:
            raise NotImplementedError('This function has not implemented.')
        elif link_type == 0:
            logger.debug(url, extra={"type": "wrong link", "method": "None"})
        # Any other type is silently skipped.

    sys.exit(0)
Ejemplo n.º 21
0
def download(args):
    """Parse the ``download`` sub-command arguments and download each link.

    Defaults for the speed limit, output directory, "download all" flag and
    link file come from ``global_config`` and are overridden by the
    corresponding command-line options.  Supplying ``-F/--file`` also turns
    on "download all", so batch runs never prompt for a selection.

    Args:
        args: Command-line argument strings for the sub-command.  When
            empty, the help text is printed and the process exits with
            status 1.

    Raises:
        NotImplementedError: For links that ``parse_url`` classifies as
            type 2 (album) or type 3 (home).
        SystemExit: Always -- status 0 once every link has been handled.
    """

    def build_cookies(pan):
        # Cookie string handed to download_command; empty without a BDUSS.
        # NOTE(review): the trailing '"' presumably closes a quote that
        # download_command opens -- confirm before changing it.
        if not pan.bduss:
            return ''
        cookies = 'BDUSS={0}'.format(pan.bduss)
        if pan.pcsett:
            cookies += ';pcsett={0}'.format(pan.pcsett)
        return cookies + '"'

    limit = global_config.limit
    output_dir = global_config.dir
    dl_all = global_config.dl_all
    link_file = global_config.link_file
    parser = argparse.ArgumentParser(description="download command arg parser")
    parser.add_argument('-L',
                        '--limit',
                        action="store",
                        dest='limit',
                        help="Max download speed limit.")
    parser.add_argument('-D',
                        '--dir',
                        action="store",
                        dest='output_dir',
                        help="Download task to dir.")
    parser.add_argument('-F',
                        '--file',
                        action="store",
                        dest='link_file',
                        help="Get list from file.")
    parser.add_argument('-S',
                        '--secret',
                        action="store",
                        dest='secret',
                        help="Retrieval password.",
                        default="")
    parser.add_argument('-A',
                        '--all',
                        action="store_true",
                        dest='dl_all',
                        help="Download all files without asking.",
                        default=False)
    if not args:
        parser.print_help()
        # sys.exit instead of the site-provided exit(), matching the
        # sys.exit(0) used at the end of this function.
        sys.exit(1)
    namespace, links = parser.parse_known_args(args)
    secret = namespace.secret
    if namespace.limit:
        limit = namespace.limit
    if namespace.output_dir:
        output_dir = namespace.output_dir
    if namespace.link_file:
        # while using batch mode, automatically download all files.
        dl_all = True
        link_file = namespace.link_file
    if namespace.dl_all:
        dl_all = namespace.dl_all

    # get file lists from file.
    # NOTE(review): this overwrites the links parsed from the command line
    # above, even when no -F option was given -- confirm that is intended.
    links = get_links_from_file(link_file, "\n")
    print(links)
    # Rewrite wap links to their share/link form.
    links = [link.replace("wap/link", "share/link") for link in links]
    # add 'http://'
    links = map(add_http, links)
    for url in links:
        res = parse_url(url)
        link_type = res.get('type')

        # normal
        if link_type == 1:
            pan = Pan()
            fis = pan.get_file_infos(url, secret)

            if not dl_all:
                # Always re-prompt against the full listing: assigning the
                # result straight back to `fis` would pass None to
                # select_download after a rejected selection.
                selected = select_download(fis)
                while selected is None:
                    selected = select_download(fis)
                fis = selected

            print(fis)
            # The cookie string does not depend on the file, so build it once.
            cookies = build_cookies(pan)
            for fi in fis:
                # Path relative to the shared root, without the leading
                # separator character.
                savedir = fi.path.replace(fi.parent_path, '', 1)[1:]
                download_command(fi.filename,
                                 savedir,
                                 fi.dlink,
                                 cookies=cookies,
                                 limit=limit,
                                 output_dir=output_dir)

        elif link_type == 4:
            pan = Pan()
            fsid = res.get('fsid')
            newUrl = res.get('url')
            info = pan.get_dlink(newUrl, secret, fsid)
            cookies = build_cookies(pan)
            # NOTE(review): the type-1 branch passes `savedir` as the second
            # positional argument while this call omits it -- confirm
            # against download_command's signature.
            download_command(info.filename,
                             info.dlink,
                             cookies=cookies,
                             limit=limit,
                             output_dir=output_dir)

        # album
        elif link_type == 2:
            raise NotImplementedError('This function has not implemented.')
        # home
        elif link_type == 3:
            raise NotImplementedError('This function has not implemented.')
        elif link_type == 0:
            logger.debug(url, extra={"type": "wrong link", "method": "None"})
        # Any other type is silently skipped.

    sys.exit(0)