Ejemplo n.º 1
0
Archivo: yuedu.py Proyecto: spyth/xiami
def main():
    sys.stdout.write(u'正在努力请求节目单...')
    sys.stdout.flush()
    data = common.open_url(list_url)
    if not data:
        return
    menu_list = json.loads(data)['list']
    sys.stdout.write('\r')

    list_format = u'[{title}] by {author}  |  {player} {min:02}:{sec:02}'
    print u'{0:*^60}'.format(u'悦读FM.倾听文字的声音')
    print u'总共%d期.最新10期:'%len(menu_list)

    for i in range(0,10):
        print i,list_format.format(**menu_list[i])
    print u"\n输入序号下载,以','分开.'q'退出"

    while 1:
        usr_input = raw_input('Select(0-%d):'%(len(menu_list)-1))
        if usr_input == 'q':
            print 'bye!'
            break
        try:
            li = map(int, usr_input.split(','))
        except:
            print 'Input Error!'
        for i in li:
            if 0 <= i < len(menu_list):
                common.download(menu_list[i]['mp3'], _TARGET,\
                    menu_list[i]['title'], 'mp3', Referer='http://yuedu.fm/')
                article2Html(i, menu_list[i]['title'])
Ejemplo n.º 2
0
Archivo: yuedu.py Proyecto: spyth/xiami
def article2Html(num, filename):
    """Fetch article #num from the item API and write its text out as
    <filename>.html (UTF-8) under _TARGET."""
    payload = common.open_url(item_url%num)
    article = json.loads(payload)['item'][0]
    out_path = '%s/%s.html'%(_TARGET, filename)
    with codecs.open(out_path, 'w', 'utf-8') as html_file:
        html_file.write('<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>')
        html_file.write(article['text'])
        html_file.write('</body></html>')
Ejemplo n.º 3
0
def replays(url):
    """Scrape a fite.tv listing page for replay events embedded in
    ld+json <script> blobs, register the free ones as playable links,
    and add any "Show more" pagination link as a directory entry.

    :param url: listing page URL to scrape.
    """
    items = []
    page = common.open_url(url)
    page = page.replace('\n', '')
    match = re.compile(
        '<script type="application/ld\+json">(.+?)</script>').findall(page)
    for data in match:
        # Blobs without a 'location' field are not event listings.
        if 'location' not in data:
            continue
        data = json.loads(str(data))
        for event in data:
            # BUG FIX: the original wrapped each field lookup in a bare
            # try/except: pass, so a missing field silently reused the
            # value left over from the previous event (or raised
            # NameError on the very first one). Use explicit defaults.
            offers = event.get('offers', {})
            event_url = offers.get('url')
            price = offers.get('price')
            date = event.get('startDate', '')[:10]
            img = event.get('image')
            title = event.get('name')

            if event_url and event_url not in items:
                items.append(event_url)
                title = '[B]%s[/B] | %s' % (date, title)
                if price == '0':
                    common.add_link(title, event_url, 1, img)

    more = re.compile(
        '<div class="show-more"><a href="(.+?)">Show more').findall(page)
    for url in more:
        img = base_img
        title = '[I]More...[/I]'
        url = 'https://www.fite.tv%s' % url
        common.add_dir(title, url, 2, img)
    def test_price_sanity(self):
        """Cross-check product.get_price() against the price reported by
        Amazon: the API offer summary when available, otherwise the price
        scraped from the product detail page."""
        price = self.product.get_price()
        node = self.browse_nodes.Items.Item.OfferSummary
        if hasattr(node, "LowestNewPrice"):
            # NOTE(review): this reads FormattedPrice off OfferSummary
            # itself, not off the LowestNewPrice child it just checked
            # for — confirm that is intended.
            our_price = node.FormattedPrice

        else:
            # Fall back to scraping the displayed price from the page.
            html_text = open_url(self.product.page_url).text
            soup = BeautifulSoup(html_text)
            our_price = soup.findAll("span", id="priceblock_ourprice")[0].text

        self.assertTrue(our_price == price)
Ejemplo n.º 5
0
def download_pic(url):
    print url
    html = common.open_url(url)
    find_re = re.compile(r'<li id.+?<img src="(.+?)"', re.DOTALL)
    img_url = find_re.findall(html)
    print 'Start download %d pics'%len(img_url) 
    for url in img_url:
        if url:
            filename,ext = os.path.splitext(os.path.split(url)[-1])
            if not ext:
                ext = '.jpg'
            common.download(url, TARGET, filename, ext[1:], Referer=url)
Ejemplo n.º 6
0
Archivo: luoo.py Proyecto: spyth/xiami
def download_show(li):
    """Download every song of each luoo.fm show number in `li`.

    Shows numbered above 296 are fetched via xml_url_1; older ones via
    xml_url_2. Each show lands in its own TARGET%num directory.
    """
    for num in li:
        url = xml_url_1%num if num > 296 else xml_url_2%num
        xml_data = common.open_url(url)
        if not xml_data:
            continue
        target_dir = TARGET%num
        for title, location in extract(xml_data):
            ext = location.split('.')[-1]
            common.download(location, target_dir, title, ext, Referer=referer%num)
    def _upload_image(self, img_url):
        """
        Fetch the image at img_url and push it through the XML-RPC media
        endpoint; the XMLRPC library base64-encodes the binary payload.
        :param img_url: The url to img file.
        :return: attachment_id.
        """
        raw_bytes = open_url(img_url).content
        payload = {
            'name': 'picture.jpg',
            'type': 'image/jpeg',
            'bits': xmlrpc_client.Binary(raw_bytes)
        }
        upload_response = self.client.call(media.UploadFile(payload))
        return upload_response['id']
    def test_review_sanity(self):
        """Cross-check product.get_review() against the longest five-star
        review scraped from the Amazon reviews page; when the page shows
        no reviews, get_review() must report 'null'."""
        review = self.product.get_review()
        five_stars_review_url = "http://www.amazon.com/product-reviews/{0}/?ie=" "UTF8&filterBy=addFiveStar".format(
            self.product.ASIN
        )

        html_text = open_url(five_stars_review_url).text
        soup = BeautifulSoup(html_text)
        all_reviews = soup.findAll("span", "a-size-base review-text")
        if len(all_reviews) == 0:
            self.assertTrue("null" == review)
            # BUG FIX: without this return the test fell through to
            # all_reviews[0] on an empty list and died with IndexError.
            return

        all_reviews = [R.text for R in all_reviews]
        all_reviews = sorted(all_reviews, key=lambda word: len(word), reverse=True)
        self.assertTrue(all_reviews[0].encode("utf-8") == review)
Ejemplo n.º 9
0
    def get_review(self):
        """
        Find the longest review with five stars.
        :return: Review - str.
        """
        # Serve the memoized value when one has already been computed.
        if self.review:
            return self.review

        markup = open_url(self.five_stars_review_url).text
        soup = BeautifulSoup(markup)
        review_spans = soup.findAll("span", "a-size-base review-text")
        if not review_spans:
            self.review = 'null'
            return self.review

        # Longest review text wins.
        texts = sorted((span.text for span in review_spans), key=len, reverse=True)
        self.review = texts[0].encode('utf-8')
        return self.review
Ejemplo n.º 10
0
 def __init__(self, product_item, browse_nodes):
     """Build a product wrapper from an Amazon API item.

     Fetches the product detail page up front and caches its soup;
     price/rating/review fields start empty and are filled lazily by
     their getter methods.

     :param product_item: API item exposing ASIN / DetailPageURL / ItemAttributes.
     :param browse_nodes: browse-node lookup result, kept for later use.
     """
     self.browse_nodes = browse_nodes
     self.ASIN = product_item.ASIN.text
     self.page_url = unquote(product_item.DetailPageURL.text)
     self.title = product_item.ItemAttributes.Title.text.encode('utf-8')
     html_text = open_url(self.page_url).text
     self.soup = BeautifulSoup(html_text)
     self.categories = []
     self.img_urls = {}
     self.price = ''
     self.rating = ''
     self.review = ''
     self.features = ''
     self.num_of_reviews = None
     self.five_stars_review_url = 'http://www.amazon.com/product-reviews/{0}/?ie=' \
                                  'UTF8&filterBy=addFiveStar&tag={1}'.format(self.ASIN, CONFIG['associate_tag'])
     # BUG FIX: narrowed from a bare `except:` (which also swallowed
     # KeyboardInterrupt/SystemExit); a missing Manufacturer attribute
     # (AttributeError) is the expected failure here.
     try:
         self.manufacturer = str(product_item.ItemAttributes.Manufacturer)
     except AttributeError:
         self.manufacturer = 'null'
Ejemplo n.º 11
0
Archivo: xiami.py Proyecto: spyth/xiami
def main():
    if len(sys.argv) < 3 or (sys.argv[1] != '-t' and len(sys.argv) > 3):
        help_info()
        return
    if sys.argv[1] == '-a':
        url = _albumUrl % sys.argv[2]
    elif sys.argv[1] == '-c':
        url = _collectUrl % sys.argv[2]
    elif sys.argv[1] == '-t':
        url = _trackUrl % ','.join(sys.argv[2:])
    else :
        help_info()
        return
    content = common.open_url(url)
    if not content:
        return
    res = extract(content)
    for title,uri,lrc in res:
        common.download(uri,TARGET,title,'mp3')
        if lrc:
Ejemplo n.º 12
0
def main():
    print u'无聊图请按1,妹纸图请按2, 其它自动挂机。'
    choice = raw_input('>')
    global TARGET
    if choice == '1':
        url_default = wuliao_default
        url_page = wuliao_page
        TARGET = os.path.join(TARGET, 'jandan-pic')
    elif choice == '2':
        url_default = meizi_default
        url_page = meizi_page
        TARGET = os.path.join(TARGET, 'jandan-ooxx')
    else:
        print 'bye!'
        return
    html = common.open_url(url_default)
    find_RE = re.compile(r'>\[(.+?)\]')
    result = find_RE.findall(html)
    cur_page = int(result[0])
    print 'Current Page Number:%d'%cur_page
    cnt = int(raw_input('How many pages do you want to download? \n>'))
    for i in range(0,cnt):
        download_pic(url_page%(cur_page-i))
 def test_get_price(self):
     # NOTE(review): despite its name this test exercises get_rating(),
     # not get_price() — the method name looks like a copy/paste slip.
     # It cross-checks the API rating against the value scraped from the
     # product page's avgRating widget (first 3 characters, e.g. "4.5").
     rating = self.product.get_rating()
     html_text = open_url(self.product.page_url).text
     soup = BeautifulSoup(html_text)
     our_rating = soup.findAll("div", id="avgRating")[0].span.a.span.text[:3]
     self.assertTrue(our_rating == rating)