def ebay_url(match, bot):
    """Return an IRC-formatted summary (title, price, bids, feedback) for an eBay listing URL.

    `match` is the listing URL; `bot` supplies config (API key currently unused).
    """
    # Retained for a planned switch to eBay's Shopping API; the current
    # implementation scrapes the listing page directly.
    apikey = bot.config.get("api_keys", {}).get("ebay")
    item = http.get_html(match)
    title = item.xpath("//h1[@id='itemTitle']/text()")[0].strip()
    # Price lives under different ids depending on listing type
    # (auction bid, fixed price, sale price) — try each in turn.
    price = item.xpath("//span[@id='prcIsum_bidPrice']/text()")
    if not price:
        price = item.xpath("//span[@id='prcIsum']/text()")
    if not price:
        price = item.xpath("//span[@id='mm-saleDscPrc']/text()")
    price = price[0].strip() if price else '?'
    # Auctions expose a bid count; fixed-price listings raise IndexError here.
    try:
        bids = item.xpath("//span[@id='qty-test']/text()")[0].strip()
    except IndexError:
        bids = "Buy It Now"
    feedback = item.xpath("//span[@class='w2b-head']/text()")
    if not feedback:
        feedback = item.xpath("//div[@id='si-fb']/text()")
    feedback = feedback[0].strip() if feedback else '?'
    return http.process_text(
        "\x02{}\x02 - \x02\x033{}\x03\x02 - Bids: {} - Feedback: {}".format(
            title, price, bids, feedback))
def ebay_url(match, bot):
    """Return an IRC-formatted summary (title, price, bids, feedback) for an eBay listing URL.

    `match` is the listing URL; `bot` supplies config (API key currently unused).
    """
    # Kept for a future switch to the eBay Shopping API; for now the
    # listing page is scraped directly.
    apikey = bot.config.get("api_keys", {}).get("ebay")
    item = http.get_html(match)
    title = item.xpath("//h1[@id='itemTitle']/text()")[0].strip()
    # The price id differs by listing type (auction / fixed / sale).
    price = item.xpath("//span[@id='prcIsum_bidPrice']/text()")
    if not price:
        price = item.xpath("//span[@id='prcIsum']/text()")
    if not price:
        price = item.xpath("//span[@id='mm-saleDscPrc']/text()")
    price = price[0].strip() if price else '?'
    # Only auctions have a bid counter; otherwise fall back to "Buy It Now".
    try:
        bids = item.xpath("//span[@id='qty-test']/text()")[0].strip()
    except IndexError:
        bids = "Buy It Now"
    feedback = item.xpath("//span[@class='w2b-head']/text()")
    if not feedback:
        feedback = item.xpath("//div[@id='si-fb']/text()")
    feedback = feedback[0].strip() if feedback else '?'
    return http.process_text(
        "\x02{}\x02 - \x02\x033{}\x03\x02 - Bids: {} - Feedback: {}".format(
            title, price, bids, feedback))
def unmatched_url(match):
    """Fallback handler: report content type plus page title (HTML) or size (images).

    Returns a processed string like "[image/png] 12 KiB " or the page title.
    """
    page = urllib.urlopen(match)
    length = None
    title = None
    content_type = page.info()['Content-Type'].split(';')[0]
    if content_type.find("html") != -1:
        soup = BeautifulSoup(page)
        title = soup.title.renderContents().strip()
        # Very long titles are usually junk; prefer the meta description.
        if len(title) > 300:
            title = soup.find('meta', {'name': 'description'})['content']
    elif content_type.find("image") != -1:
        if page.info()['Content-Length']:
            # NOTE: integer division under Python 2 — sizes are truncated.
            length = int(page.info()['Content-Length'])
            if length > 1048576:
                length = str(length / 1048576) + ' MiB'
            elif length > 1024:
                length = str(length / 1024) + ' KiB'
            elif length < 0:
                length = 'Unknown size'
            else:
                length = str(length) + ' B'
        else:
            length = "Unknown size"
    else:
        title = "I messed up. Bad."
    # Fix: the response was previously never closed (socket leak).
    page.close()
    result = ''
    if length is not None:
        result += ('[%s] %s ' % (content_type, length))
    if title is not None:
        result += title
    return http.process_text(result)
def fourchanthread_url(match):
    """Summarize a 4chan thread: page title, OP's name, and a trimmed OP comment."""
    soup = http.get_soup(match)
    page_title = soup.title.renderContents().strip()
    op = soup.find('div', {'class': 'opContainer'})
    body = op.find('blockquote', {'class': 'postMessage'}).renderContents().strip()
    # The second nameBlock span carries the poster's name.
    poster = op.find_all('span', {'class': 'nameBlock'})[1]
    summary = "\x02{}\x02 - posted by \x02{}\x02: {}".format(
        page_title, poster, body[:trimlength])
    return http.process_text(summary)
def wikipedia_url(match):
    """Return the article heading plus a trimmed first paragraph."""
    soup = http.get_soup(match)
    heading = soup.find('h1', {'id': 'firstHeading'}).renderContents().strip()
    # Strip embedded newlines so the snippet stays on one IRC line.
    intro = soup.find('p').renderContents().strip()
    intro = intro.replace('\n', '').replace('\r', '')
    message = "\x02Wikipedia.org: {}\x02 - {}...".format(heading, intro[:trimlength])
    return http.process_text(message)
def ebay_url(match, bot):
    """Return an IRC-formatted summary (title, price, bids, feedback) for an eBay listing URL.

    `bot` is accepted for handler-signature consistency but unused here.
    """
    item = http.get_html(match)
    title = item.xpath("//h1[@id='itemTitle']/text()")[0].strip()
    # The price element's id varies by listing type — try each in turn.
    price = item.xpath("//span[@id='prcIsum_bidPrice']/text()")
    if not price:
        price = item.xpath("//span[@id='prcIsum']/text()")
    if not price:
        price = item.xpath("//span[@id='mm-saleDscPrc']/text()")
    price = price[0].strip() if price else '?'
    # Fixed-price listings have no bid counter; indexing raises IndexError.
    try:
        bids = item.xpath("//span[@id='qty-test']/text()")[0].strip()
    except IndexError:
        bids = "Buy It Now"
    feedback = item.xpath("//span[@class='w2b-head']/text()")
    if not feedback:
        feedback = item.xpath("//div[@id='si-fb']/text()")
    feedback = feedback[0].strip() if feedback else '?'
    return http.process_text(
        "\x02{}\x02 - \x02\x033{}\x03\x02 - Bids: {} - Feedback: {}".format(
            title, price, bids, feedback))
def fourchan_url(match):
    """Summarize a 4chan OP: thread title, poster name, and the full comment markup."""
    soup = http.get_soup(match)
    thread_title = soup.title.renderContents().strip()
    op = soup.find('div', {'class': 'opContainer'})
    message = op.find('blockquote', {'class': 'postMessage'})
    # Index 1: the poster's name span (index 0 is a different block).
    poster = op.find_all('span', {'class': 'nameBlock'})[1]
    return http.process_text(
        '\x02%s\x02 - posted by \x02%s\x02: %s' % (thread_title, poster, message))
def fourchanthread_url(match):
    """Summarize a 4chan thread as "title - posted by author: comment" (trimmed)."""
    soup = http.get_soup(match)
    heading = soup.title.renderContents().strip()
    container = soup.find('div', {'class': 'opContainer'})
    quote = container.find('blockquote', {'class': 'postMessage'})
    text = quote.renderContents().strip()
    author_span = container.find_all('span', {'class': 'nameBlock'})[1]
    return http.process_text(
        "\x02{}\x02 - posted by \x02{}\x02: {}".format(
            heading, author_span, text[:trimlength]))
def get_title(url):
    """Build "url - comment" for a 4chan link, using the anchored post when present."""
    soup = http.get_soup(url)
    if "#" in url:
        # Fragment names a specific post; summarize that one.
        anchor = url.split("#")[1]
        post = soup.find("div", {"id": anchor})
    else:
        post = soup.find("div", {"class": "opContainer"})
    quote = post.find("blockquote", {"class": "postMessage"})
    comment = http.process_text(quote.renderContents().strip())
    return "{} - {}".format(url, comment)
def get_title(url):
    """Build a unicode "url - comment" summary for a 4chan link; the URL fragment selects a post."""
    soup = http.get_soup(url)
    if '#' not in url:
        post = soup.find('div', {'class': 'opContainer'})
    else:
        # A fragment points at a specific post id.
        post = soup.find('div', {'id': url.split('#')[1]})
    raw = post.find('blockquote', {'class': 'postMessage'}).renderContents()
    comment = http.process_text(raw.strip())
    return u"{} - {}".format(url, comment)
def fourchanquote_url(match):
    """Summarize a specific 4chan post addressed by the URL fragment (#postid).

    Returns "title - posted by author: comment" with IRC bold formatting.
    """
    postid = match.split('#')[1]
    soup = http.get_soup(match)
    title = soup.title.renderContents().strip()
    post = soup.find('div', {'id': postid})
    comment = post.find('blockquote', {'class': 'postMessage'}).renderContents().strip()
    # Second nameBlock span holds the poster's name.
    author = post.find_all('span', {'class': 'nameBlock'})[1].renderContents().strip()
    return http.process_text(
        '\x02%s\x02 - posted by \x02%s\x02: %s' % (title, author, comment))
def amazon_url(match):
    """Return title, star rating, and price for an Amazon product page.

    `match` is a regex match object whose group(1) is the product URL.
    """
    item = http.get_html(match.group(1))
    title = item.xpath('//title/text()')[0]
    # Price element missing (e.g. marketplace-only listing) -> placeholder.
    try:
        price = item.xpath("//span[@id='priceblock_ourprice']/text()")[0]
    except IndexError:
        price = "$?"
    rating = item.xpath("//div[@id='avgRating']/span/text()")[0].strip()
    # Rating text looks like "4.5 out of 5 stars"; round() kept for
    # behavior parity with the original implementation.
    star_count = int(round(float(rating.split(' ')[0]), 0))
    stars = '★' * star_count + '☆' * (5 - star_count)
    formatted = '\x02{}\x02 - \x02{}\x02 - \x034{}\x034'.format(title, stars, price)
    try:
        # Python 2 path: normalize the byte string to unicode.
        return formatted.decode('utf-8')
    except (UnicodeError, AttributeError):
        return http.process_text(formatted)
def imgur_url(match):
    """Return the imgur post title with an [IMGUR] prefix."""
    soup = http.get_soup(match)
    heading = soup.find('h1', {'class': 'post-title font-opensans-bold'})
    title = heading.renderContents().strip()
    return http.process_text("[IMGUR] {}".format(title))
def fourchanboard_url(match):
    """Return the board page's title, bolded and trimmed to trimlength."""
    soup = http.get_soup(match)
    board_title = soup.title.renderContents().strip()[:trimlength]
    return http.process_text("\x02{}\x02".format(board_title))
def imgur_url(match):
    """Fetch an imgur page and report its post title tagged with [IMGUR]."""
    page = http.get_soup(match)
    node = page.find('h1', {'class': 'post-title font-opensans-bold'})
    return http.process_text("[IMGUR] {}".format(node.renderContents().strip()))
def craigslist_url(match):
    """Summarize a Craigslist posting: bold title plus a trimmed body excerpt."""
    soup = http.get_soup(match)
    heading = soup.find('h2', {'class': 'postingtitle'}).renderContents().strip()
    body = soup.find('section', {'id': 'postingbody'}).renderContents().strip()
    summary = "\x02Craigslist.org: {}\x02 - {}".format(heading, body[:trimlength])
    return http.process_text(summary)
def wikipedia_url(match):
    """Fetch a Wikipedia article and report its heading plus a trimmed intro snippet."""
    page = http.get_soup(match)
    article_title = page.find('h1', {'id': 'firstHeading'}).renderContents().strip()
    # Flatten the first paragraph onto a single line for IRC output.
    snippet = page.find('p').renderContents().strip()
    snippet = snippet.replace('\n', '').replace('\r', '')
    return http.process_text(
        "\x02Wikipedia.org: {}\x02 - {}...".format(article_title, snippet[:trimlength]))
def craigslist_url(match):
    """Report a Craigslist posting as "title - full body" with IRC bold markup."""
    page = http.get_soup(match)
    heading = page.find('h2', {'class': 'postingtitle'}).renderContents().strip()
    body = page.find('section', {'id': 'postingbody'}).renderContents().strip()
    return http.process_text('\x02Craigslist.org: %s\x02 - %s' % (heading, body))