コード例 #1
0
ファイル: hasher.py プロジェクト: seraphln/chat2all
def get_hexdigest(algorithm, salt, raw_password):
    """Return the hex digest of ``salt + raw_password`` for ``algorithm``.

    Both ``raw_password`` and ``salt`` are passed through ``smart_str``
    before being concatenated and hashed.

    Args:
        algorithm: ``'md5'`` or ``'sha1'``.
        salt: value prepended to the password before hashing.
        raw_password: the password to hash.

    Returns:
        The hexadecimal digest string produced by ``hashlib``.

    Raises:
        ValueError: if ``algorithm`` is neither ``'md5'`` nor ``'sha1'``.
    """
    password_text = smart_str(raw_password)
    salt_text = smart_str(salt)
    supported = (('md5', hashlib.md5), ('sha1', hashlib.sha1))
    for name, hasher in supported:
        if algorithm == name:
            return hasher(salt_text + password_text).hexdigest()
    raise ValueError('Got unknown password algorithm type in password')
コード例 #2
0
ファイル: util.py プロジェクト: numero/prospero
def get_hexdigest(salt, raw_password):
    """
    Returns the SHA-1 hex digest of ``salt`` followed by ``raw_password``.

    Both values are converted with ``smart_str`` before they are
    concatenated and hashed.  SHA-1 is the only algorithm used; there is no
    algorithm parameter.
    """
    password_text = smart_str(raw_password)
    salt_text = smart_str(salt)
    digest = hashlib.sha1(salt_text + password_text)
    return digest.hexdigest()
コード例 #3
0
def get_author_list(str_url, site_category):
    """Collect one record per ``<dd>`` inside the ``id="experts"`` element.

    Args:
        str_url: address handed to ``parser`` to obtain the parsed page.
        site_category: accepted for interface compatibility; not used.

    Returns:
        A list of tuples ``(site, url, user_name, rss, timestamp)`` where
        ``site`` is always ``'blog.csdn.net'``, ``url`` is the ``href`` of the
        first link in the ``<dd>``, ``user_name`` is the part of ``url`` after
        ``'net/'``, ``rss`` is ``url + '/rss/list'`` and ``timestamp`` is the
        value of ``datehelper.now_datetime()``.

    Raises:
        IndexError: if no ``experts`` element is found, a ``<dd>`` has no
            link, or the link does not contain ``'net/'``.
    """
    page = parser(str_url)
    experts = page.find_all(SoupStrainer(id="experts"))
    site = 'blog.csdn.net'

    records = []
    for entry in experts[0].select("dd"):
        links = entry.select("a")
        profile_url = smart_str(links[0]['href'].strip())
        # The user name is whatever follows 'net/' in the profile URL.
        user_name = profile_url.split('net/')[1]
        rss_url = profile_url + '/rss/list'
        records.append(
            (site, profile_url, user_name, rss_url,
             datehelper.now_datetime()))
    return records
コード例 #4
0
ファイル: text.py プロジェクト: c24b/clean_crawtext
def encodeValue(value):
    """Best-effort text conversion of ``value``.

    ``smart_unicode`` is tried first.  If it raises ``UnicodeEncodeError``
    the result of ``smart_str`` is returned instead; on any other ordinary
    error the input is returned unchanged.

    Args:
        value: the object to convert.

    Returns:
        The converted value, or ``value`` itself when conversion failed.
    """
    try:
        return smart_unicode(value)
    except UnicodeEncodeError:
        return smart_str(value)
    except Exception:
        # Deliberate best effort: fall back to the untouched input.  Catching
        # Exception (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return value
コード例 #5
0
def xml_to_dict(raw_str):
    """Convert an XML document whose root tag is ``xml`` into a flat dict.

    Args:
        raw_str: the XML text; it is passed through ``smart_str`` and ``str``
            before parsing.

    Returns:
        A dict mapping each direct child's tag to ``unicode(child.text)``, or
        ``None`` when the root element's tag is not ``'xml'``.  If several
        children share a tag, the last one wins.
    """
    root_elem = ET.fromstring(str(smart_str(raw_str)))
    if root_elem.tag != 'xml':
        return None

    msg = {}
    for child in root_elem:
        # The original stored smart_unicode(child.text) and overwrote it on
        # the very next line; only the value that was actually kept is
        # computed here.
        # NOTE(review): if ``child.text`` is None this stores u'None' --
        # confirm that callers expect that for empty elements.
        msg[child.tag] = unicode(child.text)
    return msg
コード例 #6
0
def main(str_url):
    """Parse the resource at ``str_url`` for links and download the results.

    Every line read from the URL is converted with ``smart_str``, decoded as
    UTF-8 and fed to a ``ParserLinks`` instance.  Lines that make the parser
    raise ``HTMLParser.HTMLParseError`` are reported via a printed traceback
    and skipped.  The list returned by ``get_file_list()`` is then handed to
    ``down_jpg_mutithread``.

    Args:
        str_url: the address to open with ``urllib.urlopen``.
    """
    parserLinks = ParserLinks()
    web = urllib.urlopen(str_url)
    try:
        for context in web.readlines():
            _str = smart_str(context).decode('utf-8')
            try:
                parserLinks.feed(_str)
            except HTMLParser.HTMLParseError:
                # A malformed line must not abort the whole page.
                print(traceback.format_exc())
    finally:
        # Close the connection even when reading or decoding fails.
        web.close()
    image_list = parserLinks.get_file_list()
    down_jpg_mutithread(image_list)
コード例 #7
0
def feed_favorite(parser):
    """Print one concatenated line of text for every ``<article>`` element.

    For each article the line is built from, in order:
      * the cleaned ``.string`` of every child of its ``<h3>`` that has one,
      * the ``.string`` of the first child of the first ``<blockquote>``
        inside the first ``.content`` element, if both exist,
      * the ``.string`` of the first ``.duration`` element, if present.

    Args:
        parser: parsed document offering ``find_all``; each article must
            offer ``h3`` and ``select``.
    """
    for article in parser.find_all('article'):
        parts = []
        for node in article.h3.contents:
            text = node.string
            if text is None:
                continue
            # Trim surrounding CR/LF, drop embedded newlines, then trim any
            # remaining surrounding whitespace.
            text = text.strip(u'\r\n').replace(u'\n','').strip()
            parts.append(smart_str(text))

        content = article.select('.content')
        duration = article.select('.duration')
        if content:
            quotes = content[0].select('blockquote')
            if quotes:
                parts.append(smart_str(quotes[0].contents[0].string))
        if duration:
            parts.append(smart_str(duration[0].string))
        print(''.join(parts))
コード例 #8
0
def countSale(str_url):
    """Return the summed ``price * quantity`` over all ``.item`` elements.

    For each item the price is read from the first child of its first
    ``<strong>`` and the quantity from the first child of its first ``<em>``;
    an item without any ``<em>`` contributes a quantity of 0.  Each product
    is rounded to two decimals before being added.

    Args:
        str_url: address handed to ``parser`` to obtain the parsed page.

    Returns:
        The total as a float.
    """
    page = parser(str_url)
    total = 0.0  # running sales total
    for entry in page.select('.item'):
        price_tag = entry.select('strong')[0]
        quantity_tags = entry.select('em')
        # Units sold for this single item; zero when no <em> is present.
        quantity = int(quantity_tags[0].contents[0]) if quantity_tags else 0
        price_text = smart_str(price_tag.contents[0])
        # Keep only digits and dots so any other characters are dropped.
        numeric = ''.join(ch for ch in price_text if ch in r'0123456789.')
        total += round(float(numeric) * quantity, 2)
    return total
コード例 #9
0
def countSale(str_url):
    """Add up price times quantity for every ``.item`` element of the page.

    The price comes from the first child of the item's first ``<strong>``,
    stripped of every character that is not a digit or a dot.  The quantity
    comes from the first child of the item's first ``<em>`` and defaults to
    0 when the item has no ``<em>``.  Each product is rounded to two
    decimals before it is accumulated.

    Args:
        str_url: address handed to ``parser`` to obtain the parsed page.

    Returns:
        The accumulated total as a float.
    """
    allowed = r'0123456789.'
    sale = 0.0  # total sales amount
    for item in parser(str_url).select('.item'):
        price_node = item.select('strong')[0]
        amount_nodes = item.select('em')

        amount = 0  # quantity sold for this single item
        if amount_nodes:
            amount = int(amount_nodes[0].contents[0])

        price_str = smart_str(price_node.contents[0])
        kept = [ch for ch in price_str if ch in allowed]
        price = float(''.join(kept))
        sale += round(price * amount, 2)
    return sale
コード例 #10
0
ファイル: ssq.py プロジェクト: huligong1234/python-study
def get_ssq_list(str_url):
    """Extract draw records from the first ``<table>`` of a page.

    The first two ``<tr>`` rows are skipped, as is any row with five or
    fewer ``<td>`` cells.

    Args:
        str_url: address handed to ``parser`` to obtain the parsed page.

    Returns:
        A list of 13-tuples ``(draw_date, issue_no, red1..red6, blue1,
        sales, first_prize_count, second_prize_count, distribution)``.
        ``distribution`` is the second child of the fifth cell with ``(``,
        ``..`` and ``)`` removed and surrounding whitespace stripped.

    Raises:
        IndexError: if the page has no table or a kept row has fewer than
            seven ``<em>`` elements in its third cell.
    """
    rows = []
    soup = parser(str_url)
    tables = soup.find_all(SoupStrainer("table"))
    first_table = tables[0]
    for position, tr in enumerate(first_table.find_all('tr')):
        if position <= 1:
            continue
        tds = tr.find_all('td')
        if len(tds) <= 5:
            continue

        draw_date = tds[0].text
        issue_no = tds[1].text
        balls = tds[2].select('em')
        # Index explicitly so a short row still raises IndexError.
        reds = [balls[k].text for k in range(6)]
        blue = balls[6].text
        sales = tds[3].contents[0].string
        first_prize_count = tds[4].contents[0].string
        second_prize_count = tds[5].contents[0].string
        # First-prize distribution.
        distribution = smart_str(tds[4].contents[1].string)
        for token in ("(", "..", ")"):
            distribution = distribution.replace(token, "")
        distribution = distribution.strip()

        rows.append(
            (draw_date, issue_no) + tuple(reds) +
            (blue, sales, first_prize_count, second_prize_count,
             distribution))

    return rows
コード例 #11
0
def get_goods_data_list(str_url):
    """Look up several sections of a goods page and print two of them.

    The function prints the ``.tb-key.tb-key-sku`` matches and the
    ``.tb-evaluate.tb-clearfix`` matches found inside the first
    ``.tb-property`` element.  The remaining lookups (goods name, price,
    sold-out section) are performed but their results are not used.

    Args:
        str_url: address handed to ``parser`` to obtain the parsed page.

    Returns:
        None.

    Raises:
        IndexError: if one of the expected elements is missing.
    """
    soup = parser(str_url)

    # Goods name: computed but currently unused.
    title_node = soup.select('.tb-detail-hd')[0].h3.contents[0]
    smart_str(title_node.string)

    property_blocks = soup.select('.tb-property')
    main_block = property_blocks[0]

    # Listed price text: looked up but currently unused.
    price_blocks = main_block.select('.tb-detail-price')
    price_blocks[0].select('strong')[0].text

    # Sold-out section: looked up but currently unused.
    main_block.select('.tb-sold-out.tb-clearfix')
    evaluate = main_block.select('.tb-evaluate.tb-clearfix')

    print(main_block.select('.tb-key.tb-key-sku'))

    print(evaluate)
コード例 #12
0
def get_book_list(str_url):
    """Scrape one record per ``.item`` element of a book listing page.

    The links inside the first ``.p-info`` element are interpreted by count:
    one link is the publisher; two links are author and publisher; three
    links are author, translator and publisher.  Any other count leaves all
    three fields empty.

    Args:
        str_url: address handed to ``parser`` to obtain the parsed page.

    Returns:
        A list of tuples ``(index, book_name, book_img, book_url,
        book_auther, book_trans_auther, book_publisher, del_price,
        jd_price)``.

    Raises:
        IndexError: if an item lacks one of the expected elements.
    """
    soup = parser(str_url)
    cur_page_data_list = []
    for item in soup.select('.item'):
        index = smart_str(item.select('.index')[0].contents[0].string)  # book rank

        p_name = item.select('.p-name')[0].contents[0]
        book_url = smart_str(p_name['href'])  # book link address
        book_name = smart_str(p_name.text)  # book title

        book_info = item.select('.p-info')
        links = book_info[0].select('a')
        link_count = len(links)

        book_auther = ''  # author
        book_trans_auther = ''  # translator
        book_publisher = ''  # publisher
        if link_count == 1:
            book_publisher = smart_str(links[0].text)
        elif link_count == 2:
            # The author is the first link; the original read index 1 for
            # both fields, so the author duplicated the publisher.
            book_auther = smart_str(links[0].text)
            book_publisher = smart_str(links[1].text)
        elif link_count == 3:
            book_auther = smart_str(links[0].text)
            book_trans_auther = smart_str(links[1].text)
            book_publisher = smart_str(links[2].text)

        book_img = smart_str(item.select('.p-img.bookimg')[0].img['src'])

        book_prices = book_info[1]
        # List price, with the currency sign removed.
        del_price = smart_str(book_prices.select('del')[0].text).replace(
            '¥', '')
        # JD price, with the currency sign removed.
        jd_price = smart_str(book_prices.select('span')[0].text).replace(
            '¥', '')

        cur_page_data_list.append((
            index,
            book_name,
            book_img,
            book_url,
            book_auther,
            book_trans_auther,
            book_publisher,
            del_price,
            jd_price,
        ))
    return cur_page_data_list
コード例 #13
0
def get_book_list(str_url):
    """Build a list of book tuples from the ``.item`` elements of a page.

    Each tuple is ``(index, book_name, book_img, book_url, book_auther,
    book_trans_auther, book_publisher, del_price, jd_price)``.

    The ``<a>`` elements of the item's first ``.p-info`` block determine the
    people fields: a single link is taken as the publisher, two links as
    (author, publisher) and three links as (author, translator, publisher).
    With any other number of links the three fields stay empty strings.

    Args:
        str_url: address handed to ``parser`` to obtain the parsed page.

    Returns:
        The list of tuples, one per ``.item`` element.

    Raises:
        IndexError: if an item lacks one of the expected elements.
    """
    cur_page_data_list = []
    for item in parser(str_url).select('.item'):
        # Book rank.
        index = smart_str(item.select('.index')[0].contents[0].string)

        p_name = item.select('.p-name')[0].contents[0]
        book_url = smart_str(p_name['href'])  # book link address
        book_name = smart_str(p_name.text)  # book title

        book_info = item.select('.p-info')
        anchors = book_info[0].select('a')

        # Author, translator and publisher default to empty strings.
        book_auther = book_trans_auther = book_publisher = ''
        if len(anchors) == 2:
            # Fixed: the author comes from the first link.  The original
            # used index 1 for the author too, duplicating the publisher.
            book_auther, book_publisher = [
                smart_str(a.text) for a in anchors]
        elif len(anchors) == 1:
            book_publisher = smart_str(anchors[0].text)
        elif len(anchors) == 3:
            book_auther, book_trans_auther, book_publisher = [
                smart_str(a.text) for a in anchors]

        book_img = smart_str(item.select('.p-img.bookimg')[0].img['src'])

        book_prices = book_info[1]
        # List price and JD price, both without the currency sign.
        del_price = smart_str(book_prices.select('del')[0].text).replace('¥','')
        jd_price = smart_str(book_prices.select('span')[0].text).replace('¥','')

        cur_page_data_list.append(
            (index, book_name, book_img, book_url, book_auther,
             book_trans_auther, book_publisher, del_price, jd_price))
    return cur_page_data_list