コード例 #1
0
def find_matched_words_from_yahoo_ads():
    query = request.form['query']
    #yahooスポンサードサーチは単語ごとに区切るより一文にしたほうが広告出やすい
    head = 'http://search.yahoo.co.jp/search/ss?p='
    tail = '&ei=UTF-8&fr=top_ga1_sa&type=websearch&x=drt'
    url = head + query + tail
    y_ad_page = WebPage(url)
    y_ad_page.fetch_html()
    y_ad_page.fetch_ads()
    naradeha_results = []
    bracket_words = []
    for ad in y_ad_page.ads:
        ad.fetch_link_title()
        naradeha_results.extend(ad.pick_characteristic_words())
        bracket_words.extend(ad.pick_bracket_words())
    # naradeharesults => [{'なら': {'before': ['。', 'あの', '今石洋之']}}]
    # bracket_words => ['アスコルビン酸', 'メルトダウン']

    stop_words = ['公式', '楽天', '当日', 'お急ぎ便', 'ココ', 'ここ', 'これ', 'コレ', 'こちら', '公式', '購入', '人気', '詳細', '送料無料', '配送無料', '価格', '激安', '無料', 'アマゾン', 'ヤフオク', '0', '1', '2', '3']
    for num in range(0, 10):
        stop_words.append(str(num))
    results = naradeha_words_to_results(naradeha_results, stop_words)

    for bracket_word in bracket_words:
        is_including_stop_word = False
        for stop_word in stop_words:
            if stop_word in bracket_word:
                is_including_stop_word = True
                break
        if is_including_stop_word:
            continue
        results.append(bracket_word)

    return render_template('words.tmpl', words=results)
コード例 #2
0
def yahoo_sponsored_results():
    query = request.form['query']
    #yahooスポンサードサーチは単語ごとに区切るより一文にしたほうが広告出やすい
    head = 'http://search.yahoo.co.jp/search/ss?p='
    tail = '&ei=UTF-8&fr=top_ga1_sa&type=websearch&x=drt'
    url = head + query + tail
    y_ad_page = WebPage(url)
    y_ad_page.fetch_html()
    y_ad_page.fetch_ads()
    result_words = []
    key_phrases_of_ads = []
    Engine = SearchEngine()
    for ad in y_ad_page.ads:
        result_words.extend(ad.pick_nouns_and_verbs(ad.title))
        result_words.extend(ad.pick_nouns_and_verbs(ad.snippet))
        #key_phrases_of_ads.append(Engine.yahoo_key_phrase(ad.title))
        #key_phrases_of_ads.append(Engine.yahoo_key_phrase(ad.snippet))
    results = to_ranked_items(result_words)
    #return ad_template.render(items=results)
    return render_template('find_words_with_yahoo_ads.tmpl',
        items=results)
コード例 #3
0
ファイル: app.py プロジェクト: katryo/subtask_search
def yahoo_sponsored_results():
    query = request.forms.decode().get('query')
    #yahooスポンサードサーチは単語ごとに区切るより一文にしたほうが広告出やすい
    head = 'http://search.yahoo.co.jp/search/ss?p='
    tail = '&ei=UTF-8&fr=top_ga1_sa&type=websearch&x=drt'
    url = head + query + tail
    y_ad_page = WebPage(url)
    ads = y_ad_page.fetch_ads()
    v_and_s = []
    for ad in ads:
        v_and_s.extend(ad.pick_verbs(ad.title))
        v_and_s.extend(ad.pick_sahens(ad.title))
        v_and_s.extend(ad.pick_verbs(ad.snippet))
        v_and_s.extend(ad.pick_sahens(ad.snippet))
    results = to_ranked_items(v_and_s)
    return ad_template.render(items=results)
コード例 #4
0
def yahoo_sponsored_results():
    query = request.forms.decode().get('query')
    #yahooスポンサードサーチは単語ごとに区切るより一文にしたほうが広告出やすい
    head = 'http://search.yahoo.co.jp/search/ss?p='
    tail = '&ei=UTF-8&fr=top_ga1_sa&type=websearch&x=drt'
    url = head + query + tail
    y_ad_page = WebPage(url)
    ads = y_ad_page.fetch_ads()
    v_and_s = []
    for ad in ads:
        v_and_s.extend(ad.pick_verbs(ad.title))
        v_and_s.extend(ad.pick_sahens(ad.title))
        v_and_s.extend(ad.pick_verbs(ad.snippet))
        v_and_s.extend(ad.pick_sahens(ad.snippet))
    results = to_ranked_items(v_and_s)
    return ad_template.render(items=results)
コード例 #5
0
ファイル: app.py プロジェクト: katryo/subtask_search
def query_expansion():
    query = request.forms.decode().get('query')
    #yahooスポンサードサーチは単語ごとに区切るより一文にしたほうが広告出やすい
    head = 'http://search.yahoo.co.jp/search/ss?p='
    tail = '&ei=UTF-8&fr=top_ga1_sa&type=websearch&x=drt'
    url = head + query + tail
    y_ad_page = WebPage(url)
    ads = y_ad_page.fetch_ads()
    v_and_s = []
    for ad in ads:
        v_and_s.extend(ad.pick_verbs(ad.title))
        v_and_s.extend(ad.pick_sahens(ad.title))
        v_and_s.extend(ad.pick_verbs(ad.snippet))
        v_and_s.extend(ad.pick_sahens(ad.snippet))
    ranked_items = to_ranked_items(v_and_s)
    ranked_items.insert(0, {'name': 'まとめ', 'count': 100})
    normalized_query = normalize_query(query)
    query_words = normalized_query.split(' ')
    page_set = set()  # set型は重複をなくすため
    expanded_queries = []
    for item in ranked_items:
        top_5 = []
        if item['name'] in query_words:
            #'花粉症 対策'で検索したら'対策'がitem['name']に入っていたりする
            continue
        else:
            expanded_query = normalized_query + ' ' + item['name']
            expanded_queries.append(expanded_query)
            new_pages = search(expanded_query, 1)
            top_5 = new_pages[0:4]
        over_19 = False
        for one in top_5:
            page_set.add(one)
            if len(page_set) > 19:
                over_19 = True
                break
        if over_19 is True:
            break  # => ranked_items内で回すのから脱出
    return expand_template.render(pages=page_set, queries=expanded_queries)
コード例 #6
0
def query_expansion():
    query = request.forms.decode().get('query')
    #yahooスポンサードサーチは単語ごとに区切るより一文にしたほうが広告出やすい
    head = 'http://search.yahoo.co.jp/search/ss?p='
    tail = '&ei=UTF-8&fr=top_ga1_sa&type=websearch&x=drt'
    url = head + query + tail
    y_ad_page = WebPage(url)
    ads = y_ad_page.fetch_ads()
    v_and_s = []
    for ad in ads:
        v_and_s.extend(ad.pick_verbs(ad.title))
        v_and_s.extend(ad.pick_sahens(ad.title))
        v_and_s.extend(ad.pick_verbs(ad.snippet))
        v_and_s.extend(ad.pick_sahens(ad.snippet))
    ranked_items = to_ranked_items(v_and_s)
    ranked_items.insert(0, {'name': 'まとめ', 'count': 100})
    normalized_query = normalize_query(query)
    query_words = normalized_query.split(' ')
    page_set = set()  # set型は重複をなくすため
    expanded_queries = []
    for item in ranked_items:
        top_5 = []
        if item['name'] in query_words:
            #'花粉症 対策'で検索したら'対策'がitem['name']に入っていたりする
            continue
        else:
            expanded_query = normalized_query + ' ' + item['name']
            expanded_queries.append(expanded_query)
            new_pages = search(expanded_query, 1)
            top_5 = new_pages[0:4]
        over_19 = False
        for one in top_5:
            page_set.add(one)
            if len(page_set) > 19:
                over_19 = True
                break
        if over_19 is True:
            break  # => ranked_items内で回すのから脱出
    return expand_template.render(pages=page_set, queries=expanded_queries)