Beispiel #1
0
def search():
    """Run a Google search for generated keywords and optionally click through.

    The search phrase comes from ``utils.get_keywords(3)``. The phrase is typed
    into the ``q`` input of the page loaded from ``GOOGLE`` and the ``tsf``
    form is submitted. When ``needl.settings['google']['click_through']`` is
    truthy, one randomly chosen absolute result URL is visited, and if
    ``needl.settings['google']['click_depth']`` is positive the browser is
    handed to ``utils.process_click_depth`` for further navigation.

    The browser is always quit before returning, including when a step raises;
    the exception itself still propagates to the caller.
    """
    search_phrase = utils.get_keywords(3)
    needl.log.info('Searching Google for: %s', search_phrase)

    browser = utils.get_browser()
    try:
        browser.get(GOOGLE)
        search_form = browser.find_element_by_id('tsf')
        browser.find_elements_by_css_selector('input[name=q]')[0].send_keys(
            search_phrase)
        search_form.submit()

        results_count = browser.find_element_by_id('resultStats').text.rstrip()
        needl.log.debug('%s for %s', results_count, search_phrase)

        if needl.settings['google']['click_through']:
            # Read each href once so the chosen URL does not depend on
            # re-querying an element after the list was built.
            hrefs = [
                link.get_attribute('href')
                for link in browser.find_elements_by_css_selector('h3.r > a')
            ]
            targets = [href for href in hrefs if utils.url_is_absolute(href)]

            if targets:
                target = needl.rand.choice(targets)
                needl.log.info('Visiting %s', target)
                browser.get(target)

                click_depth = needl.settings['google']['click_depth']
                if click_depth > 0:
                    utils.process_click_depth(browser, click_depth)
    finally:
        # Release the browser even if navigation or element lookup failed.
        browser.quit()
Beispiel #2
0
def search():
    """Submit a Google search for generated keywords and optionally follow a hit.

    The phrase from ``utils.get_keywords(3)`` is written into the ``q`` input
    of the ``form[name=f]`` form on the page fetched from ``GOOGLE`` and the
    form is submitted through the browser. When the ``click_through`` setting
    under ``needl.settings['google']`` is truthy, one random result link with
    an absolute URL is fetched, and a positive ``click_depth`` setting hands
    the browser and fetched page to ``utils.process_click_depth``.
    """
    phrase = utils.get_keywords(3)
    needl.log.info('Searching Google for: %s', phrase)

    browser = utils.get_browser()
    landing = browser.get(GOOGLE)
    form = landing.soup.select('form[name=f]')[0]
    query_input = form.select('input[name=q]')[0]
    query_input['value'] = phrase

    # hack so mechanicalsoup doesn't request I'm Feeling Lucky results
    lucky_buttons = form.select('input[name=btnI]')
    if lucky_buttons:
        lucky_buttons[0]['name'] = ''

    results = browser.submit(form, landing.url)
    stats = results.soup.find('div', id='resultStats')
    needl.log.debug('%s for %s', stats.text.rstrip(), phrase)

    if not needl.settings['google']['click_through']:
        return

    candidates = [
        anchor for anchor in results.soup.select('h3.r > a')
        if utils.url_is_absolute(anchor.get('href'))
    ]
    if not candidates:
        return

    target = needl.rand.choice(candidates).get('href')
    needl.log.info('Visiting %s', target)
    visited = browser.get(target)

    depth = needl.settings['google']['click_depth']
    if depth > 0:
        utils.process_click_depth(browser, visited, depth)
Beispiel #3
0
def visit():
    """Open a site from ``get_random_site()`` and process its click depth.

    The site is logged, loaded in a browser from ``utils.get_browser()``, and
    the browser is then passed to ``utils.process_click_depth`` together with
    the ``needl.settings['alexa']['click_depth']`` value.
    """
    target = get_random_site()
    needl.log.info('Visiting %s', target)

    driver = utils.get_browser()
    driver.get(target)

    depth = needl.settings['alexa']['click_depth']
    utils.process_click_depth(driver, depth)
Beispiel #4
0
def get_user():
    """Request the Twitter page for a generated first/last name pair.

    One line is read from each of ``first-names.txt`` and ``last-names.txt``
    under ``needl.args.datadir`` and title-cased. The two names are
    concatenated, substituted into the ``TWITTER`` template and fetched. A
    response whose ``status_code`` is not 200 is logged as "not found".
    """
    first = utils.get_line(needl.args.datadir + '/first-names.txt').title()
    last = utils.get_line(needl.args.datadir + '/last-names.txt').title()

    # The format string needs one placeholder per argument; without them the
    # logger fails to format the record.
    needl.log.info('Finding Twitter user: "%s %s"', first, last)

    browser = utils.get_browser()
    page = browser.get(TWITTER.format(first + last))

    # Compare by value: identity checks against int literals are
    # implementation-dependent and raise a SyntaxWarning on modern Python.
    if page.status_code != 200:
        needl.log.debug('Twitter user "%s %s" not found', first, last)
        return
Beispiel #5
0
def get_user():
    """Open the Twitter page for a generated first/last name pair.

    One line is read from each of ``first-names.txt`` and ``last-names.txt``
    under ``needl.args.datadir`` and title-cased. The two names are
    concatenated, substituted into the ``TWITTER`` template and loaded in the
    browser. If the page source contains Twitter's "page doesn't exist"
    message, the user is logged as "not found".

    The browser is always quit before returning, including when loading the
    page raises; the exception itself still propagates to the caller.
    """
    first = utils.get_line(needl.args.datadir + '/first-names.txt').title()
    last = utils.get_line(needl.args.datadir + '/last-names.txt').title()

    # The format string needs one placeholder per argument; without them the
    # logger fails to format the record.
    needl.log.info('Finding Twitter user: "%s %s"', first, last)

    browser = utils.get_browser()
    try:
        browser.get(TWITTER.format(first + last))

        if "Sorry, that page doesn’t exist!" in browser.page_source:
            needl.log.debug('Twitter user "%s %s" not found', first, last)
    finally:
        # Release the browser even if the page load failed.
        browser.quit()
Beispiel #6
0
def search():
    """Search Twitter for a generated hashtag and log the first tweet's author.

    The hashtag is ``'#'`` followed by ``utils.get_keywords(1)``. It is
    URL-quoted into a search path, substituted into the ``TWITTER`` template
    and loaded in the browser. Reading the first tweet is best-effort: if no
    tweet is present or reading it fails, the problem is logged at debug level
    and nothing is raised.

    The browser is always quit before returning. A failure to load the search
    page itself still propagates to the caller.
    """
    hashtag = '#' + utils.get_keywords(1)

    needl.log.info('Searching Twitter for: "%s"', hashtag)

    browser = utils.get_browser()
    try:
        browser.get(TWITTER.format('search?f=tweets&vertical=default&q=' + url.quote_plus(hashtag) + '&src=typd'))

        try:
            tweets = browser.find_elements_by_css_selector('.stream-item > .tweet')
            if tweets:
                first_tweet_by = tweets[0].get_attribute('data-screen-name')
                needl.log.info('Latest Tweet for %s by %s', hashtag, first_tweet_by)
            else:
                needl.log.debug('No Tweets found for %s', hashtag)
        except Exception as error:
            # Stay best-effort, but no longer swallow KeyboardInterrupt or
            # SystemExit the way a bare ``except`` would.
            needl.log.debug('Could not read latest Tweet for %s: %s', hashtag, error)
    finally:
        # Release the browser even if the page load failed.
        browser.quit()
Beispiel #7
0
def search():
    """Search Twitter for a generated hashtag and log the first tweet's author.

    The hashtag is ``'#'`` followed by ``utils.get_keywords(1)``. It is
    URL-quoted into a search path, substituted into the ``TWITTER`` template
    and fetched. Reading the first tweet from the returned page is
    best-effort: if no tweet is present or reading it fails, the problem is
    logged at debug level and nothing is raised.
    """
    hashtag = '#' + utils.get_keywords(1)

    needl.log.info('Searching Twitter for: "%s"', hashtag)

    browser = utils.get_browser()
    page = browser.get(
        TWITTER.format('search?f=tweets&vertical=default&q=' +
                       url.quote_plus(hashtag) + '&src=typd'))

    try:
        tweets = page.soup.select('.stream-item > .tweet')
        if tweets:
            first_tweet_by = tweets[0]['data-screen-name']
            needl.log.info('Latest Tweet for %s by %s', hashtag, first_tweet_by)
        else:
            needl.log.debug('No Tweets found for %s', hashtag)
    except Exception as error:
        # Stay best-effort, but no longer swallow KeyboardInterrupt or
        # SystemExit the way a bare ``except`` would.
        needl.log.debug('Could not read latest Tweet for %s: %s', hashtag, error)