Example #1
0
def get_articles_c_tribune(complement):
    '''
    Given a string (complement) of the form 2011/01/01,
    get articles from the Chicago Tribune Archives.

    Inputs:
            complement (str): the date for a given day,
            e.g. '2011/01/01'
    Returns:
            dict mapping archive index -> info dict for each
            article that day; also writes a csv file with nltk
            scores for complement
    '''
    c_tribune = 'http://articles.chicagotribune.com/'
    archive_url = c_tribune + complement + '/'
    articles = {}
    pm = urllib3.PoolManager()
    html = pm.urlopen(url=archive_url, method="GET").data
    soup = bs4.BeautifulSoup(html, 'lxml')
    # Article links on the archive page live inside <h3> tags.
    tag_list = soup.find_all('h3')

    if tag_list:
        for index, tag in enumerate(tag_list):
            article = c_tribune + tag.a['href']
            config = Configuration()
            config.browser_user_agent = get_user_agent()
            # FIX: pass config so the custom user agent is actually used
            # (it was previously built and discarded).
            article_object = Article(article, config=config)
            article_object.download()

            # FIX: `if article_object:` was always true; check the
            # download actually succeeded (same pattern as get_info).
            if article_object.is_downloaded:
                article_object.parse()
                # Skip obituary notices entirely.
                if 'Death Notice:' in article_object.title:
                    continue
                title = article_object.title
                text = article_object.text
                rv = {}
                rv['article'] = title
                rv['pub_date'] = complement
                rv['nltk_score'] = get_nltk_score(text)
                rv['nltk_score_title'] = get_nltk_score(title)
                rv['source'] = 'Chicago Tribune'
                # FIX: only store populated entries, so skipped/failed
                # articles no longer leave empty dicts in the output.
                articles[index] = rv

        # FIX: write the csv once after the loop, not on every
        # iteration (matches get_articles_pro).
        write_csv_pro(
            articles,
            'chicago_tribune_' + re.sub("/", "_", complement) + '.csv')

    # FIX: return the collected info, consistent with get_articles_pro.
    return articles
Example #2
0
def get_articles_pro(complement):
    '''
    Given a string (complement) of the form 2011/01/01,
    get articles for a given day from ProPublica.

    Inputs:
            complement (str): the date for a given day,
            e.g. '2011/01/01'
            (ProPublica archive entries are <div class="excerpt-thumb">)
    Returns:
            dict mapping archive index -> info dict for each
            article that day; also writes a csv file with nltk
            scores for complement
    '''
    propublica = 'https://www.propublica.org/archive/'
    archive_url = propublica + complement + '/'
    articles = {}
    pm = urllib3.PoolManager()
    html = pm.urlopen(url=archive_url, method="GET").data
    soup = bs4.BeautifulSoup(html, 'lxml')
    tag_list = soup.find_all('div', class_='excerpt-thumb')

    if tag_list:
        for index, tag in enumerate(tag_list):
            # ProPublica links are absolute, no base-url prefix needed.
            article = tag.a['href']
            print(article)
            config = Configuration()
            config.browser_user_agent = get_user_agent()
            # FIX: pass config so the custom user agent is actually used
            # (it was previously built and discarded).
            article_object = Article(article, config=config)
            article_object.download()
            # FIX: `if article_object:` was always true; check the
            # download actually succeeded (same pattern as get_info).
            if article_object.is_downloaded:
                article_object.parse()
                title = article_object.title
                text = article_object.text
                rv = {}
                rv['article'] = title
                rv['pub_date'] = complement
                rv['nltk_score'] = get_nltk_score(text)
                rv['nltk_score_title'] = get_nltk_score(title)
                rv['source'] = 'ProPublica'
                # FIX: only store populated entries, so failed downloads
                # no longer leave empty dicts in the output.
                articles[index] = rv

        write_csv_pro(articles,
                      'propublica_' + re.sub("/", "_", complement) + '.csv')

    return articles
Example #3
0
def get_info(dictionary):
    '''
    Get information for all the articles
    for the selected sections in La Jornada.

    Inputs:
            dictionary: dict with selected sections as keys and a
            list of article urls for every section as values
    Returns:
            dict mapping a running counter -> info dict with the
            (translated) title, publication date, source, and nltk
            scores for title and text of every article
    '''
    rv = {}
    count = 0
    for key, item in dictionary.items():
        for url in item:
            config = Configuration()
            config.browser_user_agent = get_user_agent()
            # FIX: pass config so the custom user agent is actually used
            # (it was previously built and discarded).
            article = Article(url, language='es', config=config)
            article.download()
            if article.is_downloaded:  # FIX: was `== True`, non-idiomatic
                article.parse()
                title = article.title
                # Articles are in Spanish; translate to English so the
                # nltk scorer works on English text.
                tr_title = mtranslate.translate(title, "en", "auto")
                # FIX: publish_date can be None (newspaper often fails
                # to extract it), which previously raised AttributeError.
                pub = article.publish_date
                date = pub.date() if pub is not None else None
                tr_text = translate_article(article.text)
                irv = {}
                irv['article'] = tr_title
                irv['pub_date'] = date
                # nltk score will be converted into a sentiment score
                irv['nltk_score'] = get_nltk_score(tr_text)
                irv['source'] = 'Jornada'
                irv['nltk_score_title'] = get_nltk_score(tr_title)
                rv[count] = irv
                count += 1
    return rv