from newspaper import Article

def scrapeArticle(self, url):
    article = Article(url)
    article.download()
    article.parse()
    # Keep only the first detected author, or fall back to an empty string
    if article.authors:
        article.authors = article.authors[0]
    else:
        article.authors = ""
    return {
        "title": article.title[:500],  # cap the stored title at 500 characters
        "content": article.text,
        "author": article.authors,
    }
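# The first-author normalization above can be factored into a small helper.
# This is a sketch, not code from the source:
def first_author(authors):
    """Return the first detected author, or an empty string if none were found."""
    return authors[0] if authors else ""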
from faker import Faker
from newspaper import Article

def test_convert_to_dict_most_fields_works():
    # `extractor` is the module under test; its import lives elsewhere in the file.
    faker = Faker()
    source = Article(url=faker.url())
    source.authors = [faker.name(), faker.name()]
    source.top_image = faker.image_url()
    source.article_html = faker.text()
    source.images = [faker.image_url(), faker.image_url()]
    source.meta_data = [faker.city(), faker.state(), faker.country()]

    result = extractor.to_dict(
        source,
        "article_html", "authors", "images", "keywords", "meta_data",
        "source_url", "summary", "top_image", "url", "tags", "meta_favicon",
    )

    assert result
    # Only the seven fields that were actually populated should survive.
    assert len(result) == 7
    assert "article_html" in result
    assert "authors" in result
    assert "images" in result
    assert "keywords" not in result
    assert "meta_data" in result
    assert "source_url" in result
    assert "summary" not in result
    assert "top_image" in result
    assert "url" in result
    assert "tags" not in result
    assert "meta_favicon" not in result
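# One plausible to_dict consistent with the assertions above (a sketch, not the
# real extractor module): include a requested field only when the attribute
# holds a truthy value on the article, so empty defaults such as keywords=[]
# and summary='' are dropped.
def to_dict(article, *fields):
    return {
        f: getattr(article, f)
        for f in fields
        if getattr(article, f, None)
    }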
import copy

from newspaper import Article

def process_html(url, html):
    log(f'Processing {url}')
    article = Article(url, KEYWORD_COUNT=25)
    # Parse the HTML we already fetched instead of downloading it again
    article.download(input_html=html)
    article.parse()
    # Flatten the author list into a single display string
    article.authors = '; '.join(article.authors)
    log(f'Parsed {len(article.text)} characters of natural text')
    article.nlp()
    # Copy the keyword list before replacing the attribute with a display string
    keywords = copy.deepcopy(article.keywords)
    article.keywords = ', '.join(keywords)
    return article, keywords
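# Hedged usage sketch for process_html: fetch the raw HTML yourself (requests
# is a common choice) and hand it to the function. Assumes a log() helper is
# defined elsewhere in the module, as the function above does.
import requests

url = "https://example.com/story"
html = requests.get(url, timeout=10).text
article, keywords = process_html(url, html)
print(article.title, keywords)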
from newspaper import Article
from rest_framework.response import Response

def get(self, request):
    url = request.GET.get('url')
    article = Article(url)
    article.download()
    article.parse()
    # Keep only the first detected author, or fall back to an empty string
    if article.authors:
        article.authors = article.authors[0]
    else:
        article.authors = ""
    data = {
        "url": url,
        "title": article.title,
        "content": article.text,
        "author": article.authors,
        "date": article.publish_date,
    }
    results = ScrapedArticleSerializer(data, many=False).data
    return Response(results)
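# A hypothetical ScrapedArticleSerializer matching the fields in the data dict
# above; the project's real serializer may differ.
from rest_framework import serializers

class ScrapedArticleSerializer(serializers.Serializer):
    url = serializers.URLField()
    title = serializers.CharField()
    content = serializers.CharField(allow_blank=True)
    author = serializers.CharField(allow_blank=True)
    date = serializers.DateTimeField(allow_null=True)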
import newspaper
from newspaper import Article

def parse_article(url):
    '''Responsible for parsing a single article.'''
    article = Article(url)
    print("Downloading data from URL: {}".format(url))
    article.download()
    # Fallback, otherwise the program would exit on the first invalid URL
    try:
        article.parse()
    except newspaper.article.ArticleException:
        print("Oops! The URL '{}' seems inaccessible!".format(url))
        article.authors = ['<UNK>']
        article.text = '<UNK>'
    return article
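# Hedged usage sketch: because parse_article never raises on a bad URL, it can
# be mapped over a list of URLs directly; the '<UNK>' sentinel marks failures.
urls = ["https://example.com/a", "https://example.com/b"]
articles = [parse_article(u) for u in urls]
failed = [a.url for a in articles if a.text == '<UNK>']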