Example #1
0
def _fetch_article(news_site_id, url, link):
    """Build an ``ArticlePage`` for ``link`` or return ``None`` if unusable.

    The article address is produced by ``_build_link(url, link)`` and passed
    to ``news.ArticlePage`` together with ``news_site_id``. ``HTTPError`` and
    ``MaxRetryError`` raised while doing so are logged and yield ``None``.
    A page whose ``body`` is falsy is logged as invalid and yields ``None``.
    """
    logger.info(f'Starting to fetch article at {link}')
    try:
        page = news.ArticlePage(news_site_id, _build_link(url, link))
    except (HTTPError, MaxRetryError):
        logger.warning('Error while fetching article', exc_info=False)
        return None

    # Anything that is falsy itself, or that has a body, is handed back as-is.
    if not page or page.body:
        return page

    logger.warning('Invalid article. There is no body')
    return None
def _fetch_article(new_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when it is unusable.

    Messages go through the root ``logging`` module functions. ``HTTPError``
    and ``MaxRetryError`` raised while building the page are logged and give
    ``None``; so does a page whose ``body`` is falsy.
    """
    logging.info('Start fetching article at {}'.format(link))
    try:
        page = news.ArticlePage(new_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        logging.warning('Error while feching the article', exc_info=False)
        return None

    if page and not page.body:
        logging.warning('Invalid article. There is not body.')
        page = None
    return page
def _fetch_article(new_site_uid, host, link):
    """Create the ``ArticlePage`` for ``link``; ``None`` if it cannot be used.

    ``link`` is concatenated onto the start-up log message, so it must be a
    string. ``HTTPError`` / ``MaxRetryError`` are logged and produce ``None``,
    as does a page with a falsy ``body``.
    """
    logger.info('iniciando busqueda del articulo' + link)
    try:
        page = news.ArticlePage(new_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        logger.warning('Error encontrando el error', exc_info=False)
        return None
    else:
        if not page or page.body:
            return page

    logger.warning('articulo invalido no tiene cuerpo')
    return None
Example #4
0
def _fetch_article(news_site_uid, host, link):
    """Fetch the article at ``link`` and return it, or ``None`` if unusable.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        host: Host part handed to ``_build_link``.
        link: Article link handed to ``_build_link``.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        ``HTTPError`` / ``MaxRetryError`` or when the page has a falsy ``body``.
    """
    logger.info("Start fetching article at {}".format(link))
    article = None
    try:
        article = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        # The handler previously named a misspelled exception class, which
        # turned every failure inside the try block into a NameError.
        logger.warning("Error while fetching the article", exc_info=False)

    if article and not article.body:
        logger.warning("Invalid article. There is no body")
        return None
    return article
Example #5
0
def _fetch_article(news_site_uid, host, link):
    """Fetch the article at ``link`` and return it, or ``None`` on failure.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        host: Host part handed to ``_build_link``.
        link: Article link handed to ``_build_link``.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        ``HTTPError`` or ``MaxRetryError``.
    """
    logger.info(f'Start fetching article at {link}')

    try:
        article = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        # This branch handles a failed fetch, so report it as such; the
        # previous message claimed the article had no body.
        logger.warning('Error while fetching the article', exc_info=False)
        return None

    # NOTE(review): the article body is not validated here - confirm whether
    # callers expect body-less pages to be filtered out.
    return article
Example #6
0
def _fetch_article(news_site, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    The address comes from ``build_link(host, link)``. Problems are reported
    with ``print``: ``HTTPError`` / ``MaxRetryError`` while building the page,
    or a page whose ``body`` is falsy. Both cases give ``None``.
    """
    try:
        page = news.ArticlePage(news_site, build_link(host, link))
    except (HTTPError, MaxRetryError):
        print('Error while fetching article')
        return None

    if page and not page.body:
        print('Invalid Article. There is no body')
        page = None

    return page
Example #7
0
def _fetch_article(news_site_uid, host, link):
    """Fetch the article at ``link`` and return it, or ``None`` if unusable.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        host: Host part handed to ``_build_link``.
        link: Article link handed to ``_build_link``.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        ``HTTPError``, ``MaxRetryError`` or ``requests.ConnectionError``, or
        when the page has a falsy ``body``.
    """
    logger.info('Start fetching article at {}'.format(link))

    article = None
    try:
        article = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError, requests.ConnectionError):
        logger.warning('error while fechting the article!', exc_info=False)

    # The body check must run on the success path. Nested inside the except
    # block it could never trigger, because ``article`` is still None there.
    if article and not article.body:
        logger.warning('invalid article. There is no body')
        return None
    return article
Example #8
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    No logging happens in this variant: ``HTTPError`` / ``MaxRetryError``
    raised while building the page, and pages with a falsy ``body``, both
    result in ``None`` without any message.
    """
    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        # Fetch failures are swallowed without a message.
        return None

    # A page without a body is dropped, again without a message.
    return None if page and not page.body else page
Example #9
0
def _fetch_article(news_sites_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    ``link`` is passed to ``news.ArticlePage`` unchanged; ``host`` is accepted
    but not used. ``HTTPError`` / ``MaxRetryError`` are logged at error level
    and give ``None``; a page with a falsy ``body`` is logged at warning
    level and also gives ``None``.
    """
    logger.info('Start fetching article at {}'.format(link))
    try:
        page = news.ArticlePage(news_sites_uid, link)
    except (HTTPError, MaxRetryError):
        logger.error('The article coudn\'t be fetched')
        return None

    if not page or page.body:
        return page

    logger.warning('There isn\'t a body in this page.   ')
    return None
Example #10
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    Several connection- and decoding-related exception types raised while
    building the page are logged and turned into ``None``. A page whose
    ``body`` is falsy is logged as invalid and also gives ``None``.
    """
    logger.info('Start fetching article at {}'.format(link))

    fetch_errors = (HTTPError, MaxRetryError, NewConnectionError,
                    ConnectionError, gaierror, ContentDecodingError,
                    SSLError, TooManyRedirects)
    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except fetch_errors:
        logger.warning('Error while fechting the article', exc_info=False)
        return None

    if page and not page.body:
        logger.warning('Invalid article. There is no body')
        page = None

    return page
Example #11
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    ``HTTPError`` / ``MaxRetryError`` raised while building the page are
    logged and give ``None``; so does a page whose ``body`` is falsy.
    """
    logger.info('Start fetching article at %s' % link)
    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        # Author's note: MaxRetryError is handled so that accessing the URL
        # cannot go on retrying indefinitely.
        logger.warning('Error while fetching the article', exc_info=False)
        return None

    if not page or page.body:
        return page
    logger.warning('Invalid article. There is no body')
    return None
Example #12
0
def _fetch_article(news_site_uid, host, link):
    """Fetch the article at ``link`` and return it, or ``None`` if unusable.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        host: Host part handed to ``_build_link``.
        link: Article link handed to ``_build_link``.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        ``HTTPError`` / ``MaxRetryError`` or when the page has a falsy ``body``.
    """
    logger.info('start fetching article at {}'.format(link))

    article = None
    try:
        # Use the actual parameter name; the misspelled ``news_sites_uid``
        # raised NameError on every call.
        article = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        # ``False`` is the Python literal; lowercase ``false`` is undefined.
        logger.warning('Error while fetching the article', exc_info=False)

    if article and not article.body:
        logger.warning('Articulo Invalido')
        return None

    return article
Example #13
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    Only ``HTTPError`` is handled while building the page (logged, result
    ``None``). A page whose ``body`` is falsy is logged as invalid and also
    gives ``None``.
    """
    logger.info('esta buscando el archivo en {}'.format(link))

    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except HTTPError:
        logger.warning('Error mientras se buscaba el articulo', exc_info=False)
        return None

    if page and not page.body:
        logger.warning('articulo invalido, no hay cuerpo en el articulo')
        page = None

    return page
Example #14
0
def _fetch_article(news_site_uid, host, link):
    """Fetch the article at ``link`` and return it, or ``None`` if unusable.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        host: Host part handed to ``_build_link``.
        link: Article link handed to ``_build_link``.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        ``HTTPError`` / ``MaxRetryError`` or when any of its ``content``,
        ``title`` or ``date`` attributes is falsy.
    """
    logger.info('Start fetching article at {}'.format(link))

    # Bind the name that is actually used below, so a failed fetch leaves it
    # defined instead of raising UnboundLocalError.
    article_page = None
    try:
        article_page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        logger.warning('Error while fetching the article', exc_info=False)

    # Group the field checks explicitly: without parentheses, ``and`` bound
    # tighter than ``or`` and ``.title`` was read even on a missing page.
    if article_page and not (
        article_page.content and article_page.title and article_page.date
    ):
        logger.warning(
            'Invalid article. There is no valid article (no content, no title)'
        )
        return None
    return article_page
Example #15
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    ``HTTPError`` / ``MaxRetryError`` raised while building the page are
    logged and give ``None``; so does a page whose ``body`` is falsy.
    """
    logger.info(f'Start fetching article at {link}')

    # Defensive section: a failed fetch must not abort the caller.
    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        logger.warning('Error while fetching the article', exc_info=False)
        return None
    else:
        if not page or page.body:
            return page

    logger.warning('Invalid article. There is no body.')
    return None
Example #16
0
def _fetch_article(news_site_id, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    ``HTTPError``, ``MaxRetryError`` and ``ConnectionError`` raised while
    building the page are logged and give ``None``; so does a page whose
    ``body`` is falsy.
    """
    logger.info(f"Start fetching article at {link=}")

    try:
        page = news.ArticlePage(news_site_id, _build_link(host, link))
    except (HTTPError, MaxRetryError, ConnectionError):
        logger.warning("Error while fetching the article", exc_info=False)
        return None

    if page and not page.body:
        logger.warning("Invalid article. There is no body")
        page = None

    return page
Example #17
0
def _fetch_article(news_site_uid, host, link):
    """Fetch the article at ``link`` and return it, or ``None`` if unusable.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        host: Host part handed to ``_build_link``.
        link: Article link handed to ``_build_link``.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        ``HTTPError`` / ``MaxRetryError`` or when the page has a falsy ``body``.
    """
    logger.info(f'Obteniendo articulos de: {link}')

    article = None

    try:
        article = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        # ``warning`` replaces the deprecated ``warn`` alias.
        logger.warning('Error mientras se obtenia el articulo', exc_info=False)

    if article and not article.body:
        logger.warning('Articulo invalido. No hay cuerpo')
        return None
    return article
Example #18
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    ``HTTPError`` / ``MaxRetryError`` are logged at warning level, any other
    ``Exception`` at error level; both give ``None``. A page whose ``body``
    is falsy is logged as invalid and also gives ``None``.
    """
    logger.info('Start fetching article at {}'.format(link))
    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError) as err:
        logger.warning('ERROR fetching article: {}'.format(err), exc_info=False)
        return None
    except Exception as err:
        logger.error('ERROR fetching article: {}'.format(err), exc_info=False)
        return None

    if not page or page.body:
        return page

    logger.warning('Invalid article. There is no body')
    return None
Example #19
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    A set of HTTP, decoding, timeout and connection exception types raised
    while building the page are logged and give ``None``; so does a page
    whose ``body`` is falsy.
    """
    logger.info(f'Start fetching article at {link}')

    fetch_errors = (HTTPError, MaxRetryError, DecodeError,
                    ContentDecodingError, TimeoutError, NewConnectionError,
                    ConnectionError)
    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except fetch_errors:
        logger.warning('Error while fechting the article', exc_info=False)
        return None

    if page and not page.body:
        logger.warning('Invalid article. There is no body')
        page = None

    return page
Example #20
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    ``HTTPError`` / ``MaxRetryError`` raised while building the page are
    logged and give ``None``; so does a page whose ``body`` is falsy.
    """
    logger.info('Start fetching article at {}'.format(link))

    try:
        # The page object is created from the host combined with the link.
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        logger.warning('Error while fetching the article', exc_info=False)
        return None
    else:
        if not page or page.body:
            return page

    logger.warning('Invalid article, there is no body')
    return None
Example #21
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    ``HTTPError`` / ``MaxRetryError`` raised while building the page are
    logged and give ``None``; so does a page whose ``body`` is falsy.
    """
    logger.info(f'Iniciando extracion de articulo en:  {link}')

    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        logger.warning('Error al extraer el articulo', exc_info=False)
        return None

    if page and not page.body:
        logger.warning('Articulo Invalido. No hay un body')
        page = None

    return page
Example #22
0
def _fetch_article(news_site_uid, host, link):
    """Fetch the article at ``link`` and return it, or ``None`` if unusable.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        host: Host part handed to ``_build_link``.
        link: Article link handed to ``_build_link``.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        ``HTTPError`` / ``MaxRetryError`` or when the page has a falsy ``body``.
    """
    logger.info('Start Fetching artitle at {}'.format(link))
    article = None
    try:
        article = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        # exc_info=False: the traceback is omitted to keep the console clean.
        logger.warning('Error while fetching article', exc_info=False)

    # The leftover debug print of the article object was removed; it wrote
    # to stdout for every article, bypassing the logger.
    if article and not article.body:
        logger.warning('Invalid article. There is no body')
        return None

    return article
Example #23
0
def _fetch_article(news_site_uid, host, link):
    """Fetch the article at ``link`` and return it, or ``None`` if unusable.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        host: Accepted for interface compatibility; not used here.
        link: Article link passed to ``news.ArticlePage`` unchanged.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        an exception or when the page has a falsy ``body``.
    """
    logger.info(f'Fetching article {link}...')
    article = None
    try:
        article = news.ArticlePage(news_site_uid, link)
    except (HTTPError, MaxRetryError):
        logger.warning(f'Error while fetching article {link}', exc_info=False)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        logger.warning(f'Unknown Error while fetching article {link}',
                       exc_info=True)

    if article and not article.body:
        # ``warning`` replaces the deprecated ``warn`` alias.
        logger.warning('Invalid article. No body present')
        return None

    return article
Example #24
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    The full address from ``_build_link(host, link)`` is used to build the
    page and is then stored on the page's ``url`` attribute. ``HTTPError`` /
    ``MaxRetryError`` raised during these steps are logged. A page whose
    ``body`` is falsy is logged as invalid and gives ``None``.
    """
    logger.info('Srart fetching article at {}'.format(link))

    page = None
    try:
        full_url = _build_link(host, link)
        page = news.ArticlePage(news_site_uid, full_url)
        # Record the address the page was built from.
        page.url = full_url
    except (HTTPError, MaxRetryError):
        logger.warning('Error while fetching the articke', exc_info=False)

    if page and not page.body:
        logger.warning('Invalid article. There is no body')
        page = None

    return page
Example #25
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    Any ``Exception`` raised while building the page is logged, printed to
    stdout and gives ``None``. A page whose ``body`` is falsy is logged as
    invalid and also gives ``None``.
    """
    logger.info('Start fetching article at %s' % link)

    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except Exception as err:
        logger.warning('Error while fetching the article', exc_info=False)
        print('Exception: ', err)
        return None

    if not page or page.body:
        return page

    logger.warning('Invalid article. There is no body.')
    return None
Example #26
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    ``HTTPError`` / ``MaxRetryError`` raised while building the page are
    logged with the exception text and give ``None``. A page whose ``body``
    is falsy is logged with a ``Fore.RED`` prefix, ``Style.RESET_ALL`` is
    printed afterwards, and ``None`` is returned.
    """
    logger.info(f"Start fetching article at {link}")

    try:
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError) as err:
        logger.warning(f'Error while fetching the article: {err}',
                       exc_info=False)
        return None

    if not page or page.body:
        return page

    logger.warning(Fore.RED + 'Invalid article. There is no body')
    # Colorama: emit the reset sequence after the red message.
    print(Style.RESET_ALL)
    return None
Example #27
0
async def _fetch_article(news_site_uid, link, session):
    """Visit the article at ``link`` and report the outcome as a tuple.

    Returns ``(error, article, news_site_uid)``. ``error`` is ``0`` when
    nothing went wrong, otherwise a message string. ``article`` is the
    ``news.ArticlePage`` instance, or ``None`` when its ``body_csv`` is falsy
    or the page could not be constructed.
    """
    page = None
    failure = 0
    logger.info('Start fetching article at {}'.format(link))
    try:
        page = news.ArticlePage(news_site_uid, link)
        await page.visit(session)

        # The body check stays inside the try block so that errors raised
        # while reading ``body_csv`` are reported like fetch errors.
        if page and not page.body_csv:
            failure = 'Invalid article. There is no body'
            logger.warning('Invalid article. There is no body')
            page = None

    except Exception as exc:
        logger.error('ERROR fetching article: {}'.format(exc), exc_info=False)
        failure = 'ERROR fetching article {}: {}'.format(link, exc)

    return (failure, page, news_site_uid)
Example #28
0
def _fetch_article(news_site_uid, host, link):
    """Return the ``ArticlePage`` for ``link``, or ``None`` when unusable.

    ``HTTPError`` / ``MaxRetryError`` raised while building the page are
    logged and give ``None``; so does a page whose ``body`` is falsy.
    """
    logger.info('Start fetching article at {}'.format(link))

    try:
        # Each article goes to ArticlePage, using a link assembled by
        # _build_link from the host and the raw link.
        page = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, MaxRetryError):
        # A failed fetch invalidates the article.
        logger.warning('Error while fechting the article', exc_info=False)
        return None

    # An article without a body is invalid as well.
    if page and not page.body:
        logger.warning('Invalid article. There is no body')
        page = None

    return page
Example #29
0
def _fetch_article(news_site_uid, host_url, link):
    """Fetch the article at ``link`` and return it, or ``None`` if unusable.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        host_url: Host part handed to ``_build_link``.
        link: Article link handed to ``_build_link``.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        ``HTTPError`` / ``MaxRetryError`` or when the page has a falsy ``body``.
    """
    logger.info("Start fetching article at {}".format(link))

    # Must be ``article`` (the name read below). The original initialised a
    # misspelled ``articles``, so a caught fetch error was followed by
    # UnboundLocalError. The debug print of the built link was also dropped.
    article = None
    try:
        article = news.ArticlePage(news_site_uid, _build_link(host_url, link))
    except (HTTPError, MaxRetryError):
        logger.warning('Error fetching the article', exc_info=False)

    if article and not article.body:
        logger.warning('Invalid article. There is no body')
        return None

    return article
Example #30
0
def _fetch_article(news_site_uid, link):
    """Fetch the article at ``link`` and return it, or ``None`` if unusable.

    Args:
        news_site_uid: Identifier forwarded to ``news.ArticlePage``.
        link: Article link handed to ``_build_link``.

    Returns:
        The ``news.ArticlePage`` instance, or ``None`` when building it raised
        an exception or when both its ``body`` and ``title`` are falsy.
    """
    logger.info("Start fetching article at {}".format(link))

    article = None

    try:
        article = news.ArticlePage(news_site_uid, _build_link(link))
    except Exception:
        # Kept deliberately broad (the author widened it from HTTPError /
        # MaxRetryError), but ``Exception`` instead of a bare ``except`` lets
        # KeyboardInterrupt and SystemExit propagate.
        # Author's notes: HTTPError means the page was not found; handling
        # MaxRetryError avoids following the URL indefinitely.
        # exc_info=False keeps the traceback out of the log.
        logger.warning("Error while fetching the article", exc_info=False)

    # The article is rejected only when body AND title are both missing.
    if article and not article.body and not article.title:
        logger.warning("Invalid article. There is no body")
        return None

    return article