def parse_articles(self, notify):
        page_index = 0
        logging.info("Parsing Blog " + self.BLOG_NAME)
        while True:
            page_index += 1
            logging.info("Page " + str(page_index))
            try:
                try:
                    page = urllib2.urlopen(self.ARTICLES_URL + str(page_index), timeout=NewsParser.TIMEOUT).read()
                except urllib2.HTTPError as e:
                    if e.code == 404:
                        logging.info("Finished parsing blog")
                        return
                    else:
                        raise

                articles = self.parse_initial_articles(page)
                for article_index, article in enumerate(articles):
                    article_url = article["url"]
                    if page_index == 1 and article_index == 0:
                        if self.newest_article_url == article_url:
                            logging.info("Local check: " + article_url + " already exists")
                            logging.info("Finished parsing blog")
                            return
                        else:
                            self.newest_article_url = article_url

                    payload = {"url": article_url}
                    article_exists_response = self.send_post(payload, "/article_exists")
                    if article_exists_response == "No":
                        logging.info(article_url + " does not exist yet")
                        new_article = self.parse_article_from_article_url(article)
                        payload = {"url": new_article["url"],
                                   "date": new_article["date"],
                                   "heading": new_article["heading"],
                                   "content": new_article["content"],
                                   "image": new_article["image"],
                                   "publisher": self.BLOG_NAME}
                        self.send_post(payload, "/add_article")
                        if is_production() and notify:
                            notify_users_about_article(payload)
                            write_news(self.BLOG_NAME + ": " + new_article["heading"] + "\n")
                    elif article_exists_response == "Yes":
                        logging.info(article_url + " already exists")
                        logging.info("Finished parsing blog")
                        return
                    else:
                        logging.info("/article_exists sent unexpected answer: " + article_exists_response)
                        return

            except Exception:
                text = "Error while parsing news for " + self.BLOG_NAME + " on page " + str(page_index) + ": "
                text += traceback.format_exc()
                logging.info(text)
                send_to_slack(text)
                return
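
The parser above leans on a send_post helper that the excerpt does not show. A minimal sketch of what it might look like, assuming the backend speaks JSON over HTTP and answers /article_exists with a plain "Yes"/"No" body (BACKEND_URL and the JSON encoding are assumptions):

import json
import urllib2

def send_post(self, payload, endpoint):
    # Hypothetical helper: POST the payload as JSON to the backend and
    # return the raw response body (e.g. "Yes"/"No" for /article_exists).
    request = urllib2.Request(self.BACKEND_URL + endpoint,
                              json.dumps(payload),
                              {"Content-Type": "application/json"})
    return urllib2.urlopen(request, timeout=NewsParser.TIMEOUT).read()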
Example #2
def download():
    """Downloads an artifact by it's content hash."""
    # Allow users with access to the build to download the file. Falls back
    # to API keys with access to the build. Prefer user first for speed.
    try:
        build = auth.can_user_access_build('build_id')
    except HTTPException:
        logging.debug('User access to artifact failed. Trying API key.')
        _, build = auth.can_api_key_access_build('build_id')

    sha1sum = request.args.get('sha1sum', type=str)
    if not sha1sum:
        logging.debug('Artifact sha1sum=%r not supplied', sha1sum)
        abort(404)

    artifact = models.Artifact.query.get(sha1sum)
    if not artifact:
        logging.debug('Artifact sha1sum=%r does not exist', sha1sum)
        abort(404)

    build_id = request.args.get('build_id', type=int)
    if not build_id:
        logging.debug('build_id missing for artifact sha1sum=%r', sha1sum)
        abort(404)

    is_owned = artifact.owners.filter_by(id=build_id).first()
    if not is_owned:
        logging.debug('build_id=%r not owner of artifact sha1sum=%r',
                      build_id, sha1sum)
        abort(403)

    # Make sure there are no Set-Cookie headers on the response so this
    # request is cacheable by all HTTP frontends.
    @utils.after_this_request
    def no_session(response):
        if 'Set-Cookie' in response.headers:
            del response.headers['Set-Cookie']

    if not utils.is_production():
        # Insert a sleep to emulate how the page loading looks in production.
        time.sleep(1.5)

    if request.if_none_match and request.if_none_match.contains(sha1sum):
        return flask.Response(status=304)

    return _get_artifact_response(artifact)
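
_get_artifact_response is not shown in this excerpt. A plausible sketch, assuming the artifact row stores the raw bytes and a content type (the field names are guesses):

def _get_artifact_response(artifact):
    # Hypothetical: serve the stored bytes with the saved content type and
    # an ETag equal to the content hash, so the If-None-Match branch above
    # can answer 304 on repeat requests.
    response = flask.Response(artifact.data, mimetype=artifact.content_type)
    response.set_etag(artifact.id)
    response.cache_control.public = True
    response.cache_control.max_age = 8640000
    return response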
Example #3
# HTTP -> HTTPS redirect
# https://stackoverflow.com/questions/32237379/python-flask-redirect-to-https-from-http/32238093
if os.getenv('REDIRECT_HTTP_TO_HTTPS'):

    @app.before_request
    def before_request_https():
        if request.url.startswith('http://'):
            url = request.url.replace('http://', 'https://', 1)
            # We use a 302 in case we need to revert the redirect.
            return redirect(url, code=302)


# Unique random key for sessions.
# With multiple workers the key must come from an environment variable;
# otherwise each worker generates its own key and cookies are constantly
# invalidated and re-set by different workers.
if utils.is_production():
    if not os.getenv('SECRET_KEY'):
        raise RuntimeError(
            'A SECRET_KEY must be provided for non-dev environments.')

    app.config['SECRET_KEY'] = os.getenv('SECRET_KEY')

else:
    app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', uuid.uuid4().hex)

if utils.is_heroku():
    app.config.update(
        SESSION_COOKIE_SECURE=True,
        SESSION_COOKIE_HTTPONLY=True,
        SESSION_COOKIE_SAMESITE='Lax',
    )
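
The utils.is_production() and utils.is_heroku() helpers used throughout these examples are not shown. A minimal sketch under common conventions (the ENVIRONMENT variable name is an assumption; Heroku does set DYNO on every running dyno):

import os

def is_production():
    # Assumption: production is flagged via an environment variable.
    return os.getenv('ENVIRONMENT') == 'production'

def is_heroku():
    # Heroku sets the DYNO variable for every running dyno.
    return 'DYNO' in os.environ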
Example #4
def wrapped(*args, **kwargs):
    g.build = can_user_access_build(param_name)
    if not utils.is_production():
        # Insert a sleep to emulate page loading in production.
        time.sleep(0.5)
    return f(*args, **kwargs)
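
The wrapped function above is the inner closure of a decorator; param_name and f come from enclosing scopes that the excerpt omits. A hedged reconstruction of the surrounding decorator (the outer names are guesses):

import functools

def build_access_required(param_name):
    # Hypothetical outer decorator: resolve the build referenced by the
    # request parameter, stash it on flask.g, then call the wrapped view.
    def decorator(f):
        @functools.wraps(f)
        def wrapped(*args, **kwargs):
            g.build = can_user_access_build(param_name)
            if not utils.is_production():
                # Insert a sleep to emulate page loading in production.
                time.sleep(0.5)
            return f(*args, **kwargs)
        return wrapped
    return decorator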
Example #5

    def parse_articles(self, notify):
        page_index = 0
        logging.info("Parsing Blog " + self.BLOG_NAME)
        while True:
            page_index += 1
            logging.info("Page " + str(page_index))
            try:
                try:
                    page = urllib2.urlopen(self.ARTICLES_URL + str(page_index), timeout=NewsParser.TIMEOUT).read()
                except urllib2.HTTPError as e:
                    if e.code == 404:
                        logging.info("Finished parsing blog")
                        return
                    else:
                        raise

                tree = etree.HTML(page)
                articles_container = tree.xpath(RodingParser.ARTICLES_CONTAINER_XPATH)[0]
                articles = []
                for article in articles_container.getchildren():
                    if "post" in article.attrib["class"]:
                        articles.append(article)

                if len(articles) == 0:
                    return

                for article in articles:
                    post_id = article.attrib['id']
                    texts = list(article.itertext())

                    heading = texts[1]
                    # German month names in the date require the de_DE locale.
                    locale.setlocale(locale.LC_ALL, 'de_DE.utf8')
                    date = texts[3]
                    date = ' '.join(date.split(' ')[1:3])
                    date = datetime.strptime(date.encode('utf-8'), "%d.%B %Y")
                    date = str(time.mktime(date.timetuple()))
                    url = article.find('h2').find('a').attrib['href']
                    content = self.get_content_from_url(url, post_id)
                    image = ''
                    for elem in article.iter():
                        if elem.tag == 'img':
                            image = elem.attrib["src"]
                            break

                    if self.newest_article_url == url:
                        logging.info("Local check: " + heading + " already exists")
                        logging.info("Finished parsing blog")
                        return
                    else:
                        self.newest_article_url = url

                    payload = {"url": url}
                    article_exists_response = self.send_post(payload, "/article_exists")
                    if article_exists_response == "No":
                        logging.info(heading + " does not exist yet")
                        payload = {"url": url,
                                   "date": date,
                                   "heading": heading,
                                   "content": content,
                                   "image": image,
                                   "publisher": self.BLOG_NAME}
                        self.send_post(payload, "/add_article")
                        if is_production() and notify:
                            notify_users_about_article(payload)
                            write_news(self.BLOG_NAME + ": " + heading + "\n")
                    elif article_exists_response == "Yes":
                        logging.info(url + " already exists")
                        logging.info("Finished parsing blog")
                        return
                    else:
                        logging.info("/article_exists sent unexpected answer: " + article_exists_response)
                        return

            except Exception:
                text = "Error while parsing news for " + self.BLOG_NAME + " on page " + str(page_index) + ": "
                text += traceback.format_exc()
                logging.info(text)
                send_to_slack(text)
                return
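
get_content_from_url is not part of the excerpt. One way it could work, given that the listing page already provides the post's DOM id (the XPath and the text-joining strategy are assumptions):

def get_content_from_url(self, url, post_id):
    # Hypothetical: fetch the full article page and collect the text of
    # the element whose id matches the post id from the listing page.
    page = urllib2.urlopen(url, timeout=NewsParser.TIMEOUT).read()
    tree = etree.HTML(page)
    post = tree.xpath("//*[@id='%s']" % post_id)[0]
    return '\n'.join(part.strip() for part in post.itertext() if part.strip())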
Example #6

    def parse_articles(self, notify):
        page_index = -5  # this blog paginates in steps of five; the first page is 0
        logging.info("Parsing Blog " + self.BLOG_NAME)
        while True:
            page_index += 5
            logging.info("Page " + str(page_index))
            try:
                try:
                    page = urllib2.urlopen(self.ARTICLES_URL + str(page_index), timeout=NewsParser.TIMEOUT).read()
                except urllib2.HTTPError as e:
                    if e.code == 404:
                        logging.info("Finished parsing blog")
                        return
                    else:
                        raise

                tree = etree.HTML(page)
                articles_container = tree.xpath(MutterstadtParser.ARTICLES_CONTAINER_XPATH)[0]
                articles = []
                for article in articles_container:
                    if "items-row cols-1 row-" in article.attrib.get("class", ""):
                        articles.append(article)

                if len(articles) == 0:
                    return

                for article in articles:
                    texts = list(article.itertext())

                    heading = texts[2]
                    content = texts[11:]  # entries before index 11 are heading, date and other boilerplate
                    content = [text_part.strip() for text_part in content]
                    content = [text_part for text_part in content if text_part != '']
                    content = '\n'.join(content)
                    content = re.sub(r'E-Mail\n\| Zugriffe: \d+\n', '', content)
                    # German month names in the date require the de_DE locale.
                    locale.setlocale(locale.LC_ALL, 'de_DE.utf8')
                    date = ' '.join(texts[3].split(' ')[2:5])
                    date = datetime.strptime(date.encode('utf-8'), "%d. %B %Y")
                    date = str(time.mktime(date.timetuple()))
                    # The listing has no per-article links, so synthesise a unique URL from the heading hash and the date.
                    url = self.ARTICLES_URL + str(0) + "&heading=" + md5.new(heading.encode("utf-8")).hexdigest() + date
                    image = ''
                    for elem in article.iter():
                        if elem.tag == 'img':
                            image = self.BLOG_BASE_URL + elem.attrib["src"]
                            break

                    if self.newest_article_url == url:
                        logging.info("Local check: " + heading + " already exists")
                        logging.info("Finished parsing blog")
                        return
                    else:
                        self.newest_article_url = url

                    payload = {"url": url}
                    article_exists_response = self.send_post(payload, "/article_exists")
                    if article_exists_response == "No":
                        logging.info(heading + " does not exist yet")
                        payload = {"url": url,
                                   "date": date,
                                   "heading": heading,
                                   "content": content,
                                   "image": image,
                                   "publisher": self.BLOG_NAME}
                        self.send_post(payload, "/add_article")
                        if is_production() and notify:
                            notify_users_about_article(payload)
                            write_news(self.BLOG_NAME + ": " + heading + "\n")
                    elif article_exists_response == "Yes":
                        logging.info(url + " already exists")
                        logging.info("Finished parsing blog")
                        return
                    else:
                        logging.info("/article_exists sent unexpected answer: " + article_exists_response)
                        return

            except Exception:
                text = "Error while parsing news for " + self.BLOG_NAME + " on page " + str(page_index) + ": "
                text += traceback.format_exc()
                logging.info(text)
                send_to_slack(text)
                return
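
The date handling above is easy to misread, so here is a worked example of the same pipeline with an illustrative input line (the real markup may differ), using the locale, datetime, and time imports the snippet already relies on:

locale.setlocale(locale.LC_ALL, 'de_DE.utf8')
raw = u'Ver\xf6ffentlicht am 12. M\xe4rz 2018'
date = ' '.join(raw.split(' ')[2:5])               # u'12. M\xe4rz 2018'
parsed = datetime.strptime(date.encode('utf-8'), "%d. %B %Y")
timestamp = str(time.mktime(parsed.timetuple()))   # seconds since epoch, timezone-dependent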
                              leage_relay="2b")

    blog_parsers_instances = [
        BVDGParser(),
        SpeyerParser(),
        SchwedtParser(),
        MutterstadtParser(),
        RodingParser()
    ]

    try:
        for parser in [BuliParser1A, BuliParser1B, BuliParser2A, BuliParser2B]:
            parser.update_buli(args.notify)

        for blog_parser_instance in blog_parsers_instances:
            blog_parser_instance.parse_articles(args.notify)

        update_readme(blog_parsers_instances)

        if is_production():
            commit_changes()

        if os.path.isfile(NEWS_FILE):
            os.remove(NEWS_FILE)

    except Exception:
        text = "An error occurred: " + traceback.format_exc()
        print text
        if is_production():
            send_to_slack(text)
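
write_news and NEWS_FILE tie the parsers to the driver above: each new article appends one line, and the file is deleted after a successful run. A minimal sketch (append mode and the encoding are assumptions):

def write_news(line):
    # Hypothetical: accumulate one line per new article; the driver
    # removes NEWS_FILE once the run has been committed.
    with open(NEWS_FILE, 'a') as news_file:
        news_file.write(line.encode('utf-8'))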
Example #8
PAGSEGURO_ERRO_LOG = LOCAL('logs') + '/pagseguro_erro.log'  # file for logging PagSeguro return-validation errors (optional)

FIXTURE_DIRS = (
    LOCAL('fixtures'),
)

MEDIA_ROOT = LOCAL('media') 

# MEDIA_URL = 'http://localhost:8000/drderma/media/'

STATIC_ROOT = LOCAL('static_root')

# SITE_URL = 'http://%s' % DEV_IP

if is_production():
    SITE_URL = 'http://example.drderma.bertoche.com.br'
else:
    SITE_URL = 'http://127.0.0.1'

MEDIA_URL = '%s/media/' % SITE_URL

#MEDIA_URL = '/media/'

# STATIC_URL = '%s/static/' % SITE_URL
STATIC_URL = '/static/'
# STATIC_URL = 'http://localhost:8000/drderma/static/'

STATICFILES_DIRS = (
    LOCAL('static'),
)
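
The LOCAL() helper used for every path in these settings is defined elsewhere; a common pattern (the implementation is an assumption) resolves path parts against the directory containing the settings module:

import os

PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))

def LOCAL(*path):
    # Assumption: resolve the given path parts relative to the project root.
    return os.path.join(PROJECT_ROOT, *path)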