def parse_articles(self, notify):
    page_index = 0
    logging.info("Parsing Blog " + self.BLOG_NAME)
    while True:
        page_index += 1
        logging.info("Page " + str(page_index))
        try:
            try:
                page = urllib2.urlopen(self.ARTICLES_URL + str(page_index),
                                       timeout=NewsParser.TIMEOUT).read()
            except urllib2.HTTPError, e:
                if e.code == 404:
                    logging.info("Finished parsing blog")
                    return
                else:
                    raise Exception(e)
            articles = self.parse_initial_articles(page)
            for article_index in range(len(articles)):
                article_url = articles[article_index]["url"]
                if page_index == 1 and article_index == 0:
                    if self.newest_article_url == article_url:
                        logging.info("Local check: " + article_url + " already exists")
                        logging.info("Finished parsing blog")
                        return
                    else:
                        self.newest_article_url = article_url
                payload = {"url": article_url}
                article_exists_response = self.send_post(payload, "/article_exists")
                if article_exists_response == "No":
                    logging.info(article_url + " does not exist yet")
                    new_article = self.parse_article_from_article_url(articles[article_index])
                    payload = {"url": new_article["url"],
                               "date": new_article["date"],
                               "heading": new_article["heading"],
                               "content": new_article["content"],
                               "image": new_article["image"],
                               "publisher": self.BLOG_NAME}
                    self.send_post(payload, "/add_article")
                    if is_production() and notify:
                        notify_users_about_article(payload)
                    write_news(self.BLOG_NAME + ": " + new_article["heading"] + "\n")
                elif article_exists_response == "Yes":
                    logging.info(article_url + " already exists")
                    logging.info("Finished parsing blog")
                    return
                else:
                    logging.info("/article_exists sent unexpected answer: " + article_exists_response)
                    return
        except Exception, e:
            text = "Error while parsing news for " + self.BLOG_NAME + " on page " + str(page_index) + ": "
            text += traceback.format_exc()
            logging.info(text)
            send_to_slack(text)
            return
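# The parsers rely on a send_post() helper that is not shown in this excerpt.
# A minimal sketch of what it might look like, assuming the backend answers
# with a plain-text body ("Yes"/"No" for /article_exists) and that an
# API_BASE_URL constant exists; both names and the urllib2 transport are
# assumptions, not the project's confirmed implementation:

import urllib
import urllib2


class NewsParser(object):
    TIMEOUT = 10                             # assumption: defined elsewhere
    API_BASE_URL = 'http://localhost:5000'   # assumption: defined elsewhere

    def send_post(self, payload, path):
        # URL-encode the payload as form data, POST it, and return the
        # raw response body so callers can compare it to "Yes"/"No".
        data = urllib.urlencode(
            dict((k, v.encode('utf-8') if isinstance(v, unicode) else v)
                 for k, v in payload.items()))
        response = urllib2.urlopen(self.API_BASE_URL + path, data,
                                   timeout=self.TIMEOUT)
        return response.read().strip()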
def download():
    """Downloads an artifact by its content hash."""
    # Allow users with access to the build to download the file. Falls back
    # to API keys with access to the build. Prefer user first for speed.
    try:
        build = auth.can_user_access_build('build_id')
    except HTTPException:
        logging.debug('User access to artifact failed. Trying API key.')
        _, build = auth.can_api_key_access_build('build_id')

    sha1sum = request.args.get('sha1sum', type=str)
    if not sha1sum:
        logging.debug('Artifact sha1sum=%r not supplied', sha1sum)
        abort(404)

    artifact = models.Artifact.query.get(sha1sum)
    if not artifact:
        logging.debug('Artifact sha1sum=%r does not exist', sha1sum)
        abort(404)

    build_id = request.args.get('build_id', type=int)
    if not build_id:
        logging.debug('build_id missing for artifact sha1sum=%r', sha1sum)
        abort(404)

    is_owned = artifact.owners.filter_by(id=build_id).first()
    if not is_owned:
        logging.debug('build_id=%r not owner of artifact sha1sum=%r',
                      build_id, sha1sum)
        abort(403)

    # Make sure there are no Set-Cookie headers on the response so this
    # request is cacheable by all HTTP frontends.
    @utils.after_this_request
    def no_session(response):
        if 'Set-Cookie' in response.headers:
            del response.headers['Set-Cookie']

    if not utils.is_production():
        # Insert a sleep to emulate how the page loading looks in production.
        time.sleep(1.5)

    if request.if_none_match and request.if_none_match.contains(sha1sum):
        response = flask.Response(status=304)
        return response

    return _get_artifact_response(artifact)
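# _get_artifact_response() is not shown above. Given the If-None-Match check
# keyed on sha1sum, it presumably serves the artifact bytes with the content
# hash as a strong ETag so HTTP frontends can cache the response. A minimal
# sketch, assuming the Artifact row exposes data, content_type, and id (the
# sha1sum) attributes; the real implementation may stream from blob storage:

import flask


def _get_artifact_response(artifact):
    response = flask.Response(
        artifact.data,
        mimetype=artifact.content_type or 'application/octet-stream')
    # Content-addressed artifacts never change, so the hash is a stable ETag
    # and the response can be cached aggressively.
    response.set_etag(artifact.id)
    response.cache_control.public = True
    response.cache_control.max_age = 8640000
    return response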
# HTTP -> HTTPS redirect
# https://stackoverflow.com/questions/32237379/python-flask-redirect-to-https-from-http/32238093
if os.getenv('REDIRECT_HTTP_TO_HTTPS'):
    @app.before_request
    def before_request_https():
        if request.url.startswith('http://'):
            url = request.url.replace('http://', 'https://', 1)
            # We use a 302 in case we need to revert the redirect.
            return redirect(url, code=302)

# Unique random key for sessions.
# For setups with multiple workers, an environment variable is required;
# otherwise cookies will be constantly removed and re-set by different workers.
if utils.is_production():
    if not os.getenv('SECRET_KEY'):
        raise RuntimeError(
            'The SECRET_KEY must be provided for non-dev environments.')
    app.config['SECRET_KEY'] = os.getenv('SECRET_KEY')
else:
    app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', uuid.uuid4().hex)

if utils.is_heroku():
    app.config.update(
        SESSION_COOKIE_SECURE=True,
        SESSION_COOKIE_HTTPONLY=True,
        SESSION_COOKIE_SAMESITE='Lax',
    )
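# utils.is_production() and utils.is_heroku() are referenced throughout this
# excerpt but never defined. A plausible sketch based on environment
# variables; the ENVIRONMENT variable name is an assumption, while DYNO is
# genuinely set by Heroku on its dynos:

import os


def is_production():
    # Assumption: deployments set ENVIRONMENT=production.
    return os.getenv('ENVIRONMENT') == 'production'


def is_heroku():
    # Heroku sets the DYNO environment variable on every dyno.
    return 'DYNO' in os.environ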
def wrapped(*args, **kwargs):
    g.build = can_user_access_build(param_name)
    if not utils.is_production():
        # Insert a sleep to emulate page loading in production.
        time.sleep(0.5)
    return f(*args, **kwargs)
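# wrapped() above is clearly the inner function of a decorator factory: it
# closes over both param_name and the view function f. A sketch of the likely
# enclosing shape, assuming a build_access_required decorator (the name is
# hypothetical) that resolves the build from a view argument and stashes it
# on flask.g before the view runs:

import functools
import time

from flask import g


def build_access_required(param_name):
    def decorator(f):
        @functools.wraps(f)
        def wrapped(*args, **kwargs):
            g.build = can_user_access_build(param_name)
            if not utils.is_production():
                # Insert a sleep to emulate page loading in production.
                time.sleep(0.5)
            return f(*args, **kwargs)
        return wrapped
    return decorator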
def parse_articles(self, notify):
    page_index = 0
    logging.info("Parsing Blog " + self.BLOG_NAME)
    while True:
        page_index += 1
        logging.info("Page " + str(page_index))
        try:
            try:
                page = urllib2.urlopen(self.ARTICLES_URL + str(page_index),
                                       timeout=NewsParser.TIMEOUT).read()
            except urllib2.HTTPError, e:
                if e.code == 404:
                    logging.info("Finished parsing blog")
                    return
                else:
                    raise Exception(e)
            tree = etree.HTML(page)
            articles_container = tree.xpath(RodingParser.ARTICLES_CONTAINER_XPATH)[0]
            articles = []
            for article in articles_container.getchildren():
                if "post" in article.attrib["class"]:
                    articles.append(article)
            if len(articles) == 0:
                return
            for article in articles:
                post_id = article.attrib['id']
                texts = list(article.itertext())
                heading = texts[1]
                # Parse the German date; strptime needs the de_DE locale to
                # recognize German month names.
                locale.setlocale(locale.LC_ALL, 'de_DE.utf8')
                date = texts[3]
                date = ' '.join(date.split(' ')[1:3])
                date = datetime.strptime(date.encode('utf-8'), "%d.%B %Y")
                date = str(time.mktime(date.timetuple()))
                url = article.find('h2').find('a').attrib['href']
                content = self.get_content_from_url(url, post_id)
                image = ''
                for elem in list(article.getiterator()):
                    if elem.tag == 'img':
                        image = elem.attrib["src"]
                        break
                if self.newest_article_url == url:
                    logging.info("Local check: " + heading + " already exists")
                    logging.info("Finished parsing blog")
                    return
                else:
                    self.newest_article_url = url
                payload = {"url": url}
                article_exists_response = self.send_post(payload, "/article_exists")
                if article_exists_response == "No":
                    logging.info(heading + " does not exist yet")
                    payload = {"url": url, "date": date, "heading": heading,
                               "content": content, "image": image,
                               "publisher": self.BLOG_NAME}
                    self.send_post(payload, "/add_article")
                    if is_production() and notify:
                        notify_users_about_article(payload)
                    write_news(self.BLOG_NAME + ": " + heading + "\n")
                elif article_exists_response == "Yes":
                    logging.info(url + " already exists")
                    logging.info("Finished parsing blog")
                    return
                else:
                    logging.info("/article_exists sent unexpected answer: " + article_exists_response)
                    return
        except Exception, e:
            text = "Error while parsing news for " + self.BLOG_NAME + " on page " + str(page_index) + ": "
            text += traceback.format_exc()
            logging.info(text)
            send_to_slack(text)
            return
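# The date handling above only works when the de_DE.utf8 locale is installed
# on the host, because %B matches locale-dependent month names. A standalone
# demonstration of the same technique (Python 2, matching the code above;
# the sample string is illustrative):

import locale
import time
from datetime import datetime

locale.setlocale(locale.LC_ALL, 'de_DE.utf8')
parsed = datetime.strptime('01.Januar 2018', "%d.%B %Y")
timestamp = str(time.mktime(parsed.timetuple()))  # seconds since epoch, as a string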
def parse_articles(self, notify):
    # This blog paginates with a 5-article offset (0, 5, 10, ...).
    page_index = -5
    logging.info("Parsing Blog " + self.BLOG_NAME)
    while True:
        page_index += 5
        logging.info("Page " + str(page_index))
        try:
            try:
                page = urllib2.urlopen(self.ARTICLES_URL + str(page_index),
                                       timeout=NewsParser.TIMEOUT).read()
            except urllib2.HTTPError, e:
                if e.code == 404:
                    logging.info("Finished parsing blog")
                    return
                else:
                    raise Exception(e)
            tree = etree.HTML(page)
            articles_container = tree.xpath(MutterstadtParser.ARTICLES_CONTAINER_XPATH)[0]
            articles = []
            for article in articles_container:
                # Guard on the article's own attributes (not the container's)
                # before reading its class, to avoid a KeyError on bare nodes.
                if "class" in article.keys() and "items-row cols-1 row-" in article.attrib["class"]:
                    articles.append(article)
            if len(articles) == 0:
                return
            for article_index in range(len(articles)):
                article = articles[article_index]
                texts = list(article.itertext())
                heading = texts[2]
                content = texts[11:]
                content = [text_part.strip() for text_part in content]
                content = [text_part for text_part in content if text_part != '']
                content = '\n'.join(content)
                content = re.sub('(E-Mail\n\| Zugriffe: \d+\n)', '', content)
                # Parse the German date; strptime needs the de_DE locale to
                # recognize German month names.
                locale.setlocale(locale.LC_ALL, 'de_DE.utf8')
                date = ' '.join(texts[3].split(' ')[2:5])
                date = datetime.strptime(date.encode('utf-8'), "%d. %B %Y")
                date = str(time.mktime(date.timetuple()))
                # Articles have no stable permalink here, so build a synthetic,
                # deterministic URL from the heading hash and date for deduplication.
                url = self.ARTICLES_URL + str(0) + "&heading=" + md5.new(heading.encode("utf-8")).hexdigest() + date
                image = ''
                for elem in list(article.getiterator()):
                    if elem.tag == 'img':
                        image = self.BLOG_BASE_URL + elem.attrib["src"]
                        break
                if self.newest_article_url == url:
                    logging.info("Local check: " + heading + " already exists")
                    logging.info("Finished parsing blog")
                    return
                else:
                    self.newest_article_url = url
                payload = {"url": url}
                article_exists_response = self.send_post(payload, "/article_exists")
                if article_exists_response == "No":
                    logging.info(heading + " does not exist yet")
                    payload = {"url": url, "date": date, "heading": heading,
                               "content": content, "image": image,
                               "publisher": self.BLOG_NAME}
                    self.send_post(payload, "/add_article")
                    if is_production() and notify:
                        notify_users_about_article(payload)
                    write_news(self.BLOG_NAME + ": " + heading + "\n")
                elif article_exists_response == "Yes":
                    logging.info(url + " already exists")
                    logging.info("Finished parsing blog")
                    return
                else:
                    logging.info("/article_exists sent unexpected answer: " + article_exists_response)
                    return
        except Exception, e:
            text = "Error while parsing news for " + self.BLOG_NAME + " on page " + str(page_index) + ": "
            text += traceback.format_exc()
            logging.info(text)
            send_to_slack(text)
            return
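# The /article_exists and /add_article endpoints the parsers POST to are not
# shown anywhere in this excerpt. From the payload shapes and the literal
# "Yes"/"No" answers, the server side might look roughly like this Flask
# sketch; the in-memory store and every name below are assumptions standing
# in for the real database-backed implementation:

from flask import Flask, request

app = Flask(__name__)
articles = {}  # url -> article dict; a stand-in for the real database


@app.route('/article_exists', methods=['POST'])
def article_exists():
    # The parsers branch on the literal response body "Yes" or "No".
    return 'Yes' if request.form['url'] in articles else 'No'


@app.route('/add_article', methods=['POST'])
def add_article():
    articles[request.form['url']] = request.form.to_dict()
    return 'OK'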
                           leage_relay="2b")

blog_parsers_instances = [
    BVDGParser(),
    SpeyerParser(),
    SchwedtParser(),
    MutterstadtParser(),
    RodingParser()
]

try:
    for parser in [BuliParser1A, BuliParser1B, BuliParser2A, BuliParser2B]:
        parser.update_buli(args.notify)
    for blog_parser_instance in blog_parsers_instances:
        blog_parser_instance.parse_articles(args.notify)
    update_readme(blog_parsers_instances)
    if is_production():
        commit_changes()
    if os.path.isfile(NEWS_FILE):
        os.remove(NEWS_FILE)
except:
    text = "An error occurred:" + traceback.format_exc()
    print text
    if is_production():
        send_to_slack(text)
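# args.notify in the driver above implies a command-line flag. A minimal
# argparse setup that would produce it; the help text is an assumption:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--notify', action='store_true',
                    help='push notifications for newly found articles')
args = parser.parse_args()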
PAGSEGURO_ERRO_LOG = LOCAL('logs') + '/pagseguro_erro.log'  # file for saving PagSeguro return-validation errors (optional)

FIXTURE_DIRS = (
    LOCAL('fixtures'),
)

MEDIA_ROOT = LOCAL('media')
# MEDIA_URL = 'http://localhost:8000/drderma/media/'
STATIC_ROOT = LOCAL('static_root')

# SITE_URL = 'http://%s' % DEV_IP
if is_production():
    SITE_URL = 'http://example.drderma.bertoche.com.br'
else:
    SITE_URL = 'http://127.0.0.1'

MEDIA_URL = '%s/media/' % SITE_URL
# MEDIA_URL = '/media/'
# STATIC_URL = '%s/static/' % SITE_URL  # overridden by the line below; kept for reference
STATIC_URL = '/static/'
# STATIC_URL = 'http://localhost:8000/drderma/static/'

STATICFILES_DIRS = (
    LOCAL('static'),
)
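# LOCAL() is a path helper used throughout these settings but defined
# elsewhere. It is conventionally something like the following sketch, which
# resolves names relative to the settings file; the PROJECT_ROOT name is an
# assumption:

import os

PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))


def LOCAL(*parts):
    return os.path.join(PROJECT_ROOT, *parts)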