def fetch_document(self, doc: Document, logger=None):
    """Copy the given document (proposal.models.Document) to a local
    directory.

    :param doc: the Document to download
    :param logger: optional logger; defaults to this task's logger

    :returns: a tuple of (new_or_updated, doc_id). new_or_updated is True
        when the document was downloaded and is new or changed, and False
        when the stored copy is already current (HTTP 304). This function
        never returns on a failed download; it raises instead (see below).

    :raises DocumentDownloadFatalException: when the server responds with a
        4xx status. The Document is deleted first, since further attempts
        cannot succeed.
    :raises DocumentDownloadException: on other failure statuses, so the
        caller can retry later.
    """
    logger = logger or get_logger(self)
    url = doc.url
    logger.info("Fetching Document #%i", doc.pk)
    dl, status, updated = doc_utils.save_from_url(doc, url, "download")
    if dl:
        if status == 304:
            # Not modified since the last fetch; nothing to copy.
            logger.info("Document #%i is up to date", doc.pk)
            return (False, doc.pk)
        else:
            logger.info("Copied %s Document #%i -> %s",
                        "updated" if updated else "new",
                        doc.pk, doc.document.path)
            return (True, doc.pk)
    else:
        logger.warning(
            "Attempt to download document #%i (%s) failed with code %i",
            doc.pk, doc.url, status)
        if 400 <= status < 500:
            # Further attempts are not going to succeed, so delete the
            # document
            logger.warning("Document #%i deleted.", doc.pk)
            doc.delete()
            raise DocumentDownloadFatalException()
        else:
            # This error could eventually go away, so make sure to retry
            # later
            raise DocumentDownloadException()
def send_subscription_confirmation_email(self, sub: Subscription):
    """Email the user to confirm a pending subscription.

    Inactive users receive a welcome email instead, and subscriptions that
    are already active are ignored. When the site allows only one
    subscription per user, the email asks the user to confirm replacing
    their existing active subscription.
    """
    logger = get_logger(self)
    user = sub.user

    if not user.is_active:
        mail.send_welcome_email(sub)
        return

    if sub.active:
        return

    replaced = None
    if sub.site_name:
        config = site_config.by_hostname(sub.site_name)
        if not config.allow_multiple_subscriptions:
            candidates = (user.subscriptions.active()
                          .filter(site_name=sub.site_name)
                          .exclude(pk=sub.id))
            try:
                replaced = candidates[0]
            except IndexError:
                replaced = None

    # Send an email to confirm the subscription change
    if replaced:
        mail.send_replace_subscription_email(sub, replaced, logger)
    else:
        mail.send_confirm_subscription_email(sub, logger)
def send_user_updates(self, sub: Subscription, updates):
    """Email the subscriber a digest of recent updates to a Subscription.

    :param sub: the Subscription whose owner should be notified
    :param updates: the recent updates to summarize in the email body
    """
    recipient = sub.user
    context = mail.updates_context(sub, updates)
    send_mail(recipient.email, "Cornerwise: New Updates", "updates",
              context, logger=get_logger(self))
def __init__(self, url: str, browser: Union[Chrome, Firefox, Safari] = None,
             wait: float = 30):
    """Wrap a Selenium browser pointed at the given URL.

    :param url: the page URL this object will work with
    :param browser: an existing WebDriver instance; when None, a Chrome
        driver is created via get_chrome()
    :param wait: implicit wait timeout (seconds) applied to the browser
    """
    self.url = url
    # type(self).__name__ is already a string; the original wrapped it in a
    # redundant f-string with no other content.
    self.logger = logger.get_logger(type(self).__name__)
    self.browser = browser or get_chrome()
    self.browser.implicitly_wait(wait)
    if not browser:
        self.logger.info('Browser was created.')
def pull_updates(self, since: datetime=None, importers_filter=None):
    """Run importers that match the given filters, or all importers if no
    filters are specified.

    :param since: only fetch proposals changed after this datetime
    :param importers_filter: either a dict of Importer queryset filters, a
        string matched case-insensitively against the importer region name,
        or None to run every importer

    :returns: the result of fetch_proposals for the matched importers
    """
    # BUG FIX: the default was previously a mutable dict ({}), which Python
    # shares across calls. None is the safe sentinel, and the body already
    # handled it.
    if isinstance(importers_filter, str):
        importers_filter = {"region_name__icontains": importers_filter}
    elif importers_filter is None:
        importers_filter = {}
    return fetch_proposals(
        since,
        importers=Importer.objects.filter(**importers_filter),
        logger=get_logger(self))
def document_processing_failed(task, exc, task_id, args, kwargs, einfo, ):
    """Failure callback run when a document processing task exhausts its
    retries. If the failure was a download error, the Document is deleted;
    otherwise the failure is only logged.
    """
    log = get_logger(task)
    doc = Document.objects.get(pk=args[0])
    was_download_error = isinstance(
        exc, (DocumentDownloadException, DocumentDownloadFatalException))
    if was_download_error:
        log.warning(
            "Processing for Document #%i (%s) failed repeatedly. Deleting.",
            doc.pk, doc.url)
        doc.delete()
    else:
        log.warning("Processing for Document #%i (%s) failed repeatedly",
                    doc.pk, doc.url)
def process_document(self, doc: Document):
    """Run the processing pipeline for a document: fetch it, extract its
    text, derive attributes and images, and regenerate its thumbnail when
    needed.
    """
    logger = get_logger(self)
    changed, _ = fetch_document(doc, logger)

    # Reuse previously extracted text unless the fetch brought a new or
    # updated copy of the document.
    have_text = (not changed and doc.fulltext) or extract_text(doc, logger)
    if have_text:
        add_doc_attributes(doc, logger)
        images = extract_images(doc, logger)
        post_process_images(doc, images, logger)

    if changed or not doc.thumbnail:
        generate_doc_thumbnail(doc, logger)
def extract_text(self, doc: Document, logger=None):
    """If a document is a PDF that has been copied to the filesystem,
    extract its text contents to a file and save the path of the text
    document.

    :param doc: proposal.models.Document object
    :param logger: optional logger; defaults to this task's logger

    :returns: True if text was extracted, False otherwise. (The original
        docstring claimed the document was returned, and the failure path
        implicitly returned None; it now returns False explicitly, which is
        equally falsy for existing callers.)
    """
    logger = logger or get_logger(self)

    if doc_utils.extract_text(doc):
        logger.info("Extracted text from Document #%i to %s.",
                    doc.pk, doc.fulltext)
        return True

    logger.error("Failed to extract text from %s", doc.local_path)
    return False
def document_processing_failed(task, exc, task_id, args, kwargs, einfo, ):
    """Called when a document processing task fails more than max_retries.

    :param task: the failed task instance
    :param exc: the exception raised on the final attempt
    :param task_id: id of the failed task
    :param args: the task's positional args; args[0] is the Document pk
    :param kwargs: the task's keyword args
    :param einfo: exception info for the failure
    """
    logger = get_logger(task)
    doc = Document.objects.get(pk=args[0])
    if isinstance(exc, (DocumentDownloadException,
                        DocumentDownloadFatalException)):
        # The download itself keeps failing, so the record is removed.
        logger.warning(
            "Processing for Document #%i (%s) failed repeatedly. Deleting.",
            doc.pk, doc.url)
        doc.delete()
    else:
        logger.warning(
            "Processing for Document #%i (%s) failed repeatedly",
            doc.pk, doc.url)
        # Mark the document so repeated failures are visible/persisted.
        # NOTE(review): the source formatting was collapsed; these two lines
        # most plausibly belong to this else branch (saving after delete()
        # on the other path would re-insert the row) — confirm against the
        # original file.
        doc.processing_state = "failed"
        doc.save()
def create_appointment_intent(bot_id, locale_id):
    """Assemble the appointment intent for a bot locale: create the custom
    slot type, the intent, its three slots (time, date, appointment type),
    then update the intent with the slot priorities.
    """
    log = get_logger(__name__)
    bot_language = os.environ.get("botLanguage")

    # Custom slot type (AppointmentType)
    slot_type = create_appointment_slot_type(bot_language, bot_id, locale_id)
    log.info(slot_type)
    appointment_slot_type_id = slot_type["slotTypeId"]

    # The intent that owns the slots
    intent = create_intent(bot_id, locale_id)
    log.info(intent)
    intent_id = intent["intentId"]

    # Slots: time, date, and appointment type
    time_slot = create_appointment_slot("Time", "AMAZON.Time", "time",
                                        bot_language, bot_id, locale_id,
                                        intent_id)
    date_slot = create_appointment_slot("Date", "AMAZON.Date", "date",
                                        bot_language, bot_id, locale_id,
                                        intent_id)
    type_slot = create_appointment_slot(
        "AppointmentType", appointment_slot_type_id, "appointmentType",
        bot_language, bot_id, locale_id, intent_id,
    )

    # Update the intent so the slots are prioritized in order
    update_appointment_intent(
        bot_language, intent_id, type_slot["slotId"], date_slot["slotId"],
        time_slot["slotId"], bot_id, locale_id,
    )
def send_notifications(self, subscription_ids=None, since=None):
    """Check the Subscriptions and find those that have new updates since
    the last run, sending mail for each and marking them as sent.

    :param subscription_ids: explicit subscription pks to notify; when
        falsy, all subscriptions due for an update are considered
    :param since: cutoff passed through to the update query/sender
    """
    logger = get_logger(self)
    if subscription_ids:
        pending = Subscription.objects.filter(pk__in=subscription_ids)
    else:
        pending = Subscription.objects.due(since)

    notified = [sub.pk for sub in pending
                if send_subscription_updates(sub, since)]

    if notified:
        logger.info("Sent updates for %s subscription(s)", len(notified))
        Subscription.objects.filter(pk__in=notified).mark_sent()
    else:
        logger.info("No updates sent")
def collect_sendgrid_stats(self):
    """Fetch SendGrid daily stats since the last successful collection
    (capped at one week back) and cache each day's stats in redis.
    """
    if not (SG and SG.api_key):
        return

    logger = get_logger(self)
    since = red.get_key("cornerwise:collected_sg_stats")
    now = timezone.now()
    week = now - timedelta(days=7)
    # Never request more than a week of history.
    since = max(since, week) if since else week
    date_str = since.strftime("%Y-%m-%d")
    resp = SG.client.stats.get(query_params={"start_date": date_str})
    if resp.status_code == 200:
        stats = json.loads(resp.body)
        red.set_many(
            (f"cornerwise:sg_daily:{s['date']}", s) for s in stats)
        red.set_key("cornerwise:collected_sg_stats", now)
    else:
        # BUG FIX: the original logged `stats.status_code` and `stats` here,
        # but `stats` is only bound on the 200 path, so the error branch
        # raised NameError. Report the response instead.
        logger.warning("Failed to fetch SendGrid stats.\n"
                       f"Response code: {resp.status_code}\n"
                       f"Message: {resp.body}")
# http://www.apache.org/licenses/LICENSE-2.0 # # # # or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # # OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions # # and limitations under the License. # # ##################################################################################################################### import os import json import time import botocore import boto3 from shared.wrappers import code_pipeline_exception_handler from shared.logger import get_logger from shared.helper import get_client, get_built_in_model_monitor_container_uri logger = get_logger(__name__) sm_client = get_client("sagemaker") cp_client = get_client("codepipeline") @code_pipeline_exception_handler def handler(event, context): # Extract the Job ID job_id = event["CodePipeline.job"]["id"] baseline_job_name = os.environ["BASELINE_JOB_NAME"] assets_bucket = os.environ["ASSETS_BUCKET"] training_data_location = os.environ["TRAINING_DATA_LOCATION"] baseline_job_output_location = os.environ["BASELINE_JOB_OUTPUT_LOCATION"] instance_type = os.environ["INSTANCE_TYPE"]
def __init__(self, config: Scheduler, time_zone: str):
    """Configure the visit scheduler from its config and a time zone name.

    :param config: scheduler settings (headless flag, polling interval)
    :param time_zone: tz database name resolved through pytz
    """
    self.logger = logger.get_logger('VisitScheduler')
    self.tz = pytz.timezone(time_zone)
    self.interval = config.INTERVAL
    self.headless = config.BROWSER_HEADLESS
def __init__(self, username: str, password: str, headless: bool = False):
    """Store the visitor's credentials and browser mode.

    :param username: login name for the visit
    :param password: login password for the visit
    :param headless: whether the browser should run without a window
    """
    self.logger = logger.get_logger('Visitor')
    self.username = username
    self.password = password
    self.headless = headless
def resend_user_key(self, user: User):
    """Re-send the login-link email to the given user."""
    log = get_logger(self)
    mail.send_login_link(user, log)
def test_get_level_locally():
    """With the root logger's handlers cleared, a freshly obtained logger
    should report level 0 (NOTSET)."""
    root = logging.getLogger()
    root.handlers = []
    log = get_logger(__name__)
    assert log.level == 0
def cleanup_subscriptions(self):
    """Delete stale (unconfirmed) subscriptions, logging how many were
    removed."""
    logger = get_logger(self)
    _total, per_model = Subscription.objects.stale().delete()
    removed = per_model["user.Subscription"]
    if removed:
        logger.info("Deleted %s unconfirmed subscriptions", removed)
def send_staff_notification(self, sub: Subscription, title, message):
    """Email a staff-authored message to the subscriber.

    :param sub: the Subscription whose user should receive the message
    :param title: email subject; falls back to "New Message" when falsy
    :param message: the message body
    """
    subject = title or "New Message"
    mail.send_staff_notification_email(sub, subject, message,
                                       get_logger(self))