Ejemplo n.º 1
0
def fetch_document(self, doc: Document, logger=None):
    """Download the file behind ``doc.url`` and attach it to the document.

    :param doc: the proposal.models.Document to fetch
    :param logger: optional logger; when omitted, ``get_logger(self)`` is used

    :returns: a ``(new_or_updated, doc_id)`` tuple. ``new_or_updated`` is
    False when the download reported status 304 (already up to date) and
    True when a new or updated copy was saved.

    :raises DocumentDownloadFatalException: when the download failed with a
    4xx status; the document is deleted first
    :raises DocumentDownloadException: when the download failed with any
    other status
    """
    log = logger or get_logger(self)
    source_url = doc.url

    log.info("Fetching Document #%i", doc.pk)
    dl, status, updated = doc_utils.save_from_url(doc, source_url, "download")

    if not dl:
        log.warning(
            "Attempt to download document #%i (%s) failed with code %i",
            doc.pk, doc.url, status)
        if not 400 <= status < 500:
            # This error could eventually go away, so make sure to retry later
            raise DocumentDownloadException()
        # Further attempts are not going to succeed, so delete the document
        log.warning("Document #%i deleted.", doc.pk)
        doc.delete()
        raise DocumentDownloadFatalException()

    if status == 304:
        log.info("Document #%i is up to date", doc.pk)
        return (False, doc.pk)

    freshness = "updated" if updated else "new"
    log.info("Copied %s Document #%i -> %s",
             freshness, doc.pk, doc.document.path)
    return (True, doc.pk)
Ejemplo n.º 2
0
def fetch_document(self, doc: Document, logger=None):
    """Download the file behind ``doc.url`` and attach it to the document.

    :param doc: the proposal.models.Document to fetch
    :param logger: optional logger; when omitted, ``get_logger(self)`` is used

    :returns: a ``(new_or_updated, doc_id)`` tuple. ``new_or_updated`` is
    False when the download reported status 304 (already up to date) and
    True when a new or updated copy was saved.

    :raises DocumentDownloadFatalException: when the download failed with a
    4xx status; the document is deleted first
    :raises DocumentDownloadException: when the download failed with any
    other status
    """
    log = logger or get_logger(self)
    source_url = doc.url

    log.info("Fetching Document #%i", doc.pk)
    dl, status, updated = doc_utils.save_from_url(doc, source_url, "download")

    if not dl:
        log.warning(
            "Attempt to download document #%i (%s) failed with code %i",
            doc.pk, doc.url, status)
        if not 400 <= status < 500:
            # This error could eventually go away, so make sure to retry later
            raise DocumentDownloadException()
        # Further attempts are not going to succeed, so delete the document
        log.warning("Document #%i deleted.", doc.pk)
        doc.delete()
        raise DocumentDownloadFatalException()

    if status == 304:
        log.info("Document #%i is up to date", doc.pk)
        return (False, doc.pk)

    freshness = "updated" if updated else "new"
    log.info("Copied %s Document #%i -> %s",
             freshness, doc.pk, doc.document.path)
    return (True, doc.pk)
Ejemplo n.º 3
0
def send_subscription_confirmation_email(self, sub: Subscription):
    """Send the email that matches the state of a subscription.

    - If the subscription's user is not active, a welcome email is sent.
    - If the subscription is already active, nothing is sent.
    - Otherwise a confirmation email is sent. When the subscription has a
      site whose config disallows multiple subscriptions and the user has
      another active subscription on that site, a "replace subscription"
      email is sent instead.

    :param sub: the Subscription to confirm
    """
    logger = get_logger(self)
    subscriber = sub.user

    if not subscriber.is_active:
        mail.send_welcome_email(sub)
        return

    if sub.active:
        return

    previous = None
    if sub.site_name:
        site = site_config.by_hostname(sub.site_name)
        if not site.allow_multiple_subscriptions:
            try:
                competing = subscriber.subscriptions.active()
                competing = competing.filter(site_name=sub.site_name)
                previous = competing.exclude(pk=sub.id)[0]
            except IndexError:
                # No other active subscription on this site
                previous = None

    # Send an email to confirm the subscription change
    if not previous:
        mail.send_confirm_subscription_email(sub, logger)
    else:
        mail.send_replace_subscription_email(sub, previous, logger)
Ejemplo n.º 4
0
def send_user_updates(self, sub: Subscription, updates):
    """Email the subscription's user a summary of recent updates.

    :param sub: the Subscription the updates belong to
    :param updates: update data handed to ``mail.updates_context``
    """
    recipient = sub.user.email
    context = mail.updates_context(sub, updates)
    send_mail(recipient, "Cornerwise: New Updates", "updates", context,
              logger=get_logger(self))
Ejemplo n.º 5
0
    def __init__(self,
                 url: str,
                 browser: Union[Chrome, Firefox, Safari] = None,
                 wait: float = 30):
        """Store the URL and set up the browser to drive.

        :param url: stored on the instance as ``self.url``
        :param browser: an existing browser to use; when falsy, a new one is
            obtained from ``get_chrome()``
        :param wait: value passed to the browser's ``implicitly_wait``
        """
        self.url = url
        self.logger = logger.get_logger(type(self).__name__)

        created_here = not browser
        self.browser = get_chrome() if created_here else browser
        self.browser.implicitly_wait(wait)

        if created_here:
            self.logger.info('Browser was created.')
Ejemplo n.º 6
0
def pull_updates(self, since: datetime = None, importers_filter=None):
    """Run importers that match the given filter, or all importers if no filter
    is specified.

    :param since: passed through to ``fetch_proposals`` as its first argument
    :param importers_filter: a dict of keyword lookups for
    ``Importer.objects.filter()``, a string (turned into a
    ``region_name__icontains`` lookup), or None for no filtering

    :returns: whatever ``fetch_proposals`` returns
    """
    # The default is None rather than {} so that no dict object is shared
    # between calls.
    if importers_filter is None:
        importers_filter = {}
    elif isinstance(importers_filter, str):
        importers_filter = {"region_name__icontains": importers_filter}

    return fetch_proposals(since,
                           importers=Importer.objects.filter(**importers_filter),
                           logger=get_logger(self))
Ejemplo n.º 7
0
def pull_updates(self, since: datetime = None, importers_filter=None):
    """Run importers that match the given filter, or all importers if no filter
    is specified.

    :param since: passed through to ``fetch_proposals`` as its first argument
    :param importers_filter: a dict of keyword lookups for
    ``Importer.objects.filter()``, a string (turned into a
    ``region_name__icontains`` lookup), or None for no filtering

    :returns: whatever ``fetch_proposals`` returns
    """
    # The default is None rather than {} so that no dict object is shared
    # between calls.
    if importers_filter is None:
        importers_filter = {}
    elif isinstance(importers_filter, str):
        importers_filter = {"region_name__icontains": importers_filter}

    return fetch_proposals(since,
                           importers=Importer.objects.filter(**importers_filter),
                           logger=get_logger(self))
Ejemplo n.º 8
0
def document_processing_failed(task, exc, task_id, args, kwargs, einfo):
    """Called when a document processing task fails more than max_retries.

    Download failures cause the Document to be deleted; any other failure is
    only logged.

    :param task: the failed task; used to obtain a logger
    :param exc: the exception the task failed with
    :param task_id: unused
    :param args: positional arguments of the failed task; ``args[0]`` is used
    as the Document primary key
    :param kwargs: unused
    :param einfo: unused
    """
    logger = get_logger(task)
    doc_id = args[0]
    try:
        doc = Document.objects.get(pk=doc_id)
    except Document.DoesNotExist:
        # The document may already be gone (for instance, removed by the
        # task itself before it raised a fatal download error), in which
        # case there is nothing left to clean up.
        logger.warning(
            "Processing for Document #%s failed repeatedly, "
            "but the document no longer exists.", doc_id)
        return

    if isinstance(exc, (DocumentDownloadException, DocumentDownloadFatalException)):
        logger.warning(
            "Processing for Document #%i (%s) failed repeatedly. Deleting.",
            doc.pk, doc.url)
        doc.delete()
    else:
        logger.warning(
            "Processing for Document #%i (%s) failed repeatedly",
            doc.pk, doc.url)
Ejemplo n.º 9
0
def process_document(self, doc: Document):
    """Fetch a document and run the follow-up processing steps on it.

    Text extraction is skipped when the document was not updated and already
    has ``fulltext``. When text is available, document attributes are added
    and images are extracted and post-processed. A thumbnail is generated
    when the document was updated or does not have one yet.

    :param doc: the Document to process
    """
    logger = get_logger(self)
    updated, _ = fetch_document(doc, logger)

    if not updated and doc.fulltext:
        # Unchanged document with existing text: no need to extract again
        has_text = True
    else:
        has_text = extract_text(doc, logger)

    if has_text:
        add_doc_attributes(doc, logger)
        post_process_images(doc, extract_images(doc, logger), logger)

    needs_thumbnail = updated or not doc.thumbnail
    if needs_thumbnail:
        generate_doc_thumbnail(doc, logger)
Ejemplo n.º 10
0
def process_document(self, doc: Document):
    """Fetch a document and run the follow-up processing steps on it.

    Text extraction is skipped when the document was not updated and already
    has ``fulltext``. When text is available, document attributes are added
    and images are extracted and post-processed. A thumbnail is generated
    when the document was updated or does not have one yet.

    :param doc: the Document to process
    """
    logger = get_logger(self)
    updated, _ = fetch_document(doc, logger)

    if not updated and doc.fulltext:
        # Unchanged document with existing text: no need to extract again
        has_text = True
    else:
        has_text = extract_text(doc, logger)

    if has_text:
        add_doc_attributes(doc, logger)
        post_process_images(doc, extract_images(doc, logger), logger)

    needs_thumbnail = updated or not doc.thumbnail
    if needs_thumbnail:
        generate_doc_thumbnail(doc, logger)
Ejemplo n.º 11
0
def extract_text(self, doc: Document, logger=None):
    """Extract the text of a document via ``doc_utils.extract_text`` and log
    the outcome.

    :param doc: proposal.models.Document object
    :param logger: optional logger; when omitted, ``get_logger(self)`` is used

    :returns: True when extraction succeeded, None otherwise

    """
    log = logger or get_logger(self)

    if not doc_utils.extract_text(doc):
        log.error("Failed to extract text from %s", doc.local_path)
        return None

    log.info("Extracted text from Document #%i to %s.", doc.pk,
             doc.fulltext)
    return True
Ejemplo n.º 12
0
def extract_text(self, doc: Document, logger=None):
    """Extract the text of a document via ``doc_utils.extract_text`` and log
    the outcome.

    :param doc: proposal.models.Document object
    :param logger: optional logger; when omitted, ``get_logger(self)`` is used

    :returns: True when extraction succeeded, None otherwise

    """
    log = logger or get_logger(self)

    if not doc_utils.extract_text(doc):
        log.error("Failed to extract text from %s", doc.local_path)
        return None

    log.info("Extracted text from Document #%i to %s.", doc.pk,
             doc.fulltext)
    return True
Ejemplo n.º 13
0
def document_processing_failed(task, exc, task_id, args, kwargs, einfo):
    """Called when a document processing task fails more than max_retries.

    Download failures cause the Document to be deleted; for any other failure
    the document is kept and its ``processing_state`` is set to "failed".

    :param task: the failed task; used to obtain a logger
    :param exc: the exception the task failed with
    :param task_id: unused
    :param args: positional arguments of the failed task; ``args[0]`` is used
    as the Document primary key
    :param kwargs: unused
    :param einfo: unused
    """
    logger = get_logger(task)
    doc_id = args[0]
    try:
        doc = Document.objects.get(pk=doc_id)
    except Document.DoesNotExist:
        # The document may already be gone (for instance, removed by the
        # task itself before it raised a fatal download error), in which
        # case there is nothing left to clean up or mark as failed.
        logger.warning(
            "Processing for Document #%s failed repeatedly, "
            "but the document no longer exists.", doc_id)
        return

    if isinstance(exc, (DocumentDownloadException, DocumentDownloadFatalException)):
        logger.warning(
            "Processing for Document #%i (%s) failed repeatedly. Deleting.",
            doc.pk, doc.url)
        doc.delete()
    else:
        logger.warning(
            "Processing for Document #%i (%s) failed repeatedly",
            doc.pk, doc.url)
        doc.processing_state = "failed"
        doc.save()
Ejemplo n.º 14
0
def create_appointment_intent(bot_id, locale_id):
    """Create the appointment intent together with its slot type and slots.

    Steps, in order: create the custom AppointmentType slot type, create the
    intent, create the Time, Date and AppointmentType slots on that intent,
    then update the intent with the slot ids so the slots can be prioritized.

    The bot language is read from the ``botLanguage`` environment variable.

    :param bot_id: bot identifier, forwarded to every helper call
    :param locale_id: locale identifier, forwarded to every helper call
    """
    log = get_logger(__name__)
    language = os.environ.get("botLanguage")

    # Custom slot type (AppointmentType)
    slot_type = create_appointment_slot_type(language, bot_id, locale_id)
    log.info(slot_type)
    slot_type_id = slot_type["slotTypeId"]

    # The intent itself
    intent = create_intent(bot_id, locale_id)
    log.info(intent)
    intent_id = intent["intentId"]

    # Slots for time, date and appointment type, created in this order
    slot_specs = (
        ("Time", "AMAZON.Time", "time"),
        ("Date", "AMAZON.Date", "date"),
        ("AppointmentType", slot_type_id, "appointmentType"),
    )
    created = [
        create_appointment_slot(first, second, third, language, bot_id,
                                locale_id, intent_id)
        for first, second, third in slot_specs
    ]
    time_slot_id, date_slot_id, appointment_slot_id = (
        response["slotId"] for response in created)

    # Update the intent so the slots can be prioritized
    update_appointment_intent(language, intent_id, appointment_slot_id,
                              date_slot_id, time_slot_id, bot_id, locale_id)
Ejemplo n.º 15
0
def send_notifications(self, subscription_ids=None, since=None):
    """Send update emails for subscriptions and mark the ones that were sent.

    :param subscription_ids: when given and non-empty, only subscriptions
    with these primary keys are considered; otherwise the subscriptions
    returned by ``Subscription.objects.due(since)`` are used
    :param since: passed to ``Subscription.objects.due`` and to
    ``send_subscription_updates``

    """
    logger = get_logger(self)
    subscriptions = (Subscription.objects.filter(pk__in=subscription_ids)
                     if subscription_ids
                     else Subscription.objects.due(since))

    notified = []
    for sub in subscriptions:
        if send_subscription_updates(sub, since):
            notified.append(sub.pk)

    if not notified:
        logger.info("No updates sent")
        return

    logger.info("Sent updates for %s subscription(s)", len(notified))
    Subscription.objects.filter(pk__in=notified).mark_sent()
Ejemplo n.º 16
0
def collect_sendgrid_stats(self):
    """Fetch SendGrid stats and store them through the ``red`` helpers.

    Does nothing when ``SG`` is not configured with an API key. Stats are
    requested starting from the time of the last successful collection, but
    never from more than seven days ago. Each returned entry is stored under
    ``cornerwise:sg_daily:<date>``, and the collection time is recorded under
    ``cornerwise:collected_sg_stats``. A non-200 response is logged as a
    warning and nothing is stored.
    """
    if not (SG and SG.api_key):
        return

    logger = get_logger(self)

    last_collected = red.get_key("cornerwise:collected_sg_stats")
    now = timezone.now()
    week_ago = now - timedelta(days=7)
    since = max(last_collected, week_ago) if last_collected else week_ago

    resp = SG.client.stats.get(
        query_params={"start_date": since.strftime("%Y-%m-%d")})

    if resp.status_code != 200:
        # Report the response itself: no stats were parsed on this path.
        logger.warning("Failed to fetch SendGrid stats.\n"
                       "Response code: %s\n"
                       "Message: %s", resp.status_code, resp.body)
        return

    stats = json.loads(resp.body)
    red.set_many(
        (f"cornerwise:sg_daily:{s['date']}", s) for s in stats)
    red.set_key("cornerwise:collected_sg_stats", now)
Ejemplo n.º 17
0
#  http://www.apache.org/licenses/LICENSE-2.0                                                                         #
#                                                                                                                     #
#  or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES  #
#  OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions     #
#  and limitations under the License.                                                                                 #
# #####################################################################################################################
import os
import json
import time
import botocore
import boto3
from shared.wrappers import code_pipeline_exception_handler
from shared.logger import get_logger
from shared.helper import get_client, get_built_in_model_monitor_container_uri

# Module-level logger, created with this module's name.
logger = get_logger(__name__)

# Service clients, obtained once at import time through the shared helper.
sm_client = get_client("sagemaker")
cp_client = get_client("codepipeline")


@code_pipeline_exception_handler
def handler(event, context):
    """Read the CodePipeline job id and the baseline-job settings.

    NOTE(review): only the beginning of this function appears to be present
    here; the values read below are presumably used further on -- confirm.

    :param event: invocation event; must contain
        ``event["CodePipeline.job"]["id"]``
    :param context: not used in the visible part of the function
    """
    # Extract the Job ID
    job_id = event["CodePipeline.job"]["id"]

    # Settings come from environment variables; indexing os.environ raises
    # KeyError for any variable that is not set.
    baseline_job_name = os.environ["BASELINE_JOB_NAME"]
    assets_bucket = os.environ["ASSETS_BUCKET"]
    training_data_location = os.environ["TRAINING_DATA_LOCATION"]
    baseline_job_output_location = os.environ["BASELINE_JOB_OUTPUT_LOCATION"]
    instance_type = os.environ["INSTANCE_TYPE"]
Ejemplo n.º 18
0
 def __init__(self, config: Scheduler, time_zone: str):
     """Set up the scheduler from its configuration.

     :param config: provides ``BROWSER_HEADLESS`` and ``INTERVAL``
     :param time_zone: time zone name handed to ``pytz.timezone``
     """
     self.headless = config.BROWSER_HEADLESS
     self.interval = config.INTERVAL

     zone = pytz.timezone(time_zone)
     self.tz = zone

     scheduler_log = logger.get_logger('VisitScheduler')
     self.logger = scheduler_log
Ejemplo n.º 19
0
 def __init__(self, username: str, password: str, headless: bool = False):
     """Store the credentials and browser mode, and create a logger.

     :param username: stored as ``self.username``
     :param password: stored as ``self.password``
     :param headless: stored as ``self.headless``; defaults to False
     """
     self.username = username
     self.password = password
     self.headless = headless

     visitor_log = logger.get_logger('Visitor')
     self.logger = visitor_log
Ejemplo n.º 20
0
def resend_user_key(self, user: User):
    """Send the user a login link through the ``mail`` module.

    :param user: the User to send the link to
    """
    task_logger = get_logger(self)
    mail.send_login_link(user, task_logger)
Ejemplo n.º 21
0
def test_get_level_locally():
    """With no handlers on the root logger, the returned logger's level is
    NOTSET (0)."""
    root = logging.getLogger()
    root.handlers = []

    level = get_logger(__name__).level
    assert level == logging.NOTSET
Ejemplo n.º 22
0
def cleanup_subscriptions(self):
    """Delete stale subscriptions and log how many were removed.

    Nothing is logged when no subscription was deleted.
    """
    logger = get_logger(self)
    _, counts = Subscription.objects.stale().delete()
    # The per-model dict returned by delete() has no entry for a model when
    # none of its rows were deleted, so fall back to 0 instead of raising
    # KeyError.
    subs_count = counts.get("user.Subscription", 0)
    if subs_count:
        logger.info("Deleted %s unconfirmed subscriptions", subs_count)
Ejemplo n.º 23
0
def send_staff_notification(self, sub: Subscription, title, message):
    """Send a staff notification email about a subscription.

    :param sub: the Subscription the notification concerns
    :param title: email title; "New Message" is used when it is falsy
    :param message: message passed through to the mail helper
    """
    subject = title or "New Message"
    mail.send_staff_notification_email(sub, subject, message,
                                       get_logger(self))