Example #1
def delete_search(request, search_id):
    '''
    Flag the search as deleted; the deletion processor will pick it up
    and remove it from the DB and the storage location
    '''

    error_message = ""
    # Verify that the user is logged in
    if not request.session.get('userid', False):
        return redirect('/login')

    try:
        search_obj = searches.objects.get(search_id=search_id)
        logging.info(f"Marking search as deleted: {search_id}. {search_obj}")

        search_obj.deleted = True
        search_obj.save()

    except Exception as exp:  # pylint: disable=broad-except
        error = create_error_message(exp, os.path.basename(__file__))
        log_error(f"Error marking search as deleted:  {search_id}. {error}",
                  json.dumps(dict(request.POST)))

        if settings.DEBUG:
            error_message = error
        else:
            error_message = "An unexpected error has occurred."

    request.session["error_message"] = error_message
    return redirect(mysearches)
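A view like this is normally wired up in the app's urls.py. A minimal sketch, assuming the views live in textassembler_web.views; the URL patterns and names below are illustrative assumptions, not the project's actual routing:

# Hypothetical urls.py for the views shown in these examples (paths and names are assumptions).
from django.urls import path

from textassembler_web import views

urlpatterns = [
    path('search/', views.search, name='search'),
    path('delete/<int:search_id>/', views.delete_search, name='delete_search'),
    path('download/<int:search_id>/', views.download_search, name='download_search'),
]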
Example #2
    def get_queue(self):
        '''
        Get the searches to be deleted
        Returns:
            queue (list): Items to be deleted
            cont (bool): If the loop should continue or not
        '''
        try:
            # delete searches completed/failed before this date
            delete_date = timezone.now() - datetime.timedelta(
                days=settings.NUM_MONTHS_KEEP_SEARCHES * 30)
            queue = self.searches.objects.filter(
                Q(date_completed_compression__lte=delete_date)
                | Q(failed_date__lte=delete_date)
                | Q(deleted=True)).order_by('-update_date')
            self.retry_counts["database"] = 0
            if queue:
                return (queue, False)
        except OperationalError as ex:
            if self.retry_counts["database"] <= settings.NUM_PROCESSOR_RETRIES:
                logging.warning(
                    f"Deletion Processor failed to retrieve the deletion queue. "
                    f"Will try again in {settings.DB_WAIT_TIME} seconds. {ex}")
                # wait and re-try (giving this more time in case the DB server is being rebooted)
                time.sleep(settings.DB_WAIT_TIME)
                self.retry_counts["database"] += 1
            else:
                log_error(
                    f"Stopping. Deletion Processor failed to retrieve the pending deletion queue. {ex}")
                self.terminate = True
        return (None, True)
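The retention cutoff above approximates months as 30 days rather than using calendar months. A standalone illustration of that arithmetic, using plain datetime in place of django.utils.timezone and an assumed setting value:

import datetime

NUM_MONTHS_KEEP_SEARCHES = 6  # assumed value; the real value comes from settings
# searches completed or failed before this moment are eligible for deletion
delete_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
    days=NUM_MONTHS_KEEP_SEARCHES * 30)
print(delete_date)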
Example #3
    def delete_search_record(self):
        '''
        Delete the search record from the database
        Returns:
            cont (bool): If the loop should continue
        '''
        try:
            self.cur_search.delete()
            self.retry_counts["database"] = 0
            return False
        except OperationalError as ex:
            if self.retry_counts["database"] <= settings.NUM_PROCESSOR_RETRIES:
                # wait and re-try (giving this more time in case the DB server is being rebooted)
                time.sleep(settings.DB_WAIT_TIME)
                self.retry_counts["database"] += 1
            else:
                log_error(
                    f"Stopping. Deletion Processor failed due to a database connectivity issue "
                    f"(search id={'N/A' if self.cur_search is None else self.cur_search.search_id}). "
                    f"{create_error_message(ex, os.path.basename(__file__))}")
                self.terminate = True
            return True
    def get_queue(self):
        '''
        Check if there are items in the queue to process that have
        completed downloading their results and haven't already
        completed compression.
        Returns:
            queue (list): Items to be processed
            cont (bool): If the loop should continue or not
        '''
        # check that there are items in the queue to process that have completed
        # downloading results and haven't already completed compression
        try:
            queue = self.searches.objects.filter(
                date_completed__isnull=False,
                date_completed_compression__isnull=True,
                failed_date__isnull=True,
                deleted=False).order_by('-update_date')
            self.retry_counts["database"] = 0
            if not queue:
                return (None, True)
        except OperationalError as ex:
            if self.retry_counts["database"] <= settings.NUM_PROCESSOR_RETRIES:
                logging.warning(
                    f"Compression Processor failed to retrieve the compress queue. "
                    f"Will try again in {settings.DB_WAIT_TIME} seconds. {ex}")
                # wait and re-try (giving this more time in case the DB server is being rebooted)
                time.sleep(settings.DB_WAIT_TIME)
                self.retry_counts["database"] += 1
            else:
                log_error(f"Stopping. Queue Processor failed to retrieve the search queue. {ex}")
                self.terminate = True
            return (None, True)
        return (queue, False)
    def update_search_with_results(self):
        '''
        Save the compression with the final results
        Returns:
            cont (bool): If the loop should continue
            send_email (bool): If an email should be sent to the user
        '''
        try:
            self.cur_search.update_date = timezone.now()
            self.cur_search.date_completed_compression = timezone.now()
            if not self.cur_search.user_notified and settings.NOTIF_EMAIL_DOMAIN:
                self.cur_search.user_notified = True
                send_email = True
            else:
                send_email = False
            self.cur_search.save()
            self.retry_counts["database"] = 0
            return (False, send_email)
        except OperationalError as ex:
            if self.retry_counts["database"] <= settings.NUM_PROCESSOR_RETRIES:
                # wait and re-try (giving this more time in case the DB server is being rebooted)
                time.sleep(settings.DB_WAIT_TIME)
                self.retry_counts["database"] += 1
            else:
                log_error(
                    f"Stopping. Compression Processor failed due to a database connectivity issue "
                    f"(search id={'N/A' if self.cur_search is None else self.cur_search.search_id}). "
                    f"{create_error_message(ex, os.path.basename(__file__))}")
                self.terminate = True
            return (True, False)
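The methods above assume that self.retry_counts, self.terminate, and self.cur_search are initialized before the polling loop starts. A minimal sketch of that shared state, with the counter keys taken from the code and the default values assumed:

class ProcessorState:
    '''Hypothetical shared state for the processors above (a sketch, not the project's code).'''

    def __init__(self):
        self.terminate = False   # set to True to stop the main loop
        self.cur_search = None   # the search record currently being processed
        # consecutive-failure counters; each is reset to 0 after a successful attempt
        self.retry_counts = {"database": 0, "storage": 0, "filesystem": 0}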
Example #6
    def handle(self, *args, **options):  # pylint: disable=too-many-branches
        '''
        Handles the command when run from the command line.
        '''
        signal.signal(signal.SIGINT, self.sig_term)
        signal.signal(signal.SIGTERM, self.sig_term)

        self.terminate = False
        self.cur_search = None

        # Grab the necessary models
        self.searches = apps.get_model('textassembler_web', 'searches')

        logging.info(
            f"Starting deletion processing. Removing searches more than {settings.NUM_MONTHS_KEEP_SEARCHES} months old or marked as deleted"
        )
        while not self.terminate:
            time.sleep(1)  # take a quick break!
            try:
                # check that there are items in the queue to be deleted based on date completed/failed
                (queue, cont) = self.get_queue()
                if cont or not queue or self.terminate:
                    continue

                # verify the storage location is accessible
                cont = self.check_storage()
                if cont or self.terminate:
                    continue

                # check that there are items in the queue to be deleted based on date completed/failed
                # we need to recheck this in case it changed while waiting for the storage
                # location to become accessible
                (queue, cont) = self.get_queue()
                if cont or not queue or self.terminate:
                    continue
                self.cur_search = queue[0]

                #  remove the files
                self.delete_search_files()

                # delete the search record
                cont = self.delete_search_record()
                if cont or not queue or self.terminate:
                    continue

            except Exception as exp:  #pylint: disable=broad-except
                # This scenario shouldn't happen, but handling it just in case
                # so that the service won't quit on-error
                log_error((
                    f"An unexpected error occurred while deleting old searches. "
                    f"{create_error_message(exp, os.path.basename(__file__))}"
                ))
                self.terminate = True  # stop the service since something is horribly wrong
                continue

        # any cleanup after terminate
        logging.info("Stopped deletion processing.")
Example #7
def download_search(request, search_id):
    '''
    Download the result files from the server for the given search
    '''

    # Verify that the user is logged in
    if not request.session.get('userid', False):
        return redirect('/login')

    error_message = ""
    try:
        # make sure the search documents requested are for the user that made the search (HTTP 403)
        search_obj = searches.objects.filter(search_id=search_id)
        if len(search_obj) == 1:
            search_obj = search_obj[0]
            if search_obj.userid != str(request.session['userid']):
                error_message = "You do not have permissions to download searches other than ones you requested."
        else:
            error_message = \
                "The search record could not be located on the server. Please contact a system administrator."

        # make sure the search file exists (HTTP 404)
        if error_message == "":
            zipfile = find_zip_file(search_id)
            if zipfile is None or not os.path.exists(zipfile) or not os.access(
                    zipfile, os.R_OK):
                error_message = \
                    "The search results cannot be located on the server. Please contact a system administrator."

        if error_message == "":
            # download the search zip
            with open(zipfile, 'rb') as flh:
                response = HttpResponse(flh.read(), content_type="application/force-download")
                response['Content-Disposition'] = 'attachment; filename=' + os.path.basename(zipfile)
                request.session["error_message"] = error_message
                return response
    except Exception as exp:  # pylint: disable=broad-except
        error = create_error_message(exp, os.path.basename(__file__))
        log_error(f"Error downloading search {search_id}. {error}",
                  json.dumps(dict(request.POST)))

        if settings.DEBUG:
            error_message = error
        else:
            error_message = "An unexpected error has occurred."

    request.session["error_message"] = error_message
    return redirect(mysearches)
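find_zip_file is used here but not shown in these examples. A hedged sketch of what such a helper could look like, assuming the archive is stored under STORAGE_LOCATION/<search_id>/ as suggested by compress_search below:

import glob
import os

from django.conf import settings

def find_zip_file(search_id):
    '''Hypothetical helper: return the path of the first .zip for the search, or None if there isn't one.'''
    matches = glob.glob(os.path.join(settings.STORAGE_LOCATION, str(search_id), "*.zip"))
    return matches[0] if matches else None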
    def check_storage(self):
        '''
        Check to see if the storage location is available
        Returns:
            continue (bool): If you need to continue the loop
        '''
        if not os.access(settings.STORAGE_LOCATION, os.W_OK) or not os.path.isdir(settings.STORAGE_LOCATION):
            if self.retry_counts["storage"] <= settings.NUM_PROCESSOR_RETRIES:
                logging.error(
                    f"Compression Processor failed due to storage location being inaccessible "
                    f"or not writable. {settings.STORAGE_LOCATION}")
                # wait and retry; if the storage is still not available, then terminate
                time.sleep(settings.STORAGE_WAIT_TIME)
                self.retry_counts["storage"] += 1
            else:
                log_error(
                    f"Stopping. Compression Processor failed due to storage location being "
                    f"inaccessible or not writable. {settings.STORAGE_LOCATION}")
                self.terminate = True
            return True
        self.retry_counts["storage"] = 0
        return False
    def set_start_time(self):
        '''
        Set the start time for the compression to now
        Returns:
            continue (bool): If you need to continue the loop
        '''
        self.cur_search.update_date = timezone.now()
        self.cur_search.date_started_compression = timezone.now()
        try:
            self.cur_search.save()
        except OperationalError as ex:
            if self.retry_counts["database"] <= settings.NUM_PROCESSOR_RETRIES:
                logging.error(
                    f"Failed to update the start time in the database. "
                    f"Will try again in {settings.DB_WAIT_TIME} seconds. {ex}")
                # wait and re-try (giving this more time in case the DB server is being rebooted)
                time.sleep(settings.DB_WAIT_TIME)
                self.retry_counts["database"] += 1
            else:
                log_error(f"Stopping. Failed to set the start time in the database for {self.cur_search.search_id}. {ex}")
                self.terminate = True
            return True
        return False
    def compress_search(self):
        '''
        Compresses the search results
        Returns:
            cont (bool): If the loop should continue
        '''
        try:
            # compress the files for the search
            zippath = os.path.join(settings.STORAGE_LOCATION, str(self.cur_search.search_id))
            zipname = settings.APP_NAME.replace(" ", "") + "_" + self.cur_search.date_submitted.strftime("%Y%m%d_%H%M%S")
            logging.info(f"Starting compression of search {self.cur_search.search_id}.")
            files_to_compress = []
            for root, dirs, files in os.walk(zippath):
                for fln in files:
                    files_to_compress.append(os.path.join(root, fln))
            with zipfile.ZipFile(os.path.join(zippath, zipname + ".zip"), 'w', zipfile.ZIP_DEFLATED) as zipf:
                for fln in files_to_compress:
                    target_name = re.sub(zippath+r'/\d+/\d+/\d+/', '', fln)
                    logging.info(f"Adding file to zip: {fln}. Target Name: {target_name}")
                    zipf.write(fln, target_name)

            logging.info(f"Completed compression of search {self.cur_search.search_id}")

            #  remove non-compressed files
            logging.info(f"Started cleanup of non-compressed files for search {self.cur_search.search_id}")
            for root, dirs, files in os.walk(zippath):
                for dirn in dirs:
                    logging.debug(f"Deleting directory: {os.path.join(root, dirn)}")
                    shutil.rmtree(os.path.join(root, dirn))

            logging.info(f"Completed cleanup of non-compressed files for search {self.cur_search.search_id}")
            self.retry_counts["filesystem"] = 0
        except OSError as ex:
            if self.retry_counts["filesystem"] <= settings.NUM_PROCESSOR_RETRIES:
                logging.error(f"Failed to compress the search: {self.cur_search.search_id}. {ex}")
                self.retry_counts["filesystem"] = self.retry_counts["filesystem"] + 1
            else:
                log_error(f"Stopping. Failed to compress the search for {self.cur_search.search_id}. {create_error_message(ex, os.path.basename(__file__))}")
                self.terminate = True
            return True
        return False
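The re.sub call above strips the numeric date directories from each file path so only the file name is stored inside the archive. A standalone illustration with a made-up path (the year/month/day layout is an assumption):

import re

zippath = "/storage/42"  # assumed layout: <STORAGE_LOCATION>/<search_id>
fln = zippath + "/2021/03/15/article_001.txt"
target_name = re.sub(zippath + r'/\d+/\d+/\d+/', '', fln)
print(target_name)  # article_001.txt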
Example #11
    def delete_search_files(self):
        '''
        Delete the files on the storage location for the current search
        '''
        logging.info(
            f"Started removal of files for search {self.cur_search.search_id}")
        save_location = os.path.join(settings.STORAGE_LOCATION,
                                     str(self.cur_search.search_id))
        zip_path = find_zip_file(self.cur_search.search_id)

        if os.path.isdir(save_location):
            try:
                shutil.rmtree(save_location)
            except OSError as ex1:
                log_error(
                    f"Could not delete files for search {self.cur_search.search_id}. {ex1}",
                    self.cur_search)
        if zip_path is not None and os.path.exists(zip_path):
            try:
                os.remove(zip_path)
            except OSError as ex2:
                log_error(
                    f"Could not delete the zipped file for search {self.cur_search.search_id}. {ex2}",
                    self.cur_search)
        if os.path.isdir(save_location):
            try:
                os.rmdir(save_location)
            except OSError as ex3:
                log_error(
                    f"Could not delete root directory for search {self.cur_search.search_id}. {ex3}",
                    self.cur_search)
        logging.info(
            f"Completed deletion of files for search {self.cur_search.search_id}"
        )
Example #12
def search(request):  # pylint:disable=too-many-locals, too-many-branches, too-many-statements
    '''
    Render the search page
    '''

    # Verify that the user is logged in
    if not request.session.get('userid', False):
        return redirect('/login')

    filter_data = get_available_filters()
    set_filters = {}
    set_formats = []
    set_post_filters = []
    set_sort_order = None  # default so a missing sort-order selection doesn't raise a NameError later

    logging.debug("==== POST DATA ====")
    logging.debug(request.POST)

    # Set the initial form data
    form = TextAssemblerWebForm(request.POST or None)
    form.set_fields(get_available_filters(False),
                    request.POST['search'] if 'search' in request.POST else '')

    response = {
        "form": form,
        "error_message": "",
        "available_formats": available_formats.objects.all(),
        "available_sort_orders": available_sort_orders.objects.filter(removed__isnull=True),
    }

    # Parse the POST data
    for opt in filter_data:
        matched_filters = {
            k: v
            for k, v in dict(request.POST).items()
            if k.lower() == opt['id'].lower()
        }
        for fld, vals in matched_filters.items():
            set_filters[fld] = vals
    if "selected-formats" in dict(request.POST):
        set_formats = dict(request.POST)['selected-formats']
    if "post_filters" in dict(request.POST):
        set_post_filters = dict(request.POST)['post_filters']
    if "selected-sort-order" in dict(request.POST):
        set_sort_order = int(dict(request.POST)['selected-sort-order'][0])

    # Add post filters to set_filters
    for post_filter in set_post_filters:
        name = post_filter.split("||")[0]
        value = post_filter.split("||")[1]
        fmt = get_format_type(name)
        # convert the value(s) from base64 if the filter expects it
        if fmt == 'base64':
            value = base64.b64decode(value + "=========").decode('utf-8')
        if string_is_int(value):
            value = int(value)
        if name in set_filters:
            set_filters[name].append(value)
        else:
            set_filters[name] = [value]

    # Parse the User For field if an admin
    search_user = request.session['userid']
    if get_is_admin(request.session['userid']):
        if 'user_for' in dict(request.POST):
            search_user = dict(request.POST)['user_for'][0]
            logging.debug(
                f"Overriding search save user from {request.session['userid']} to {search_user}"
            )

    logging.debug("==== SET FILTERS ====")
    logging.debug(set_filters)

    # Validate any data necessary from the post data
    for key, values in set_filters.items():
        ## Make sure Year is an integer
        if key == 'year(Date)':
            for value in values:
                if not string_is_int(value) and not isinstance(value, int):
                    response['error_message'] += \
                        f"The 'Year' field requires only numeric input, provided: {value}."
        else:
            for value in values:
                if not value:
                    response['error_message'] += \
                        f"The '{key}' field can not be blank, please provide a value or remove the filter."
                    break
    if 'Date' in set_filters and 'year(Date)' in set_filters:
        response['error_message'] += \
            "Please use either the year filter or the date range filter, but not a combination of both."
    if len(search_user) > 50:
        response['error_message'] += \
            "The provided 'For User ID' value is longer than the maximum of 50 characters."

    # Send the set filters back to the form to pre-populate the fields
    response["post_data"] = json.dumps(set_filters)

    # Set the last result data to be used in event of form failure to prevent another call to LN API
    if 'result_data' in request.POST and request.POST['result_data']:
        response["result_data"] = json.loads(request.POST['result_data'])

    if request.method == 'POST' and form.is_valid() and response['error_message'] == '':

        clean = form.cleaned_data

        if clean["search"] != "":
            try:
                if "preview-search" in dict(
                        request.POST) or "add-filters" in dict(request.POST):
                    # Preview Search button selected
                    response = handle_preview_search(clean['search'],
                                                     set_filters, response)

                elif "submit-search" in dict(request.POST):
                    # Submit Search button selected
                    response = handle_save_search(search_user, clean['search'],
                                                  set_filters, set_formats,
                                                  set_sort_order, response,
                                                  set_post_filters,
                                                  request.session['userid'])
                    if "error_message" not in response:
                        return response

            except Exception as exp:  # pylint: disable=broad-except
                error = create_error_message(exp, os.path.basename(__file__))
                log_error(
                    f"Error occurred while processing search request. {error}",
                    json.dumps(dict(request.POST)))

                if settings.DEBUG:
                    response["error_message"] = error
                else:
                    response["error_message"] = "An unexpected error has occurred."

                # Set the result data with the previous results if an error occurs
                # only do this if there are not already results since we don't want to overwrite those
                if "result_data" in response and 'search_results' not in response:
                    response['search_results'] = response['result_data']

    elif request.method == 'POST' and not form.is_valid():
        # If there are any form errors, add them to the fields so they highlight the issues
        for field in form.errors:
            form[field].field.widget.attrs['class'] += ' error-field'

    return render(request, "textassembler_web/search.html", response)
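string_is_int is used repeatedly in the validation above but not shown. A minimal sketch of such a helper (hypothetical; the project's version may differ):

def string_is_int(value):
    '''Hypothetical helper: True if the value can be parsed as an integer.'''
    try:
        int(value)
        return True
    except (TypeError, ValueError):
        return False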
    def handle(self, *args, **options): # pylint: disable=too-many-branches
        signal.signal(signal.SIGINT, self.sig_term)
        signal.signal(signal.SIGTERM, self.sig_term)

        self.terminate = False
        self.cur_search = None

        # Grab the necessary models
        self.searches = apps.get_model('textassembler_web', 'searches')

        logging.info("Starting compression processing.")
        while not self.terminate:
            time.sleep(1) # take a quick break!
            try:
                (queue, cont) = self.get_queue()
                if cont or not queue or self.terminate:
                    continue

                # verify the storage location is accessible
                cont = self.check_storage()
                if cont or self.terminate:
                    continue

                # get the next item from the queue
                ## we are doing this again in case the search has been deleted
                ## while waiting for the API to be available
                (queue, cont) = self.get_queue()
                if cont or not queue or self.terminate:
                    continue
                self.cur_search = queue[0]

                # mark the search record as started compression
                cont = self.set_start_time()
                if cont or not queue or self.terminate:
                    continue

                cont = self.compress_search()
                if cont or self.terminate:
                    continue

                ## save the results to the database
                (cont, send_email) = self.update_search_with_results()
                if cont or self.terminate:
                    continue

                #  send email notification
                #   sending this after the DB save in case that fails for some reason
                #   this is to prevent users from receiving multiple notifications
                if send_email:
                    send_user_notification(self.cur_search.userid, self.cur_search.query,
                                           self.cur_search.date_submitted, self.cur_search.num_results_downloaded)

            except Exception as exp: # pylint: disable=broad-except
                # This scenario shouldn't happen, but handling it just in case
                # so that the service won't quit on-error
                log_error((f"Stopping. An unexpected error occurred while compressing the completed search queue. "
                           f"{create_error_message(exp, os.path.basename(__file__))}"))
                self.terminate = True # stop the service since something is horribly wrong
                continue

        # any cleanup after terminate
        logging.info("Stopped compression processing.")
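send_user_notification is also not shown in these examples. A hedged sketch using Django's django.core.mail.send_mail; the recipient address format and message wording are assumptions:

from django.conf import settings
from django.core.mail import send_mail

def send_user_notification(userid, query, date_submitted, num_results):
    '''Hypothetical notification helper; the real implementation is not shown in these examples.'''
    send_mail(
        subject=f"{settings.APP_NAME}: your search results are ready",
        message=(f"Your search '{query}' submitted on {date_submitted:%Y-%m-%d} has finished compressing. "
                 f"{num_results} results are ready to download."),
        from_email=settings.DEFAULT_FROM_EMAIL,
        recipient_list=[f"{userid}@{settings.NOTIF_EMAIL_DOMAIN}"],  # address format is an assumption
    )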