Example #1
from datetime import datetime
# ErrorHandler, FileProcessLock, ProcessingOverlapError, Participant, FilesToProcess,
# FILE_PROCESS_PAGE_SIZE, do_process_user_file_chunks, chunk_file_lambda_handler,
# EverythingWentFine and DATA_PROCESSING_NO_ERROR_STRING come from the surrounding
# project and are assumed to be importable in this module.
def process_file_chunks():
    """
    This is the function that is called from the command line.  It runs through all new files
    that have been uploaded and 'chunks' them. Handles logic for skipping bad files, raising
    errors appropriately.
    This is primarily called manually during testing and debugging.
    """
    # Initialize the process and ensure there is no other process running at the same time
    error_handler = ErrorHandler()
    if FileProcessLock.islocked():
        raise ProcessingOverlapError(
            "Data processing overlapped with a previous data indexing run.")
    FileProcessLock.lock()

    try:
        number_bad_files = 0

        # Get the list of participants with open files to process
        participants = Participant.objects.filter(
            files_to_process__isnull=False).distinct()
        print("processing files for the following users: %s" %
              ",".join(participants.values_list('patient_id', flat=True)))

        for participant in participants:
            while True:
                previous_number_bad_files = number_bad_files
                starting_length = participant.files_to_process.exclude(
                    deleted=True).count()

                print(
                    "%s processing %s, %s files remaining" %
                    (datetime.now(), participant.patient_id, starting_length))

                # Process up to FILE_PROCESS_PAGE_SIZE files and add any that failed to the bad-file count
                number_bad_files += do_process_user_file_chunks(
                    count=FILE_PROCESS_PAGE_SIZE,
                    error_handler=error_handler,
                    skip_count=number_bad_files,
                    participant=participant,
                )

                # If the queue length is unchanged and no new bad files were counted, nothing was processed, so stop
                if (participant.files_to_process.exclude(deleted=True).count()
                        == starting_length
                        and previous_number_bad_files == number_bad_files):
                    # Cases:
                    #   every file broke, might as well fail here, and would cause infinite loop otherwise.
                    #   no new files.
                    break
    finally:
        FileProcessLock.unlock()

    # Surface any collected errors; by convention, success is reported by raising an exception.
    error_handler.raise_errors()
    raise EverythingWentFine(DATA_PROCESSING_NO_ERROR_STRING)
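
# --- Hypothetical usage sketch (not part of the original source) ---
# The docstring above says this variant is run manually during testing and debugging,
# and it signals success by raising EverythingWentFine. This assumed wrapper only
# illustrates that calling convention; it is not the project's actual entry point.
def run_file_processing_manually():
    try:
        process_file_chunks()
    except EverythingWentFine:
        print("file processing finished without errors")
    except ProcessingOverlapError as overlap:
        print("skipped, another run already holds the lock: %s" % overlap)
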
def process_file_chunks_lambda():
    """
    This is the function that is called from the command line.  It runs through all new files
    that have been uploaded and 'chunks' them. Handles logic for skipping bad files, raising
    errors appropriately.
    This is primarily called manually during testing and debugging.
    """
    # Initialize the process and ensure there is no other process running at the same time
    error_handler = ErrorHandler()
    if FileProcessLock.islocked():
        raise ProcessingOverlapError(
            "Data processing overlapped with a previous data indexing run.")
    FileProcessLock.lock()

    try:
        number_bad_files = 0

        # Get the list of participants with open files to process
        participants = Participant.objects.filter(
            files_to_process__isnull=False).distinct()
        print("processing files for the following users: %s" %
              ",".join(participants.values_list('patient_id', flat=True)))

        for participant in participants:
            for fp in participant.files_to_process.all():
                print(fp.s3_file_path)
                event = {
                    'Records': [{
                        's3': {
                            'object': {
                                'key': fp.s3_file_path
                            }
                        }
                    }]
                }

                # The handler's context argument is unused here, so an empty list is passed in its place.
                chunk_file_lambda_handler(event, [])

    finally:
        FileProcessLock.unlock()

    error_handler.raise_errors()
    raise EverythingWentFine(DATA_PROCESSING_NO_ERROR_STRING)
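
# --- Hypothetical handler sketch (not the project's chunk_file_lambda_handler) ---
# The loop above wraps each pending file path in a minimal S3-style event before
# invoking the handler. A handler shaped for that event would pull the object key
# back out like this; it is only an assumed illustration of the event layout.
def example_chunk_handler(event, context):
    for record in event['Records']:
        s3_file_path = record['s3']['object']['key']
        print("would chunk the uploaded file at: %s" % s3_file_path)
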
def process_file_chunks():
    """ This is the function that is called from cron.  It runs through all new files that have
    been uploaded and 'chunks' them. Handles logic for skipping bad files, raising errors
    appropriately. """
    error_handler = ErrorHandler()
    if FileProcessLock.islocked():
        raise ProcessingOverlapError(
            "Data processing overlapped with a previous data indexing run.")
    FileProcessLock.lock()
    number_bad_files = 0

    user_ids = set(FilesToProcess(field="user_id"))
    print "processing files for the following users: %s" % ",".join(user_ids)
    for user_id in user_ids:
        while True:
            previous_number_bad_files = number_bad_files
            starting_length = FilesToProcess.count(user_id=user_id)

            print("%s processing %s, %s files remaining" % (
                datetime.now(), user_id, starting_length))

            number_bad_files += do_process_user_file_chunks(
                count=FILE_PROCESS_PAGE_SIZE,
                error_handler=error_handler,
                skip_count=number_bad_files,
                user_id=user_id)

            if starting_length == FilesToProcess.count(
                    user_id=user_id):  # zero files processed
                if previous_number_bad_files == number_bad_files:
                    # Cases:
                    #   every file broke, might as well fail here, and would cause infinite loop otherwise.
                    #   no new files.
                    break
                else:
                    continue
    FileProcessLock.unlock()
    error_handler.raise_errors()
    raise EverythingWentFine(DATA_PROCESSING_NO_ERROR_STRING)
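
# --- Hypothetical lock sketch (the project's real FileProcessLock is not shown) ---
# All three variants above rely on the same three-call interface: islocked(), lock()
# and unlock(). A minimal stand-in, assuming a simple on-disk flag file, could look
# like this; it only illustrates the interface, not the real locking backend.
import os

class ExampleFileProcessLock(object):
    LOCK_PATH = "/tmp/file_process.lock"  # assumed location, for illustration only

    @classmethod
    def islocked(cls):
        return os.path.exists(cls.LOCK_PATH)

    @classmethod
    def lock(cls):
        open(cls.LOCK_PATH, "w").close()

    @classmethod
    def unlock(cls):
        if os.path.exists(cls.LOCK_PATH):
            os.remove(cls.LOCK_PATH)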