예제 #1
0
def remote_copy(request_id, src_file_name, registration_id):
    '''
    Send a source file to a remote machine by calling http_file_upload,
    recording COPYING / COPIED / FAILED status along the way.

    @param request_id: stored in the stats record under Constants.REQUEST_GUID
    @param src_file_name: file handed to http_file_upload
    @param registration_id: handed to http_file_upload together with the file
    @raise ExtractionError: for any failure other than RemoteCopyError
    A RemoteCopyError is logged, recorded as FAILED and turned into a retry
    of this task.
    '''
    celery_id = remote_copy.request.id
    task_info = {
        Constants.TASK_ID: celery_id,
        Constants.CELERY_TASK_ID: celery_id,
        Constants.REQUEST_GUID: request_id
    }
    try:
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.COPYING})
        http_file_upload(src_file_name, registration_id)
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.COPIED})
    except RemoteCopyError as copy_error:
        reason = str(copy_error)
        log.error("Exception happened in remote copy. " + reason)
        failure_record = {
            Constants.STATUS: ExtractStatus.FAILED,
            Constants.INFO: 'remote copy has failed: ' + reason
        }
        insert_extract_stats(task_info, failure_record)
        try:
            # Workaround kept from the original code: the exception is raised
            # and caught first because passing a never-raised exception via
            # the exc option was reported not to work with retry.
            raise ExtractionError(reason)
        except ExtractionError as wrapped:
            # A copy failure may be a transient network problem, so retry.
            raise remote_copy.retry(
                args=[request_id, src_file_name, registration_id],
                exc=wrapped)
    except Exception as unexpected:
        raise ExtractionError(str(unexpected))
예제 #2
0
def archive_with_encryption(request_id, recipients, archive_file_name,
                            directory):
    '''
    Archive everything in a directory into an encrypted archive file.

    The GPG binary, home directory and key server are read with get_setting
    and passed to encrypted_archive_files together with the recipients.
    ARCHIVING is recorded before the work starts and ARCHIVED once it is done.

    @param request_id: stored in the stats record under Constants.REQUEST_GUID
    @param recipients: passed through to encrypted_archive_files
    @param archive_file_name: name of the archive file to produce
    @param directory: directory whose content is archived
    @raise ExtractionError: on any failure that is not a GPGPublicKeyException
    A GPGPublicKeyException is treated as recoverable and turned into a retry
    of this task.
    '''
    # Built before the try block so that the failure handler below can always
    # use it; binding it inside the try could leave it undefined in the
    # handler and hide the real error behind an UnboundLocalError.
    task_info = {
        Constants.TASK_ID: archive_with_encryption.request.id,
        Constants.CELERY_TASK_ID: archive_with_encryption.request.id,
        Constants.REQUEST_GUID: request_id
    }
    retryable = False
    exception_thrown = False

    try:
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.ARCHIVING})

        gpg_binary_file = get_setting(Config.BINARYFILE)
        homedir = get_setting(Config.HOMEDIR)
        keyserver = get_setting(Config.KEYSERVER)
        encrypted_archive_files(directory,
                                recipients,
                                archive_file_name,
                                homedir=homedir,
                                keyserver=keyserver,
                                gpgbinary=gpg_binary_file)
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.ARCHIVED})
    except Exception as e:
        # Only a public key problem is considered recoverable; everything
        # else fails the task for good.
        retryable = isinstance(e, GPGPublicKeyException)
        exception_thrown = True
        insert_extract_stats(task_info, {
            Constants.STATUS: ExtractStatus.FAILED,
            Constants.INFO: str(e)
        })

    if not exception_thrown:
        return

    if retryable:
        try:
            # Workaround kept from the original code: the exception is raised
            # and caught first because passing a never-raised exception via
            # the exc option was reported not to work with retry.
            raise ExtractionError()
        except ExtractionError as exc:
            raise archive_with_encryption.retry(
                args=[request_id, recipients, archive_file_name, directory],
                exc=exc)
    raise ExtractionError()
def copy_to_sftp_lz(request_id,
                    src_file_name,
                    tenant,
                    gatekeeper,
                    sftp_info,
                    timeout=1800):
    '''
    Copy a source file to a remote machine through
    edextract.utils.file_remote_copy.copy, recording COPYING / COPIED /
    FAILED status along the way.

    @param request_id: stored in the stats record under Constants.REQUEST_GUID
    @param src_file_name: file handed to the copy helper
    @param tenant: passed through to the copy helper
    @param gatekeeper: passed through to the copy helper
    @param sftp_info: indexable value; items 0, 1 and 2 are passed to the
                      copy helper
    @param timeout: passed to the copy helper and kept on retry
    @raise ExtractionError: for any failure other than RemoteCopyError
    A RemoteCopyError is logged, recorded as FAILED and turned into a retry
    of this task.
    '''
    celery_id = copy_to_sftp_lz.request.id
    task_info = {
        Constants.TASK_ID: celery_id,
        Constants.CELERY_TASK_ID: celery_id,
        Constants.REQUEST_GUID: request_id
    }
    try:
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.COPYING})
        edextract.utils.file_remote_copy.copy(
            src_file_name, sftp_info[0], tenant, gatekeeper,
            sftp_info[1], sftp_info[2], timeout=timeout)
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.COPIED})
    except RemoteCopyError as copy_error:
        reason = str(copy_error)
        log.error("Exception happened in remote copy to sftp lz. " + reason)
        failure_record = {
            Constants.STATUS: ExtractStatus.FAILED,
            Constants.INFO: 'remote copy to sftp lz has failed: ' + reason
        }
        insert_extract_stats(task_info, failure_record)
        try:
            # Workaround kept from the original code: the exception is raised
            # and caught first because passing a never-raised exception via
            # the exc option was reported not to work with retry.
            raise ExtractionError(reason)
        except ExtractionError as wrapped:
            # A copy failure may be a transient network problem, so retry
            # with the same arguments and the same timeout.
            retry_args = [
                request_id, src_file_name, tenant, gatekeeper, sftp_info
            ]
            raise copy_to_sftp_lz.retry(args=retry_args,
                                        kwargs={'timeout': timeout},
                                        exc=wrapped)
    except Exception as unexpected:
        raise ExtractionError(str(unexpected))
예제 #4
0
def prepare_path(request_id, paths):
    '''
    Call file_utils.prepare_path for every entry of the given list of
    directory paths.

    @param request_id: stored in the stats record under Constants.REQUEST_GUID
    @param paths: iterable of directory paths to prepare
    @raise ExtractionError: when preparing any of the paths fails
    '''
    celery_id = prepare_path.request.id
    task_info = {
        Constants.TASK_ID: celery_id,
        Constants.CELERY_TASK_ID: celery_id,
        Constants.REQUEST_GUID: request_id
    }
    try:
        for directory in paths:
            file_utils.prepare_path(directory)
    except Exception as error:
        # A failure here is treated as unrecoverable: the task is not
        # retried, the failure is logged and recorded, then reported.
        log.error(error)
        failure_record = {
            Constants.STATUS: ExtractStatus.FAILED,
            Constants.INFO: str(error)
        }
        insert_extract_stats(task_info, failure_record)
        raise ExtractionError()
예제 #5
0
def archive(request_id, archive_file_name, directory):
    '''
    Archive everything in a directory into the given archive file.

    ARCHIVING is recorded before archive_files is called and ARCHIVED once it
    returns; any failure is recorded as FAILED with the error text.

    @param request_id: stored in the stats record under Constants.REQUEST_GUID
    @param archive_file_name: name of the archive file to produce
    @param directory: directory whose content is archived
    @raise ExtractionError: on any failure; the task is not retried
    '''
    # Built before the try block so that the failure handler below can always
    # use it; binding it inside the try could leave it undefined in the
    # handler and hide the real error behind an UnboundLocalError.
    task_info = {
        Constants.TASK_ID: archive.request.id,
        Constants.CELERY_TASK_ID: archive.request.id,
        Constants.REQUEST_GUID: request_id
    }
    try:
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.ARCHIVING})
        archive_files(directory, archive_file_name)
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.ARCHIVED})
    except Exception as e:
        # unrecoverable exception
        insert_extract_stats(task_info, {
            Constants.STATUS: ExtractStatus.FAILED,
            Constants.INFO: str(e)
        })
        # Chain explicitly so the original failure stays attached as the
        # cause of the ExtractionError.
        raise ExtractionError() from e
예제 #6
0
def generate_item_or_raw_extract_file(tenant, request_id, task):
    """
    Generates an item level/raw extract file given task arguments.

    The output locations are validated first (parent directories of the
    output files for QUERY_ITEMS_CSV, the output directories themselves for
    QUERY_RAW_XML), then the extract function returned by get_extract_func is
    called. When tenant is None the FAILED_NO_TENANT status is recorded and
    nothing is extracted.

    @param tenant: Tenant name
    @param request_id: Extract request ID
    @param task: Calling task; a mapping read through the TaskConstants keys
    @raise ExtractionError: when a FileNotFoundError occurred (not retried)
    Any other failure is turned into a retry of this task.
    """
    task_id = task[TaskConstants.TASK_TASK_ID]
    extract_type = task[TaskConstants.EXTRACTION_DATA_TYPE]
    log.info(
        'execute {task_name} for task {task_id}, extract type {extract_type}'.
        format(task_name=generate_item_or_raw_extract_file.name,
               task_id=task_id,
               extract_type=extract_type))

    # Both entries may hold either a single value or a list; normalize to
    # lists so the code below can always iterate.
    output_dirs = task[TaskConstants.DIRECTORY_TO_ARCHIVE]
    if not isinstance(output_dirs, list):
        output_dirs = [output_dirs]
    output_files = task[TaskConstants.TASK_FILE_NAME]
    if not isinstance(output_files, list):
        output_files = [output_files]

    task_info = {
        Constants.TASK_ID: task_id,
        Constants.CELERY_TASK_ID: generate_item_or_raw_extract_file.request.id,
        Constants.REQUEST_GUID: request_id
    }
    retryable = False
    exception_thrown = False
    # Last output file that was looked at; used for cleanup on failure.
    output_file = None
    try:
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.EXTRACTING})
        if tenant is None:
            insert_extract_stats(
                task_info, {Constants.STATUS: ExtractStatus.FAILED_NO_TENANT})
        else:
            # Compare by equality, not identity: an identity check is only
            # reliable for singletons, and a value taken from the task
            # mapping is not guaranteed to be the very same object as the
            # constant.
            is_item_csv = extract_type == ExtractionDataType.QUERY_ITEMS_CSV
            if is_item_csv:
                for output_file in output_files:
                    parent_dir = os.path.dirname(output_file)
                    if not os.path.isdir(parent_dir):
                        raise FileNotFoundError(parent_dir + " doesn't exist")
            elif extract_type == ExtractionDataType.QUERY_RAW_XML:
                for output_dir in output_dirs:
                    if not os.path.isdir(output_dir):
                        raise FileNotFoundError(output_dir + " doesn't exist")

            # for item level the output path is a list of one or more files
            # and for raw extract the output path is a list of one or more
            # directory to place all the matching xml files
            output_paths = output_files if is_item_csv else output_dirs
            # Extract data to file
            extract_func = get_extract_func(extract_type)
            extract_func(tenant, output_paths, task_info, task)

    except Exception as e:
        # NOTE(review): only the last output file seen by the validation
        # loop is removed; with several item level files the others are left
        # in place - confirm whether that is intended.
        if output_file is not None and os.path.isfile(output_file):
            # file should be deleted if there is an error
            os.unlink(output_file)
        log.error(e)
        insert_extract_stats(task_info, {
            Constants.STATUS: ExtractStatus.FAILED,
            Constants.INFO: str(e)
        })
        exception_thrown = True
        # A missing file or directory is unrecoverable, retrying would only
        # waste time; every other failure is retried.
        retryable = not isinstance(e, FileNotFoundError)

    if exception_thrown:
        if retryable:
            # Workaround kept from the original code: the exception is raised
            # and caught first because passing a never-raised exception via
            # the exc option was reported not to work with retry.
            try:
                raise ExtractionError()
            except ExtractionError as exc:
                # Retry THIS task; the previous code retried
                # generate_extract_file by mistake.
                raise generate_item_or_raw_extract_file.retry(
                    args=[tenant, request_id, task], exc=exc)
        else:
            raise ExtractionError()
예제 #7
0
def generate_extract_file(tenant, request_id, task):
    """
    Generates an extract file given task arguments.

    The parent directory of the output file must exist; the extract function
    returned by get_extract_func for the task's extract type then writes the
    data. When tenant is None the FAILED_NO_TENANT status is recorded and
    nothing is extracted.

    @param tenant: Tenant name
    @param request_id: Extract request ID
    @param task: Calling task; a mapping read through the TaskConstants keys
    @raise ExtractionError: when a FileNotFoundError occurred (not retried)
    Any other failure is turned into a retry of this task.
    """
    task_id = task[TaskConstants.TASK_TASK_ID]
    extract_type = task[TaskConstants.EXTRACTION_DATA_TYPE]
    start_message = (
        'execute {task_name} for task {task_id}, extract type {extract_type}'
    ).format(task_name=generate_extract_file.name,
             task_id=task_id,
             extract_type=extract_type)
    log.info(start_message)

    output_file = task[TaskConstants.TASK_FILE_NAME]
    task_info = {
        Constants.TASK_ID: task_id,
        Constants.CELERY_TASK_ID: generate_extract_file.request.id,
        Constants.REQUEST_GUID: request_id
    }
    failed = False
    should_retry = False

    try:
        insert_extract_stats(task_info,
                             {Constants.STATUS: ExtractStatus.EXTRACTING})
        if tenant is None:
            insert_extract_stats(
                task_info, {Constants.STATUS: ExtractStatus.FAILED_NO_TENANT})
        else:
            if not os.path.isdir(os.path.dirname(output_file)):
                raise FileNotFoundError(
                    os.path.dirname(output_file) + " doesn't exist")

            # Look up the extract function for this type and let it write
            # the data to the output file.
            run_extract = get_extract_func(extract_type)
            run_extract(tenant, output_file, task_info, task)
    except Exception as error:
        # A partially written output file must not survive a failure.
        if os.path.isfile(output_file):
            os.unlink(output_file)
        log.error(error)
        failure_record = {
            Constants.STATUS: ExtractStatus.FAILED,
            Constants.INFO: str(error)
        }
        insert_extract_stats(task_info, failure_record)
        failed = True
        # A missing file or directory is unrecoverable, retrying would only
        # waste time; every other failure is retried.
        should_retry = not isinstance(error, FileNotFoundError)

    if not failed:
        return
    if not should_retry:
        raise ExtractionError()

    # Workaround kept from the original code: the exception is raised and
    # caught first because passing a never-raised exception via the exc
    # option was reported not to work with retry.
    try:
        raise ExtractionError()
    except ExtractionError as wrapped:
        raise generate_extract_file.retry(
            args=[tenant, request_id, task], exc=wrapped)