def bulk_import_async(import_id, config, domain, excel_id):
    """Background task: run a bulk case import from a cached upload.

    Re-opens the spreadsheet referenced by ``excel_id`` using the column
    settings in ``config`` and hands the actual work off to ``do_import``.
    """
    cached_upload = DownloadBase.get(excel_id)
    sheet = importer_util.get_spreadsheet(cached_upload, config.named_columns)
    # wrap the result so the payload is compatible with soil
    return {"messages": do_import(sheet, config, domain, task=bulk_import_async)}
def excel_commit(request, domain):
    """Step three of three: kick off the actual import.

    The form posts the column -> case-property mappings; ``ImporterConfig``
    bundles them with everything gathered in the earlier steps.  The upload
    itself is re-validated before the background task is queued.
    """
    config = importer_util.ImporterConfig.from_request(request)
    excel_id = request.session.get(EXCEL_SESSION_ID)
    excel_ref = DownloadBase.get(excel_id)

    # Make sure the cached spreadsheet is still readable before queueing.
    try:
        importer_util.get_spreadsheet(excel_ref, config.named_columns)
    except ImporterError as e:
        return render_error(request, domain, _get_importer_error_message(e))

    download = DownloadBase()
    task = bulk_import_async.delay(
        download.download_id,
        config,
        domain,
        excel_id,
    )
    download.set_task(task)

    # The session key is no longer needed once the task owns the upload.
    request.session.pop(EXCEL_SESSION_ID, None)

    context = {
        'download_id': download.download_id,
        'template': 'importer/partials/import_status.html',
        'domain': domain,
        'report': {
            'name': 'Import: Completed'
        },
        'slug': base.ImportCases.slug,
    }
    return render(request, "importer/excel_commit.html", context)
def excel_commit(request, domain):
    """Step three of three: queue the bulk import task.

    Submitted with the list of column -> case property mappings for this
    upload; ``ImporterConfig`` carries everything gathered from the previous
    steps plus the field data.
    """
    config = importer_util.ImporterConfig.from_request(request)
    excel_id = request.session.get(EXCEL_SESSION_ID)
    excel_ref = DownloadBase.get(excel_id)

    # Re-open the cached spreadsheet now so a bad upload fails fast here
    # rather than inside the background task.
    try:
        importer_util.get_spreadsheet(excel_ref, config.named_columns)
    except ImporterError as e:
        return render_error(request, domain, get_importer_error_message(e))

    download = DownloadBase()
    download.set_task(bulk_import_async.delay(config, domain, excel_id))

    # Drop the session reference; the task holds its own handle now.
    request.session.pop(EXCEL_SESSION_ID, None)

    context = {
        'download_id': download.download_id,
        'template': 'importer/partials/import_status.html',
        'domain': domain,
        'report': {
            'name': 'Import: Completed'
        },
        'slug': base.ImportCases.slug,
    }
    return render(request, "importer/excel_commit.html", context)
def bulk_import_async(import_id, config, domain, excel_id):
    """Background task: run a bulk case import from a cached upload.

    If re-opening the spreadsheet fails, the ImporterError itself is passed
    to ``do_import``, which is expected to turn it into user-facing messages.
    """
    excel_ref = DownloadBase.get(excel_id)
    try:
        spreadsheet_or_error = importer_util.get_spreadsheet(excel_ref, config.named_columns)
    except ImporterError as err:
        # BUGFIX: Python 3 unbinds the `except ... as` target when the
        # handler exits, so binding the error to the target name and falling
        # through left `spreadsheet_or_error` undefined (NameError).  Capture
        # it under a separate, surviving name instead.
        spreadsheet_or_error = err
    result = do_import(spreadsheet_or_error, config, domain, task=bulk_import_async)
    # return compatible with soil
    return {"messages": result}
def excel_commit(request, domain):
    """Step three of three: validate the cached upload and queue the import.

    Submitted with the column -> case property mappings; ``ImporterConfig``
    carries everything gathered from the previous steps plus the field data.
    """
    config = importer_util.ImporterConfig.from_request(request)
    excel_id = request.session.get(EXCEL_SESSION_ID)
    excel_ref = DownloadBase.get(excel_id)
    spreadsheet = importer_util.get_spreadsheet(excel_ref, config.named_columns)

    # Guard clauses: bail out if the cached upload is gone or unreadable.
    if not spreadsheet:
        return _spreadsheet_expired(request, domain)
    if spreadsheet.has_errors:
        messages.error(
            request,
            _('The session containing the file you '
              'uploaded has expired - please upload '
              'a new one.'))
        return HttpResponseRedirect(
            base.ImportCases.get_url(domain=domain) + "?error=cache")

    download = DownloadBase()
    download.set_task(bulk_import_async.delay(
        download.download_id,
        config,
        domain,
        excel_id,
    ))

    # The task owns the upload now; the session key is no longer needed.
    request.session.pop(EXCEL_SESSION_ID, None)

    context = {
        'download_id': download.download_id,
        'template': 'importer/partials/import_status.html',
        'domain': domain,
        'report': {
            'name': 'Import: Completed'
        },
        'slug': base.ImportCases.slug,
    }
    return render(request, "importer/excel_commit.html", context)
def bulk_import_async(config, domain, excel_id):
    """Background task: run a bulk case import, reporting failures as data.

    Errors are deliberately returned (not raised) so the soil/download
    status page can display them to the user.
    """
    excel_ref = DownloadBase.get(excel_id)
    try:
        spreadsheet = importer_util.get_spreadsheet(excel_ref, config.named_columns)
    except Exception as e:
        return {'errors': get_importer_error_message(e)}
    try:
        result = do_import(spreadsheet, config, domain, task=bulk_import_async)
    except Exception as e:
        # BUGFIX: `e.message` was removed in Python 3 and would raise
        # AttributeError inside this handler; str(e) is the portable way
        # to get the exception text.
        return {'errors': 'Error: ' + str(e)}
    # return compatible with soil
    return {'messages': result}
def excel_commit(request, domain):
    """Final import step: check the cached spreadsheet, then queue the task."""
    config = importer_util.ImporterConfig.from_request(request)
    excel_id = request.session.get(EXCEL_SESSION_ID)
    excel_ref = DownloadBase.get(excel_id)
    spreadsheet = importer_util.get_spreadsheet(excel_ref, config.named_columns)

    if not spreadsheet:
        return _spreadsheet_expired(request, domain)
    if spreadsheet.has_errors:
        messages.error(
            request,
            _('The session containing the file you '
              'uploaded has expired - please upload '
              'a new one.'))
        return HttpResponseRedirect(
            base.ImportCases.get_url(domain=domain) + "?error=cache")

    download = DownloadBase()
    download.set_task(
        bulk_import_async.delay(
            download.download_id,
            config,
            domain,
            excel_id,
        ))

    # Session key is finished with once the task has its own reference.
    request.session.pop(EXCEL_SESSION_ID, None)

    return render(
        request, "importer/excel_commit.html", {
            'download_id': download.download_id,
            'template': 'importer/partials/import_status.html',
            'domain': domain,
            'report': {
                'name': 'Import: Completed'
            },
            'slug': base.ImportCases.slug
        })
def excel_commit(request, domain):
    """Final import step: validate the cached upload and start the task."""
    config = ImporterConfig(request)
    excel_id = request.session.get(EXCEL_SESSION_ID)
    excel_ref = DownloadBase.get(excel_id)
    spreadsheet = importer_util.get_spreadsheet(excel_ref, config.named_columns)

    if not spreadsheet:
        return _spreadsheet_expired(request, domain)
    if spreadsheet.has_errors:
        messages.error(
            request,
            _('The session containing the file you '
              'uploaded has expired - please upload '
              'a new one.'))
        return HttpResponseRedirect(
            base.ImportCases.get_url(domain=domain) + "?error=cache")

    download = DownloadBase()
    download.set_task(bulk_import_async.delay(
        download.download_id,
        config,
        domain,
        excel_id,
    ))

    # Clear the stashed upload id; missing key is fine.
    request.session.pop(EXCEL_SESSION_ID, None)

    context = {
        'download_id': download.download_id,
        'template': 'importer/partials/import_status.html',
        'domain': domain,
        'report': {
            'name': 'Import: Completed'
        },
        'slug': base.ImportCases.slug,
    }
    return render(request, "importer/excel_commit.html", context)
def bulk_import_async(import_id, config, domain, excel_id):
    """Background task: import cases from the spreadsheet cached under
    ``excel_id`` and return ``do_import``'s result directly."""
    cached_download = DownloadBase.get(excel_id)
    worksheet = importer_util.get_spreadsheet(cached_download, config.named_columns)
    return do_import(worksheet, config, domain, task=bulk_import_async)
def excel_config(request, domain):
    """
    Step one of three. This is the initial post when the user uploads the
    excel file.

    named_columns:
        Whether or not the first row of the excel sheet contains header
        strings for the columns. This defaults to True and should
        potentially not be an option as it is always used due to how
        important it is to see column headers in the rest of the importer.
    """
    if request.method != 'POST':
        return HttpResponseRedirect(base.ImportCases.get_url(domain=domain))

    if not request.FILES:
        return render_error(request, domain,
                            'Please choose an Excel file to import.')

    named_columns = request.POST.get('named_columns') == "on"
    uploaded_file_handle = request.FILES['file']

    extension = os.path.splitext(uploaded_file_handle.name)[1][1:].strip().lower()

    # NOTE: We may not always be able to reference files from subsequent
    # views if your worker changes, so we have to store it elsewhere
    # using the soil framework.
    if extension not in importer_util.ExcelFile.ALLOWED_EXTENSIONS:
        return render_error(request, domain,
                            'The Excel file you chose could not be processed. '
                            'Please check that it is saved as a Microsoft '
                            'Excel 97/2000 .xls file.')

    # stash content in the default storage for subsequent views
    file_ref = expose_cached_download(
        uploaded_file_handle.read(),
        expiry=1*60*60,
        file_extension=file_extention_from_filename(uploaded_file_handle.name),
    )
    request.session[EXCEL_SESSION_ID] = file_ref.download_id

    spreadsheet = importer_util.get_spreadsheet(file_ref, named_columns)
    if not spreadsheet:
        return _spreadsheet_expired(request, domain)

    columns = spreadsheet.get_header_columns()
    row_count = spreadsheet.get_num_rows()
    if row_count == 0:
        return render_error(request, domain,
                            'Your spreadsheet is empty. '
                            'Please try again with a different spreadsheet.')

    case_types_from_apps = []
    # load types from all modules
    for row in ApplicationBase.view(
        'app_manager/types_by_module',
        reduce=True,
        group=True,
        startkey=[domain],
        endkey=[domain, {}]
    ).all():
        if not row['key'][1] in case_types_from_apps:
            case_types_from_apps.append(row['key'][1])

    case_types_from_cases = get_case_types_for_domain(domain)
    # for this we just want cases that have data but aren't being used anymore.
    # BUGFIX: filter() returns a lazy iterator on Python 3, so the len()
    # check below raised TypeError; build a real list instead.
    case_types_from_cases = [
        case_type for case_type in case_types_from_cases
        if case_type not in case_types_from_apps
    ]

    if len(case_types_from_apps) == 0 and len(case_types_from_cases) == 0:
        return render_error(
            request,
            domain,
            'No cases have been submitted to this domain and there are no '
            'applications yet. You cannot import case details from an Excel '
            'file until you have existing cases or applications.'
        )

    return render(
        request,
        "importer/excel_config.html", {
            'named_columns': named_columns,
            'columns': columns,
            'case_types_from_cases': case_types_from_cases,
            'case_types_from_apps': case_types_from_apps,
            'domain': domain,
            'report': {
                'name': 'Import: Configuration'
            },
            'slug': base.ImportCases.slug
        }
    )
def excel_fields(request, domain):
    """
    Step two of three. Important values that are grabbed from the POST or
    defined by the user on this page:

    named_columns:
        Passed through from last step, see that for documentation

    case_type:
        The type of case we are matching to. When creating new cases, this is
        the type they will be created as. When updating existing cases, this
        is the type that we will search for. If the wrong case type is used
        when looking up existing cases, we will not update them.

    create_new_cases:
        A boolean that controls whether or not the user wanted to create new
        cases for any case that doesn't have a matching case id in the
        upload.

    search_column:
        Which column of the excel file we are using to specify either case
        ids or external ids. This is, strangely, required. If creating new
        cases only you would expect these to be blank with the
        create_new_cases flag set.

    search_field:
        Either case id or external id, determines which type of
        identification we are using to match to cases.

    key_column/value_column:
        These correspond to an advanced feature allowing a user to modify a
        single case with multiple rows.
    """
    named_columns = request.POST['named_columns']
    case_type = request.POST['case_type']
    try:
        search_column = request.POST['search_column']
    except MultiValueDictKeyError:
        # this is only true if your configuration is messed up in an irreparable way
        messages.error(request, _('It looks like you may have accessed this page from a stale page. '
                                  'Please start over.'))
        return _spreadsheet_expired(request, domain)

    search_field = request.POST['search_field']
    create_new_cases = request.POST.get('create_new_cases') == 'on'
    key_value_columns = request.POST.get('key_value_columns') == 'on'
    key_column = ''
    value_column = ''

    download_ref = DownloadBase.get(request.session.get(EXCEL_SESSION_ID))
    spreadsheet = importer_util.get_spreadsheet(download_ref, named_columns)
    if not spreadsheet:
        return _spreadsheet_expired(request, domain)
    columns = spreadsheet.get_header_columns()

    if key_value_columns:
        key_column = request.POST['key_column']
        value_column = request.POST['value_column']

        excel_fields = []
        key_column_index = columns.index(key_column)
        # if key/value columns were specified, get all the unique keys listed
        # NOTE(review): a key column at index 0 is falsy and skipped here —
        # looks like an off-by-one; preserved pending confirmation.
        if key_column_index:
            excel_fields = spreadsheet.get_unique_column_values(key_column_index)

        # concatenate unique key fields with the rest of the columns
        excel_fields = columns + excel_fields
        # remove key/value column names from list
        excel_fields.remove(key_column)
        if value_column in excel_fields:
            excel_fields.remove(value_column)
    else:
        excel_fields = columns

    case_fields = get_case_properties(domain, case_type)

    # hide search column and matching case fields from the update list.
    # BUGFIX: the bare `except:` clauses here swallowed every exception
    # (including KeyboardInterrupt); list.remove only raises ValueError.
    try:
        excel_fields.remove(search_column)
    except ValueError:
        pass

    try:
        case_fields.remove(search_field)
    except ValueError:
        pass

    # we can't actually update this so don't show it
    try:
        case_fields.remove('type')
    except ValueError:
        pass

    return render(
        request,
        "importer/excel_fields.html", {
            'named_columns': named_columns,
            'case_type': case_type,
            'search_column': search_column,
            'search_field': search_field,
            'create_new_cases': create_new_cases,
            'key_column': key_column,
            'value_column': value_column,
            'columns': columns,
            'excel_fields': excel_fields,
            'case_fields': case_fields,
            'domain': domain,
            'report': {
                'name': 'Import: Match columns to fields'
            },
            'slug': base.ImportCases.slug
        }
    )
def bulk_import_async(import_id, config, domain, excel_id):
    """Background task: import/update cases row by row from a cached upload.

    Returns a stats dict (created/matched counts plus per-category error
    row numbers) that the import status page renders.
    """
    task = bulk_import_async
    excel_ref = DownloadBase.get(excel_id)
    spreadsheet = importer_util.get_spreadsheet(excel_ref, config.named_columns)

    if not spreadsheet:
        return {'error': 'EXPIRED'}
    if spreadsheet.has_errors:
        return {'error': 'HAS_ERRORS'}

    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = errors = 0
    blank_external_ids = []
    invalid_dates = []
    owner_id_errors = []
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}

    for row_num in range(row_count):
        DownloadBase.set_progress(task, row_num, row_count)

        # skip first row if it is a header field
        if row_num == 0 and config.named_columns:
            continue

        priming_progress = match_count + created_count + prime_offset
        if priming_progress % PRIME_VIEW_FREQUENCY == 0:
            prime_views(POOL_SIZE)
            # increment so we can't possibly prime on next iteration
            prime_offset += 1

        row = spreadsheet.get_row(row_num)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            blank_external_ids.append(row_num + 1)
            continue

        case, error = importer_util.lookup_case(
            config.search_field,
            search_id,
            domain,
            config.case_type
        )

        try:
            fields_to_update = importer_util.populate_updated_fields(
                config,
                columns,
                row
            )
        except importer_util.InvalidDateException:
            invalid_dates.append(row_num + 1)
            continue

        if case:
            pass
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
        elif error == LookupErrors.MultipleResults:
            too_many_matches += 1
            continue

        uploaded_owner_id = fields_to_update.pop('owner_id', None)
        if uploaded_owner_id:
            # If an owner_id mapping exists, verify it is a valid user
            # or case sharing group
            if importer_util.is_valid_id(uploaded_owner_id, domain, id_cache):
                owner_id = uploaded_owner_id
                id_cache[uploaded_owner_id] = True
            else:
                owner_id_errors.append(row_num + 1)
                id_cache[uploaded_owner_id] = False
                continue
        else:
            # if they didn't supply an owner_id mapping, default to current
            # user
            owner_id = user_id

        external_id = fields_to_update.pop('external_id', None)

        if not case:
            new_case_id = uuid.uuid4().hex
            try:
                caseblock = CaseBlock(
                    create=True,
                    case_id=new_case_id,
                    version=V2,
                    user_id=user_id,
                    owner_id=owner_id,
                    case_type=config.case_type,
                    update=fields_to_update
                )
                if config.search_field == 'external_id':
                    # NOTE(review): item assignment on a CaseBlock looks
                    # unusual — confirm CaseBlock supports __setitem__.
                    caseblock['external_id'] = search_id
                submit_case_block(caseblock, domain, username, user_id)
                created_count += 1
            except CaseBlockError:
                errors += 1
        elif case and case.type == config.case_type:
            extras = {}
            if external_id:
                extras['external_id'] = external_id
            try:
                caseblock = CaseBlock(
                    create=False,
                    case_id=case._id,
                    owner_id=owner_id,
                    version=V2,
                    update=fields_to_update,
                    **extras
                )
                submit_case_block(caseblock, domain, username, user_id)
                match_count += 1
            except CaseBlockError:
                errors += 1

    return {
        'created_count': created_count,
        'match_count': match_count,
        'too_many_matches': too_many_matches,
        'blank_externals': blank_external_ids,
        'invalid_dates': invalid_dates,
        'owner_id_errors': owner_id_errors,
        'errors': errors,
    }
def excel_fields(request, domain):
    """Step two: let the user map spreadsheet columns to case properties.

    Reads the selections posted from step one (named_columns, case_type,
    search column/field, optional key/value columns) and renders the
    column-to-field matching form.
    """
    named_columns = request.POST['named_columns']
    case_type = request.POST['case_type']
    search_column = request.POST['search_column']
    search_field = request.POST['search_field']
    create_new_cases = request.POST.get('create_new_cases') == 'on'
    key_value_columns = request.POST.get('key_value_columns') == 'on'
    key_column = ''
    value_column = ''

    download_ref = DownloadBase.get(request.session.get(EXCEL_SESSION_ID))
    spreadsheet = importer_util.get_spreadsheet(download_ref, named_columns)
    if not spreadsheet:
        return _spreadsheet_expired(request, domain)
    columns = spreadsheet.get_header_columns()

    if key_value_columns:
        key_column = request.POST['key_column']
        value_column = request.POST['value_column']

        excel_fields = []
        key_column_index = columns.index(key_column)
        # if key/value columns were specified, get all the unique keys listed
        if key_column_index:
            excel_fields = spreadsheet.get_unique_column_values(
                key_column_index)

        # concatenate unique key fields with the rest of the columns
        excel_fields = columns + excel_fields
        # remove key/value column names from list
        excel_fields.remove(key_column)
        if value_column in excel_fields:
            excel_fields.remove(value_column)
    else:
        excel_fields = columns

    case_fields = importer_util.get_case_properties(domain, case_type)

    # hide search column and matching case fields from the update list.
    # BUGFIX: bare `except:` swallowed every exception type; list.remove
    # only raises ValueError when the item is absent.
    try:
        excel_fields.remove(search_column)
    except ValueError:
        pass

    try:
        case_fields.remove(search_field)
    except ValueError:
        pass

    # we can't actually update this so don't show it
    try:
        case_fields.remove('type')
    except ValueError:
        pass

    return render(
        request, "importer/excel_fields.html", {
            'named_columns': named_columns,
            'case_type': case_type,
            'search_column': search_column,
            'search_field': search_field,
            'create_new_cases': create_new_cases,
            'key_column': key_column,
            'value_column': value_column,
            'columns': columns,
            'excel_fields': excel_fields,
            'case_fields': case_fields,
            'domain': domain,
            'report': {
                'name': 'Import: Match columns to fields'
            },
            'slug': base.ImportCases.slug
        })
def excel_config(request, domain):
    """Step one: handle the initial Excel upload and stash it via soil.

    Validates the request/file, caches the spreadsheet bytes so later views
    (possibly served by a different worker) can reach them, and renders the
    configuration form with the case types known to this domain.
    """
    if request.method != 'POST':
        return HttpResponseRedirect(base.ImportCases.get_url(domain=domain))

    if not request.FILES:
        return render_error(request, domain,
                            'Please choose an Excel file to import.')

    named_columns = request.POST.get('named_columns') == "on"
    uploaded_file_handle = request.FILES['file']

    extension = os.path.splitext(
        uploaded_file_handle.name)[1][1:].strip().lower()

    # NOTE: We may not always be able to reference files from subsequent
    # views if your worker changes, so we have to store it elsewhere
    # using the soil framework.
    if extension not in importer_util.ExcelFile.ALLOWED_EXTENSIONS:
        return render_error(
            request, domain,
            'The Excel file you chose could not be processed. '
            'Please check that it is saved as a Microsoft '
            'Excel 97/2000 .xls file.')

    # stash content in the default storage for subsequent views
    file_ref = expose_download(uploaded_file_handle.read(), expiry=1 * 60 * 60)
    request.session[EXCEL_SESSION_ID] = file_ref.download_id

    spreadsheet = importer_util.get_spreadsheet(file_ref, named_columns)
    if not spreadsheet:
        return _spreadsheet_expired(request, domain)

    columns = spreadsheet.get_header_columns()
    row_count = spreadsheet.get_num_rows()
    if row_count == 0:
        return render_error(
            request, domain,
            'Your spreadsheet is empty. '
            'Please try again with a different spreadsheet.')

    case_types_from_apps = []
    # load types from all modules
    for row in ApplicationBase.view('app_manager/types_by_module',
                                    reduce=True,
                                    group=True,
                                    startkey=[domain],
                                    endkey=[domain, {}]).all():
        if not row['key'][1] in case_types_from_apps:
            case_types_from_apps.append(row['key'][1])

    case_types_from_cases = []
    # load types from all case records
    for row in CommCareCase.view('hqcase/types_by_domain',
                                 reduce=True,
                                 group=True,
                                 startkey=[domain],
                                 endkey=[domain, {}]).all():
        if row['key'][1] and not row['key'][1] in case_types_from_cases:
            case_types_from_cases.append(row['key'][1])

    # for this we just want cases that have data but aren't being used anymore.
    # BUGFIX: filter() is lazy on Python 3, so the len() check below raised
    # TypeError; build a concrete list instead.
    case_types_from_cases = [
        case_type for case_type in case_types_from_cases
        if case_type not in case_types_from_apps
    ]

    if len(case_types_from_apps) == 0 and len(case_types_from_cases) == 0:
        return render_error(
            request, domain,
            'No cases have been submitted to this domain and there are no '
            'applications yet. You cannot import case details from an Excel '
            'file until you have existing cases or applications.')

    return render(
        request, "importer/excel_config.html", {
            'named_columns': named_columns,
            'columns': columns,
            'case_types_from_cases': case_types_from_cases,
            'case_types_from_apps': case_types_from_apps,
            'domain': domain,
            'report': {
                'name': 'Import: Configuration'
            },
            'slug': base.ImportCases.slug
        })
def excel_fields(request, domain):
    """
    Step two of three. Important values that are grabbed from the POST or
    defined by the user on this page:

    named_columns:
        Passed through from last step, see that for documentation

    case_type:
        The type of case we are matching to. When creating new cases, this is
        the type they will be created as. When updating existing cases, this
        is the type that we will search for. If the wrong case type is used
        when looking up existing cases, we will not update them.

    create_new_cases:
        A boolean that controls whether or not the user wanted to create new
        cases for any case that doesn't have a matching case id in the
        upload.

    search_column:
        Which column of the excel file we are using to specify either case
        ids or external ids. This is, strangely, required. If creating new
        cases only you would expect these to be blank with the
        create_new_cases flag set.

    search_field:
        Either case id or external id, determines which type of
        identification we are using to match to cases.

    key_column/value_column:
        These correspond to an advanced feature allowing a user to modify a
        single case with multiple rows.
    """
    named_columns = request.POST['named_columns']
    case_type = request.POST['case_type']
    try:
        search_column = request.POST['search_column']
    except MultiValueDictKeyError:
        # this is only true if your configuration is messed up in an irreparable way
        messages.error(request, _('It looks like you may have accessed this page from a stale page. '
                                  'Please start over.'))
        return _spreadsheet_expired(request, domain)

    search_field = request.POST['search_field']
    create_new_cases = request.POST.get('create_new_cases') == 'on'
    key_value_columns = request.POST.get('key_value_columns') == 'on'
    key_column = ''
    value_column = ''

    download_ref = DownloadBase.get(request.session.get(EXCEL_SESSION_ID))
    try:
        spreadsheet = importer_util.get_spreadsheet(download_ref, named_columns)
    except ImporterError as e:
        return render_error(request, domain, get_importer_error_message(e))

    columns = spreadsheet.get_header_columns()

    if key_value_columns:
        key_column = request.POST['key_column']
        value_column = request.POST['value_column']

        excel_fields = []
        key_column_index = columns.index(key_column)
        # if key/value columns were specified, get all the unique keys listed
        if key_column_index:
            excel_fields = spreadsheet.get_unique_column_values(key_column_index)

        # concatenate unique key fields with the rest of the columns
        excel_fields = columns + excel_fields
        # remove key/value column names from list
        excel_fields.remove(key_column)
        if value_column in excel_fields:
            excel_fields.remove(value_column)
    else:
        excel_fields = columns

    case_fields = get_case_properties_for_case_type(domain, case_type)

    # hide search column and matching case fields from the update list.
    # BUGFIX: the bare `except:` clauses caught every exception type;
    # list.remove only raises ValueError when the item is missing.
    try:
        excel_fields.remove(search_column)
    except ValueError:
        pass

    try:
        case_fields.remove(search_field)
    except ValueError:
        pass

    # we can't actually update this so don't show it
    try:
        case_fields.remove('type')
    except ValueError:
        pass

    return render(
        request,
        "importer/excel_fields.html", {
            'named_columns': named_columns,
            'case_type': case_type,
            'search_column': search_column,
            'search_field': search_field,
            'create_new_cases': create_new_cases,
            'key_column': key_column,
            'value_column': value_column,
            'columns': columns,
            'excel_fields': excel_fields,
            'case_fields': case_fields,
            'domain': domain,
            'report': {
                'name': 'Import: Match columns to fields'
            },
            'slug': base.ImportCases.slug
        }
    )
def excel_fields(request, domain):
    """Step two: render the column-to-case-property mapping form.

    Pulls the step-one selections from the POST, re-opens the cached
    spreadsheet and builds the lists of excel columns and case properties
    the user can match against each other.
    """
    named_columns = request.POST['named_columns']
    case_type = request.POST['case_type']
    search_column = request.POST['search_column']
    search_field = request.POST['search_field']
    create_new_cases = request.POST.get('create_new_cases') == 'on'
    key_value_columns = request.POST.get('key_value_columns') == 'on'
    key_column = ''
    value_column = ''

    download_ref = DownloadBase.get(request.session.get(EXCEL_SESSION_ID))
    spreadsheet = importer_util.get_spreadsheet(download_ref, named_columns)
    if not spreadsheet:
        return _spreadsheet_expired(request, domain)
    columns = spreadsheet.get_header_columns()

    if key_value_columns:
        key_column = request.POST['key_column']
        value_column = request.POST['value_column']

        excel_fields = []
        key_column_index = columns.index(key_column)
        # if key/value columns were specified, get all the unique keys listed
        if key_column_index:
            excel_fields = spreadsheet.get_unique_column_values(key_column_index)

        # concatenate unique key fields with the rest of the columns
        excel_fields = columns + excel_fields
        # remove key/value column names from list
        excel_fields.remove(key_column)
        if value_column in excel_fields:
            excel_fields.remove(value_column)
    else:
        excel_fields = columns

    case_fields = importer_util.get_case_properties(domain, case_type)

    # hide search column and matching case fields from the update list.
    # BUGFIX: bare `except:` swallowed every exception; list.remove only
    # raises ValueError when the element is absent.
    try:
        excel_fields.remove(search_column)
    except ValueError:
        pass

    try:
        case_fields.remove(search_field)
    except ValueError:
        pass

    # we can't actually update this so don't show it
    try:
        case_fields.remove('type')
    except ValueError:
        pass

    return render(request, "importer/excel_fields.html", {
        'named_columns': named_columns,
        'case_type': case_type,
        'search_column': search_column,
        'search_field': search_field,
        'create_new_cases': create_new_cases,
        'key_column': key_column,
        'value_column': value_column,
        'columns': columns,
        'excel_fields': excel_fields,
        'case_fields': case_fields,
        'domain': domain,
        'report': {
            'name': 'Import: Match columns to fields'
        },
        'slug': base.ImportCases.slug})
def bulk_import_async(import_id, config, domain, excel_id):
    """Background task: create/update cases from the cached spreadsheet.

    Walks the rows, matching each against existing cases via the configured
    search field, and returns summary counts for the status page.
    """
    task = bulk_import_async
    excel_ref = DownloadBase.get(excel_id)
    spreadsheet = importer_util.get_spreadsheet(excel_ref, config.named_columns)

    if not spreadsheet:
        return {'error': 'EXPIRED'}
    if spreadsheet.has_errors:
        return {'error': 'HAS_ERRORS'}

    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = 0
    blank_external_ids = []
    invalid_dates = []
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    for row_num in range(row_count):
        DownloadBase.set_progress(task, row_num, row_count)

        # skip first row if it is a header field
        if row_num == 0 and config.named_columns:
            continue

        priming_progress = match_count + created_count + prime_offset
        if priming_progress % PRIME_VIEW_FREQUENCY == 0:
            prime_views(POOL_SIZE)
            # increment so we can't possibly prime on next iteration
            prime_offset += 1

        row = spreadsheet.get_row(row_num)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            blank_external_ids.append(row_num + 1)
            continue

        case, error = importer_util.lookup_case(config.search_field,
                                                search_id, domain)

        try:
            fields_to_update = importer_util.populate_updated_fields(
                config,
                columns,
                row
            )
        except importer_util.InvalidDateException:
            invalid_dates.append(row_num + 1)
            continue

        if case:
            match_count += 1
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
            created_count += 1
        elif error == LookupErrors.MultipleResults:
            too_many_matches += 1
            continue

        # an uploaded owner_id wins; otherwise default to the current user
        owner_id = fields_to_update.pop('owner_id', user_id)

        if not case:
            new_case_id = uuid.uuid4().hex
            caseblock = CaseBlock(
                create=True,
                case_id=new_case_id,
                version=V2,
                user_id=user_id,
                owner_id=owner_id,
                case_type=config.case_type,
                external_id=search_id if config.search_field == 'external_id' else '',
                update=fields_to_update
            )
            submit_case_block(caseblock, domain, username, user_id)
        elif case and case.type == config.case_type:
            caseblock = CaseBlock(
                create=False,
                case_id=case._id,
                owner_id=owner_id,
                version=V2,
                update=fields_to_update
            )
            submit_case_block(caseblock, domain, username, user_id)

    return {
        'created_count': created_count,
        'match_count': match_count,
        'too_many_matches': too_many_matches,
        'blank_externals': blank_external_ids,
        'invalid_dates': invalid_dates,
    }
def excel_config(request, domain):
    """
    Step one of three: handle the initial Excel upload.

    named_columns:
        Whether the first spreadsheet row holds column headers. Defaults to
        True, and is effectively always used since the rest of the importer
        depends on seeing column headers.
    """
    if request.method != 'POST':
        return HttpResponseRedirect(base.ImportCases.get_url(domain=domain))
    if not request.FILES:
        return render_error(request, domain,
                            'Please choose an Excel file to import.')

    named_columns = request.POST.get('named_columns') == "on"
    uploaded_file_handle = request.FILES['file']
    extension = os.path.splitext(uploaded_file_handle.name)[1][1:].strip().lower()

    # NOTE: We may not always be able to reference files from subsequent
    # views if your worker changes, so we have to store it elsewhere
    # using the soil framework.
    if extension not in importer_util.ExcelFile.ALLOWED_EXTENSIONS:
        return render_error(request, domain,
                            'The Excel file you chose could not be processed. '
                            'Please check that it is saved as a Microsoft '
                            'Excel 97/2000 .xls file.')

    # stash content in the default storage for subsequent views
    file_ref = expose_cached_download(
        uploaded_file_handle.read(),
        expiry=1*60*60,
        file_extension=file_extention_from_filename(uploaded_file_handle.name),
    )
    request.session[EXCEL_SESSION_ID] = file_ref.download_id

    try:
        spreadsheet = importer_util.get_spreadsheet(file_ref, named_columns)
    except ImporterError as e:
        return render_error(request, domain, get_importer_error_message(e))

    columns = spreadsheet.get_header_columns()
    row_count = spreadsheet.get_num_rows()
    if row_count == 0:
        return render_error(request, domain,
                            'Your spreadsheet is empty. '
                            'Please try again with a different spreadsheet.')

    case_types_from_apps = get_case_types_from_apps(domain)
    # case types with data in the domain that no application references
    unrecognized_case_types = [
        case_type for case_type in CaseAccessors(domain).get_case_types()
        if case_type not in case_types_from_apps
    ]

    if len(case_types_from_apps) == 0 and len(unrecognized_case_types) == 0:
        return render_error(
            request,
            domain,
            'No cases have been submitted to this domain and there are no '
            'applications yet. You cannot import case details from an Excel '
            'file until you have existing cases or applications.'
        )

    context = {
        'named_columns': named_columns,
        'columns': columns,
        'unrecognized_case_types': unrecognized_case_types,
        'case_types_from_apps': case_types_from_apps,
        'domain': domain,
        'report': {
            'name': 'Import: Configuration'
        },
        'slug': base.ImportCases.slug,
    }
    return render(request, "importer/excel_config.html", context)