Example #1
0
def get_total_clients_data(domains, datespan, interval, datefield='opened_on'):
    """
    Returns cases that have used SMS.
    Returned based on date case is opened
    """
    sms_cases = (SMSES().to_commcare_case().domain(domains).terms_aggregation(
        'couch_recipient', 'cases').size(0))

    cases = sms_cases.run().aggregations.cases.keys

    cases_after_date = (CaseES().domain(domains).filter({
        "ids": {
            "values": cases
        }
    }).opened_range(gte=datespan.startdate,
                    lte=datespan.enddate).date_histogram(
                        'date', datefield, interval).size(0))

    histo_data = cases_after_date.run().aggregations.date.as_facet_result()

    cases_before_date = (CaseES().domain(domains).filter({
        "ids": {
            "values": cases
        }
    }).opened_range(lt=datespan.startdate).size(0)).run().total

    return format_return_data(histo_data, cases_before_date, datespan)
Example #2
0
def get_total_clients_data(domains, datespan, interval, datefield='opened_on'):
    """
    Returns cases that have used SMS.
    Returned based on date case is opened
    """
    sms_cases = (SMSES().to_commcare_case().domain(domains).terms_facet(
        'couch_recipient', 'cases', size=LARGE_ES_NUMBER).size(0))

    cases = [u['term'] for u in sms_cases.run().facet('cases', 'terms')]

    cases_after_date = (CaseES().domain(domains).filter({
        "ids": {
            "values": cases
        }
    }).opened_range(gte=datespan.startdate,
                    lte=datespan.enddate).date_histogram(
                        'date', datefield, interval).size(0))

    histo_data = cases_after_date.run().facet('date', 'entries')

    cases_before_date = (CaseES().domain(domains).filter({
        "ids": {
            "values": cases
        }
    }).opened_range(lt=datespan.startdate).size(0)).run().total

    return format_return_data(histo_data, cases_before_date, datespan)
Example #3
0
def cases_in_last(domain, days, case_type=None):
    """
    Returns the number of open cases that have been modified in the last <days> days
    """
    now = datetime.utcnow()
    then = json_format_datetime(now - timedelta(days=int(days)))
    now = json_format_datetime(now)

    query = CaseES().domain(domain).modified_range(gte=then, lte=now).is_closed(False)
    if case_type:
        query = query.case_type(case_type)
    return query.count()
Example #4
0
def get_patient_display_cache(case_ids):
    """
    For a given set of case_ids, return name and pact_ids
    """
    if len(case_ids) == 0:
        return {}
    case_es = ReportCaseESView(PACT_DOMAIN)
    query = (CaseES().remove_default_filters().domain(PACT_DOMAIN).source(
        ["_id", "name"]).size(len(case_ids)))
    query = query.add_query({"ids": {"values": case_ids}})
    query["script_fields"] = {
        "case_id": {
            "script": "_source._id"
        },
        "pactid": get_report_script_field("pactid"),
        "first_name": get_report_script_field("first_name"),
        "last_name": get_report_script_field("last_name"),
    }
    res = case_es.run_query(query.raw_query)

    from pact.reports.patient import PactPatientInfoReport

    ret = {}
    for entry in res['hits']['hits']:
        case_id = entry['case_id']
        ret[case_id] = entry
        ret[case_id]['url'] = PactPatientInfoReport.get_url(
            *['pact']) + "?patient_id=%s" % case_id

    return ret
Example #5
0
    def get_case_ids(cls, domain, case_type, boundary_date=None):
        """
        Retrieves the case ids in chunks, yielding a list of case ids each time
        until there are none left.
        """
        chunk_size = 100

        query = (CaseES().domain(domain).case_type(case_type).is_closed(
            closed=False).exclude_source().size(chunk_size))

        if boundary_date:
            query = query.server_modified_range(lte=boundary_date)

        result = []

        for case_id in query.scroll():
            if not isinstance(case_id, basestring):
                raise ValueError(
                    "Something is wrong with the query, expected ids only")

            result.append(case_id)
            if len(result) >= chunk_size:
                yield result
                result = []

        if result:
            yield result
Example #6
0
def scroll_case_ids_by_domain_and_case_type(domain, case_type, chunk_size=100):
    """
    Retrieves the case ids in chunks, yielding a list of case ids each time
    until there are none left.

    Only filters on domain and case type, and includes both open and closed cases.
    """
    query = (CaseES().domain(domain).case_type(
        case_type).exclude_source().size(chunk_size))

    result = []

    for case_id in query.scroll():
        if not isinstance(case_id, six.string_types):
            raise ValueError(
                "Something is wrong with the query, expected ids only")
        soft_assert_type_text(case_id)

        result.append(case_id)
        if len(result) >= chunk_size:
            yield result
            result = []

    if result:
        yield result
Example #7
0
def inactive_cases_in_last(domain, days):
    """
    Returns the number of open cases that have been modified in the last <days> days
    """
    now = datetime.utcnow()
    then = json_format_datetime(now - timedelta(days=int(days)))

    return CaseES().domain(domain).modified_range(lt=then).is_closed(False).count()
Example #8
0
 def get_case_ids(cls, domain, case_type, boundary_date=None):
     query = (CaseES()
              .domain(domain)
              .case_type(case_type)
              .is_closed(closed=False)
              .exclude_source())
     if boundary_date is not None:
         query = query.server_modified_range(lte=boundary_date)
     results = query.run()
     return results.doc_ids
Example #9
0
def db_comparisons(request):
    comparison_config = [{
        'description':
        'Users (base_doc is "CouchUser")',
        'couch_db':
        CommCareUser.get_db(),
        'view_name':
        'users/by_username',
        'es_query':
        UserES().remove_default_filter('active').remove_default_filter(
            'mobile_worker').size(0),
        'sql_rows':
        User.objects.count(),
    }, {
        'description': 'Domains (doc_type is "Domain")',
        'couch_db': Domain.get_db(),
        'view_name': 'domain/by_status',
        'es_query': DomainES().size(0),
        'sql_rows': None,
    }, {
        'description':
        'Forms (doc_type is "XFormInstance")',
        'couch_db':
        XFormInstance.get_db(),
        'view_name':
        'couchforms/by_xmlns',
        'es_query':
        FormES().remove_default_filter('has_xmlns').remove_default_filter(
            'has_user').size(0),
        'sql_rows':
        FormData.objects.count(),
    }, {
        'description': 'Cases (doc_type is "CommCareCase")',
        'couch_db': CommCareCase.get_db(),
        'view_name': 'case/by_owner',
        'es_query': CaseES().size(0),
        'sql_rows': None,
    }]

    comparisons = []
    for comp in comparison_config:
        comparisons.append({
            'description':
            comp['description'],
            'couch_docs':
            comp['couch_db'].view(
                comp['view_name'],
                reduce=True,
            ).one()['value'],
            'es_docs':
            comp['es_query'].run().total,
            'sql_rows':
            comp['sql_rows'] if comp['sql_rows'] else 'n/a',
        })
    return json_response(comparisons)
Example #10
0
    def _get_case_ids_from_es(cls, domain, case_type, boundary_date=None):
        query = (CaseES().domain(domain).case_type(case_type).is_closed(
            closed=False).exclude_source().size(100))

        if boundary_date:
            query = query.server_modified_range(lte=boundary_date)

        for case_id in query.scroll():
            if not isinstance(case_id, basestring):
                raise ValueError(
                    "Something is wrong with the query, expected ids only")

            yield case_id
Example #11
0
def es_query_from_get_params(search_params,
                             domain,
                             reserved_query_params=None,
                             doc_type='form'):
    # doc_type can be form or case
    assert doc_type in ['form', 'case']
    es = FormES() if doc_type == 'form' else CaseES()

    query = es.remove_default_filters().domain(domain)

    if doc_type == 'form':
        if 'include_archived' in search_params:
            query = query.filter(
                filters.OR(filters.term('doc_type', 'xforminstance'),
                           filters.term('doc_type', 'xformarchived')))
        else:
            query = query.filter(filters.term('doc_type', 'xforminstance'))

    if '_search' in search_params:
        # This is undocumented usecase by Data export tool and one custom project
        #   Validate that the passed in param is one of these two expected
        _filter = _validate_and_get_es_filter(
            json.loads(search_params['_search']))
        query = query.filter(_filter)

    # filters are actually going to be a more common case
    reserved_query_params = RESERVED_QUERY_PARAMS | set(reserved_query_params
                                                        or [])
    query_params = {
        param: value
        for param, value in search_params.items()
        if param not in reserved_query_params and not param.endswith('__full')
    }
    for consumer in query_param_consumers:
        try:
            payload_filter = consumer.consume_params(query_params)
        except DateTimeError as e:
            raise Http400("Bad query parameter: {}".format(str(e)))

        if payload_filter:
            query = query.filter(payload_filter)

    # add unconsumed filters
    for param, value in query_params.items():
        # assume these fields are analyzed in ES so convert to lowercase
        # Any fields that are not analyzed in ES should be in the ``query_param_consumers`` above
        value = value.lower()
        query = query.filter(filters.term(param, value))

    return query.raw_query
Example #12
0
def get_active_supply_points_data(domains, datespan, interval):
    """
    Returns list of timestamps and active supply points have been modified during
    each interval
    """
    histo_data = []
    for start_date, end_date in intervals(interval, datespan.startdate,
                                          datespan.enddate):
        num_active_supply_points = (CaseES().domain(domains).case_type(
            SUPPLY_POINT_CASE_TYPE).active_in_range(
                gte=start_date, lte=end_date).size(0)).run().total

        if num_active_supply_points > 0:
            histo_data.append(
                get_data_point(num_active_supply_points, end_date))

    return format_return_data(histo_data, 0, datespan)
Example #13
0
def db_comparisons(request):

    def _simple_view_couch_query(db, view_name):
        return db.view(view_name, reduce=True).one()['value']

    comparison_config = [
        {
            'description': 'Users (base_doc is "CouchUser")',
            'couch_docs': _simple_view_couch_query(CommCareUser.get_db(), 'users/by_username'),
            'es_query': UserES().remove_default_filter('active').size(0),
            'sql_rows': User.objects.count(),
        },
        {
            'description': 'Domains (doc_type is "Domain")',
            'couch_docs': _simple_view_couch_query(Domain.get_db(), 'domain/by_status'),
            'es_query': DomainES().size(0),
            'sql_rows': None,
        },
        {
            'description': 'Forms (doc_type is "XFormInstance")',
            'couch_docs': get_number_of_forms_all_domains_in_couch(),
            'es_query': FormES().remove_default_filter('has_xmlns')
                .remove_default_filter('has_user')
                .size(0),
            'sql_rows': FormData.objects.exclude(domain__isnull=True).count(),
        },
        {
            'description': 'Cases (doc_type is "CommCareCase")',
            'couch_docs': get_total_case_count(),
            'es_query': CaseES().size(0),
            'sql_rows': CaseData.objects.exclude(domain__isnull=True).count(),
        }
    ]

    comparisons = []
    for comp in comparison_config:
        comparisons.append({
            'description': comp['description'],
            'couch_docs': comp['couch_docs'],
            'es_docs': comp['es_query'].run().total,
            'sql_rows': comp['sql_rows'] if comp['sql_rows'] else 'n/a',
        })
    return json_response(comparisons)
Example #14
0
 def handle(self, domain, case_type, *args, **options):
     perform_update = True
     query = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE).domain(
         domain).case_type(case_type).is_closed(False).term(
             'name.exact', ''))
     cases_count = query.count()
     print("Number of cases to be updated approximately: %s" % cases_count)
     if not input("Do you wish to update cases (y/n)") == 'y':
         perform_update = False
         if not input("Do you wish to just log updates (y/n)") == 'y':
             exit(0)
     case_ids = query.get_ids()
     print("Begin iterating %s cases" % len(case_ids))
     case_accessor = CaseAccessors(domain)
     case_updates = []
     filename = "case_updates_%s_%s_%s.csv" % (domain, case_type,
                                               datetime.utcnow())
     with open(filename, 'w') as f:
         writer = csv.DictWriter(f, ['case_id', 'new_value'])
         writer.writeheader()
         for case_id in with_progress_bar(case_ids):
             case = case_accessor.get_case(case_id)
             if case.name:
                 continue
             update_to_name = get_last_non_blank_value(case, 'name')
             if update_to_name:
                 writer.writerow({
                     'case_id': case_id,
                     'new_value': update_to_name
                 })
                 if perform_update:
                     case_updates.append((case_id, {
                         'name': update_to_name
                     }, False))
             # update batch when we have the threshold
             if len(case_updates) == CASE_UPDATE_BATCH:
                 bulk_update_cases(domain, case_updates, DEVICE_ID)
                 case_updates = []
         # submit left over case updates
         if case_updates:
             print("Performing last batch of updates")
             bulk_update_cases(domain, case_updates, DEVICE_ID)
         print("Finished. Update details in %s" % filename)
Example #15
0
def get_case_by_identifier(domain, identifier):

    case_accessors = CaseAccessors(domain)

    # Try by any of the allowed identifiers
    for identifier_type in ALLOWED_CASE_IDENTIFIER_TYPES:
        result = CaseES().domain(domain).filter(
            filters.term(identifier_type, identifier)).get_ids()
        if result:
            return case_accessors.get_case(result[0])
    # Try by case id
    try:
        case_by_id = case_accessors.get_case(identifier)
        if case_by_id.domain == domain:
            return case_by_id
    except (CaseNotFound, KeyError):
        pass

    return None
Example #16
0
    def es_results(self):
        query = CaseES('report_cases').domain(self.domain)
        if self.case_type:
            query = query.case_type(self.case_type)

        if self.case_filter:
            query = query.filter(self.case_filter)

        if self.case_status:
            query = query.filter(filters.term("closed", self.case_status == 'closed'))

        query = (
            query
            .set_sorting_block(self.get_sorting_block())
            .start(self.pagination.start)
            .size(self.pagination.count)
        )
        return query.run().hits
    def handle(self, **options):
        domain = options['domain']
        debug = options['debug']
        cleanup = options['cleanup']
        domain_query = CaseES().domain(domain)
        valid_case_ids = set(domain_query.get_ids())
        referenced_case_ids = {
            index['referenced_id']
            for hit in domain_query.source('indices.referenced_id').run().hits
            for index in hit['indices']
        }

        invalid_referenced_ids = referenced_case_ids - valid_case_ids

        if len(invalid_referenced_ids) > ES_MAX_CLAUSE_COUNT:
            print("there's a lot of invalid ids here. ES queries may not handle this well")

        cases_with_invalid_references = (
            domain_query
            .term('indices.referenced_id', invalid_referenced_ids)
            .source(['_id', 'type', 'indices', 'owner_id', 'opened_by', 'xform_ids'])
            .run().hits
        )

        with open(options['filename'], 'w', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            headers = [
                'case id',
                'case type',
                'creating form id',
                'referenced id',
                'referenced_type',
                'index relationship',
                'index identifier',
                'owner id',
                'owner name',
                'opened by id',
                'opened by name',
            ]
            if debug:
                headers.append('app version')
            writer.writerow(headers)

            for case in cases_with_invalid_references:
                for index in case['indices']:
                    if index['referenced_id'] in invalid_referenced_ids:
                        form_id = case['xform_ids'][0]
                        row = [
                            case['_id'],
                            case['type'],
                            form_id,
                            index['referenced_id'],
                            index['referenced_type'],
                            index['relationship'],
                            index['identifier'],
                            case['owner_id'],
                            cached_owner_id_to_display(case['owner_id']),
                            case['opened_by'],
                            cached_owner_id_to_display(case['opened_by']),
                        ]
                        if debug:
                            form = FormAccessors(domain=domain).get_form(form_id)
                            app_version_info = get_app_version_info(
                                domain,
                                form.build_id,
                                form.form_data['@version'],
                                form.metadata,
                            )
                            row.append(app_version_info.build_version)
                        writer.writerow(row)

        if cleanup:
            missing = set()
            deleted = set()
            exists = set()
            for invalid_id in invalid_referenced_ids:
                try:
                    case = CaseAccessors(domain).get_case(invalid_id)
                except CaseNotFound:
                    missing.add(invalid_id)
                else:
                    if case.is_deleted:
                        deleted.add(case)
                    else:
                        exists.add(case)

            for case_to_resync in exists:
                # if the case actually exists resync it to fix the es search
                resave_case(domain, case_to_resync, send_post_save_signal=False)

            if exists:
                print('resynced {} cases that were actually not deleted'.format(len(exists)))

            for case in deleted:
                # delete the deleted case's entire network in one go
                call_command('delete_related_cases', domain, case.case_id)

            for case in cases_with_invalid_references:
                for index in case['indices']:
                    if index['referenced_id'] in missing:
                        # this is just an invalid reference. no recourse but to delete the case itself
                        call_command('delete_related_cases', domain, case['_id'])
Example #18
0
    def handle(self, **options):
        domain = options['domain']
        debug = options['debug']
        cleanup = options['cleanup']
        domain_query = CaseES().domain(domain)
        valid_case_ids = set(domain_query.get_ids())
        referenced_case_ids = {
            index['referenced_id']
            for hit in domain_query.source('indices.referenced_id').run().hits
            for index in hit['indices']
        }

        invalid_referenced_ids = referenced_case_ids - valid_case_ids

        if len(invalid_referenced_ids) > ES_MAX_CLAUSE_COUNT:
            print(
                "there's a lot of invalid ids here. ES queries may not handle this well"
            )

        cases_with_invalid_references = (domain_query.term(
            'indices.referenced_id', invalid_referenced_ids).source([
                '_id', 'type', 'indices', 'owner_id', 'opened_by', 'xform_ids'
            ]).run().hits)

        with open(options['filename'], 'w', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            headers = [
                'case id',
                'case type',
                'creating form id',
                'referenced id',
                'referenced_type',
                'index relationship',
                'index identifier',
                'owner id',
                'owner name',
                'opened by id',
                'opened by name',
            ]
            if debug:
                headers.append('app version')
            writer.writerow(headers)

            for case in cases_with_invalid_references:
                for index in case['indices']:
                    if index['referenced_id'] in invalid_referenced_ids:
                        form_id = case['xform_ids'][0]
                        row = [
                            case['_id'],
                            case['type'],
                            form_id,
                            index['referenced_id'],
                            index['referenced_type'],
                            index['relationship'],
                            index['identifier'],
                            case['owner_id'],
                            cached_owner_id_to_display(case['owner_id']),
                            case['opened_by'],
                            cached_owner_id_to_display(case['opened_by']),
                        ]
                        if debug:
                            form = FormAccessors(
                                domain=domain).get_form(form_id)
                            app_version_info = get_app_version_info(
                                domain,
                                form.build_id,
                                form.form_data['@version'],
                                form.metadata,
                            )
                            row.append(app_version_info.build_version)
                        writer.writerow(row)

        if cleanup:
            missing = set()
            deleted = set()
            exists = set()
            for invalid_id in invalid_referenced_ids:
                try:
                    case = CaseAccessors(domain).get_case(invalid_id)
                except CaseNotFound:
                    missing.add(invalid_id)
                else:
                    if case.is_deleted:
                        deleted.add(case)
                    else:
                        exists.add(case)
Example #19
0
def get_base_case_es_query(start=0, size=DEFAULT_SIZE):
    return (CaseES().remove_default_filters().domain(PACT_DOMAIN).start(
        start).size(size))
Example #20
0
 def get_case_ids(cls, domain, case_type, boundary_date=None):
     query = CaseES().domain(domain).case_type(case_type).is_closed(closed=False).exclude_source()
     if boundary_date is not None:
         query = query.server_modified_range(lte=boundary_date)
     results = query.run()
     return results.doc_ids