def test_get_editColumnOrder(self):
    """GET on the column-filter view returns 200 for a silo that has
    hidden columns and row-hiding filters configured."""
    addColsToSilo(self.silo, ['a', 'b', 'c', 'd', 'e', 'f'])
    hideSiloColumns(self.silo, ['b', 'e'])

    hide_rules = [
        {"logic": "BLANKCHAR", "operation": "", "number": "",
         "conditional": "---"},
        {"logic": "AND", "operation": "empty", "number": "",
         "conditional": ["a", "b"]},
        {"logic": "OR", "operation": "empty", "number": "",
         "conditional": ["c", "d"]},
    ]
    self.silo.rows_to_hide = json.dumps(hide_rules)
    self.silo.save()

    request = self.factory.get(self.url)
    request.user = self.user
    request._dont_enforce_csrf_checks = True

    response = addColumnFilter(request, self.silo.pk)
    self.assertEqual(response.status_code, 200)
def test_post_editColumnOrder(self):
    """POSTing a column order reorders the silo's visible columns and
    redirects (302); posting against a nonexistent silo id (0) also
    redirects."""
    addColsToSilo(self.silo, ['a', 'b', 'c', 'd', 'e', 'f'])
    hideSiloColumns(self.silo, ['b', 'e'])
    cols_ordered = ['c', 'f', 'a', 'd']

    url = '/edit_column_order/%s/' % str(self.silo.pk)
    response = self.client.post(url, data={'columns': cols_ordered})
    self.assertEqual(response.status_code, 302)
    self.assertEqual(getSiloColumnNames(self.silo.pk), ['c', 'f', 'a', 'd'])

    # A silo pk of 0 does not exist; the view is expected to redirect.
    response = self.client.post('/edit_column_order/0/',
                                data={'columns': cols_ordered})
    self.assertEqual(response.status_code, 302)
def copy_from_cache(cache_silo, silo, read):
    """Copy every cached record belonging to cache_silo into silo,
    re-tagging each record with the destination silo and read ids, then
    register the cache silo's column names/types on the destination silo.
    """
    for record in db.label_value_store.find({'silo_id': cache_silo.id}):
        # Drop the mongo _id so the insert creates a fresh document.
        record.pop('_id')
        record['silo_id'] = silo.id
        record['read_id'] = read.id
        db.label_value_store.insert(record)

    names = []
    types_by_name = {}
    for column in json.loads(cache_silo.columns):
        column_name = column['name']
        names.append(column_name)
        types_by_name[column_name] = column['type']
    addColsToSilo(silo, names, types_by_name)
def getCommCareDataHelper(conf): """ Use fetch and request CommCareData to store all of the case data domain -- the domain name used for a commcare project auth -- the authorization required auth_header -- True = use Header, False = use Digest authorization total_cases -- total cases to get silo - silo to put the data into read -- read that the data is apart of """ # CommCare has a max limit of 50 for report downloads if conf.download_type == 'commcare_report': record_limit = 50 else: record_limit = 100 # replace the record limit and fetch the data base_url = conf.base_url.replace('limit=1', 'limit=' + str(record_limit)) if conf.download_type == 'commcare_form': cache_obj = CommCareCache.objects.get(form_id=conf.form_id) base_url += '&received_on_start=' + \ cache_obj.last_updated.isoformat()[:-6] data_raw = fetchCommCareData(conf.to_dict(), base_url, 0, record_limit) data_collects = data_raw.apply_async() data_retrieval = [v.get() for v in data_collects] columns = set() for data in data_retrieval: columns = columns.union(data) # Add new columns to the list of current columns this is slower because # Order has to be maintained (2n instead of n) silo = Silo.objects.get(pk=conf.silo_id) addColsToSilo(silo, columns) return (messages.SUCCESS, "CommCare data imported successfully", columns)
def getCommCareCaseData(domain, auth, auth_header, total_cases, silo, read):
    """
    Use fetch and request CommCareData to store all of the case data

    domain -- the domain name used for a commcare project
    auth -- the authorization required
    auth_header -- True = use Header, False = use Digest authorization
    total_cases -- total cases to get
    silo -- silo to put the data into
    read -- read that the data is apart of

    Returns a (message level, message text, columns) tuple.
    """
    RECORDS_PER_REQUEST = 100
    base_url = "https://www.commcarehq.org/a/" + domain \
        + "/api/v0.5/case/?format=JSON&limit=" + str(RECORDS_PER_REQUEST)
    data_raw = fetchCommCareData(base_url, auth, auth_header,
                                 0, total_cases, RECORDS_PER_REQUEST,
                                 silo.id, read.id)
    data_collects = data_raw.apply_async()
    # Block until every async chunk completes; each yields column names.
    data_retrieval = [v.get() for v in data_collects]
    columns = set()
    for data in data_retrieval:
        columns = columns.union(data)

    # Sanitize the column names. BUG FIX: the original iterated over
    # `columns` while removing/adding members, which raises
    # "RuntimeError: Set changed size during iteration"; it also removed a
    # name twice (KeyError) when it contained both '.' and '$'. Iterate a
    # snapshot and do both replacements in a single pass instead.
    for column in list(columns):
        cleaned = column.replace(".", "_").replace("$", "USD")
        if cleaned != column:
            columns.remove(column)
            columns.add(cleaned)

    # Drop blank and internal bookkeeping names; discard() is a no-op when
    # the name is absent (replaces the repetitive try/remove/except blocks).
    for reserved in ("", "silo_id", "read_id"):
        columns.discard(reserved)

    # Rename columns that clash with fields the importer assigns itself.
    # (The misspelling 'editted_date' is the established stored field name.)
    for old, new in (("id", "user_assigned_id"),
                     ("_id", "user_assigned_id"),
                     ("edit_date", "editted_date"),
                     ("create_date", "created_date")):
        if old in columns:
            columns.remove(old)
            columns.add(new)

    # add new columns to the list of current columns this is slower because
    # order has to be maintained (2n instead of n)
    addColsToSilo(silo, columns)
    hideSiloColumns(silo, ["case_id"])
    return (messages.SUCCESS, "CommCare cases imported successfully", columns)
def import_from_gsheet_helper(user, silo_id, silo_name, spreadsheet_id,
                              sheet_id=None, partialcomplete=False):
    """
    Import rows from a Google Spreadsheet into a silo.

    user -- requesting user; used for OAuth credentials and silo ownership
    silo_id -- pk of an existing silo, or '0' to create a new one
    silo_name -- name used if a new silo is created
    spreadsheet_id -- id of the Google Spreadsheet to import from
    sheet_id -- optional sheet (tab) id; falls back to the read's stored
                gsheet_id, else the sheet titled 'Sheet1'
    partialcomplete -- when True, return the unsaved LabelValueStore
                       documents instead of saving them

    Returns a list of message dicts, or (lvss, msgs) when partialcomplete
    is True.
    """
    msgs = []
    if spreadsheet_id is None:
        msgs.append({
            "level": messages.ERROR,
            "msg": "A Google Spreadsheet is not selected to import data from.",
            "redirect": reverse('index')
        })
    # Anything other than a credentials object is an error-message dict.
    credential_obj = _get_credential_object(user)
    if not isinstance(credential_obj, OAuth2Credentials):
        msgs.append(credential_obj)
        return msgs
    defaults = {
        "name": silo_name,
        "description": "Google Sheet Import",
        "public": False,
        "owner": user
    }
    silo, created = Silo.objects.get_or_create(
        pk=None if silo_id == '0' else silo_id, defaults=defaults)
    msgs.append({"silo_id": silo.id})
    service = _get_authorized_service(credential_obj)
    # fetch the google spreadsheet metadata
    try:
        spreadsheet = service.spreadsheets().get(
            spreadsheetId=spreadsheet_id).execute()
    except HttpAccessTokenRefreshError:
        # Expired token: return a fresh credential/redirect object.
        return [_get_credential_object(user, True)]
    except Exception as e:
        error = json.loads(e.content).get("error")
        msg = "%s: %s" % (error.get("status"), error.get("message"))
        msgs.append({"level": messages.ERROR, "msg": msg})
        return msgs
    spreadsheet_name = spreadsheet.get("properties", {}).get("title", "")
    gsheet_read = _get_or_create_read("GSheet Import", spreadsheet_name,
                                      "Google Spreadsheet Import",
                                      spreadsheet_id, user, silo)
    sheet_name = "Sheet1"
    if sheet_id:
        gsheet_read.gsheet_id = sheet_id
        gsheet_read.save()
    # Resolve the stored sheet id to its current title.
    if gsheet_read.gsheet_id:
        sheets = spreadsheet.get("sheets", None)
        for sheet in sheets:
            properties = sheet.get("properties", None)
            if properties:
                if str(properties.get("sheetId")) == str(
                        gsheet_read.gsheet_id):
                    sheet_name = properties.get("title")
    headers = []
    data = None
    combine_cols = False
    # Fetch data from gsheet
    try:
        result = service.spreadsheets().values().get(
            spreadsheetId=spreadsheet_id, range=sheet_name).execute()
        data = result.get("values", [])
    except Exception as e:
        logger.error(e)
        msgs.append({
            "level": messages.ERROR,
            "msg": "Something went wrong 22: %s" % e,
            "redirect": None
        })
        return msgs
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    lvss = []
    for r, row in enumerate(data):
        # First row holds the column headers.
        if r == 0:
            headers = []
            for header in row:
                header = cleanKey(header)
                headers.append(header)
            addColsToSilo(silo, headers)
            continue
        filter_criteria = {}
        # build filter_criteria if unique field(s) have been setup for this
        # silo. NOTE(review): list indexing raises IndexError, not KeyError —
        # the KeyError arm looks dead; confirm before removing.
        for unique_field in unique_fields:
            try:
                filter_criteria.update(
                    {unique_field.name: row[headers.index(unique_field.name)]})
            except KeyError:
                pass
            except ValueError:
                pass
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            # if a row is found, then fetch and update it
            # if no row is found then create a new one
            # if multiple rows are found then skip b/c not sure which one
            # to update
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
            except LabelValueStore.DoesNotExist as e:
                lvs = LabelValueStore()
            except LabelValueStore.MultipleObjectsReturned as e:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("%s=%s" % (k, v))
                continue
        else:
            lvs = LabelValueStore()
        for c, col in enumerate(row):
            try:
                key = headers[c]
            except IndexError as e:
                # this happens when a column header is missing gsheet
                continue
            # Skip blank headers and remap names that clash with the
            # importer's own bookkeeping fields.
            if key == "" or key is None or key == "silo_id":
                continue
            elif key == "id" or key == "_id":
                key = "user_assigned_id"
            elif key == "edit_date":
                key = "editted_date"
            elif key == "create_date":
                key = "created_date"
            val = smart_str(row[c], strings_only=True)
            key = smart_str(key)
            val = val.strip()
            # '.' and '$' are not allowed in MongoDB key names.
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)
        lvs.silo_id = silo.id
        lvs.read_id = gsheet_read.id
        lvs.create_date = timezone.now()
        lvs = calculateFormulaCell(lvs, silo)
        if partialcomplete:
            lvss.append(lvs)
        else:
            lvs.save()
    if skipped_rows:
        msgs.append({
            "level": messages.WARNING,
            "msg": "Skipped updating/adding records where %s because there are already multiple records."
                   % ",".join(str(s) for s in skipped_rows)
        })
    msgs.append({"level": messages.SUCCESS, "msg": "Operation successful"})
    if partialcomplete:
        return (lvss, msgs)
    return msgs
def import_from_gsheet_helper(user, silo_id, silo_name, spreadsheet_id,
                              sheet_id=None, partialcomplete=False):
    """
    Import rows from a Google Spreadsheet into a silo (refactored variant
    that delegates API access to _get_gsheet_metadata/_fetch_data_gsheet).

    user -- requesting user; used for OAuth credentials and silo ownership
    silo_id -- pk of an existing silo, or '0' to create a new one
    silo_name -- name used if a new silo is created
    spreadsheet_id -- id of the Google Spreadsheet to import from
    sheet_id -- optional sheet (tab) id; falls back to the read's stored
                gsheet_id, else the sheet titled 'Sheet1'
    partialcomplete -- when True, return the unsaved LabelValueStore
                       documents instead of saving them

    Returns a list of message dicts, or (lvss, msgs) when partialcomplete
    is True.
    """
    msgs = []
    if spreadsheet_id is None:
        msgs.append({
            'level': messages.ERROR,
            'msg': 'A Google Spreadsheet is not selected to import data from.',
            'redirect': reverse('index')
        })
    # Anything other than a credentials object is an error-message dict.
    credential_obj = _get_credential_object(user)
    if not isinstance(credential_obj, OAuth2Credentials):
        msgs.append(credential_obj)
        return msgs
    defaults = {
        'name': silo_name,
        'description': 'Google Sheet Import',
        'public': False,
        'owner': user
    }
    silo, created = Silo.objects.get_or_create(
        pk=None if silo_id == '0' else silo_id, defaults=defaults)
    msgs.append({'silo_id': silo.id})
    metadata, error = _get_gsheet_metadata(credential_obj, spreadsheet_id,
                                           user)
    if error:
        # A 'credential' error carries the payload to return as-is.
        if 'credential' in error:
            return error.get('credential')
        else:
            msgs.append(error)
            return msgs
    spreadsheet_name = metadata.get('properties', {}).get('title', '')
    gsheet_read = _get_or_create_read('GSheet Import', spreadsheet_name,
                                      'Google Spreadsheet Import',
                                      spreadsheet_id, user, silo)
    if sheet_id:
        gsheet_read.gsheet_id = sheet_id
        gsheet_read.save()
    sheet_name = 'Sheet1'
    # Resolve the stored sheet id to its current title.
    if gsheet_read.gsheet_id:
        sheets = metadata.get('sheets', None)
        for sheet in sheets:
            properties = sheet.get('properties', None)
            if properties:
                if str(properties.get('sheetId')) == str(
                        gsheet_read.gsheet_id):
                    sheet_name = properties.get('title')
    values, error = _fetch_data_gsheet(credential_obj, spreadsheet_id,
                                       sheet_name)
    if error:
        msgs.append(error)
        return msgs
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    headers = []
    lvss = []
    # get the column names
    header = values.pop(0)
    for h in header:
        h = cleanKey(h)
        headers.append(h)
    data = _convert_gsheet_data(headers, values)
    for r, row in enumerate(data):
        filter_criteria = {}
        # build filter_criteria if unique field(s) have been setup for this
        # silo. NOTE(review): row[uf.name] can raise KeyError when a row
        # lacks the unique field; only AttributeError is caught — confirm
        # _convert_gsheet_data guarantees every key is present.
        for uf in unique_fields:
            try:
                # Store digit strings as ints so lookups match stored values.
                if str(row[uf.name]).isdigit():
                    filter_criteria.update({str(uf.name): int(row[uf.name])})
                else:
                    filter_criteria.update({str(uf.name): str(row[uf.name])})
            except AttributeError as e:
                logger.warning(e)
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            # Update a unique existing record, create one if absent, and
            # skip the row when multiple matches make the target ambiguous.
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
            except LabelValueStore.MultipleObjectsReturned:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add('{}={}'.format(k, v))
                continue
        else:
            lvs = LabelValueStore()
        # add the key and values to the document
        row = clean_data_obj(row)
        for key, val in row.iteritems():
            val = smart_str(val, strings_only=True)
            setattr(lvs, key, val)
        lvs.silo_id = silo.id
        lvs.read_id = gsheet_read.id
        lvs.create_date = timezone.now()
        lvs = calculateFormulaCell(lvs, silo)
        if partialcomplete:
            lvss.append(lvs)
        else:
            lvs.save()
    addColsToSilo(silo, headers)
    if skipped_rows:
        skipped_str = ','.join(str(s) for s in skipped_rows)
        msg = 'Skipped updating/adding records where {} because there are ' \
              'already multiple records.'.format(skipped_str)
        msgs.append({'level': messages.WARNING, 'msg': msg})
    msgs.append({'level': messages.SUCCESS, 'msg': 'Operation successful'})
    if partialcomplete:
        return lvss, msgs
    return msgs
def handle(self, *args, **options): frequency = options['frequency'] if frequency != "daily" and frequency != "weekly": return self.stdout.write( "Frequency argument can either be 'daily' or 'weekly'") silos = Silo.objects.filter( reads__autopull_frequency__isnull=False, reads__autopull_frequency=frequency, reads__type__read_type="CommCare").distinct() commcare_type_id = ReadType.objects.get(read_type='CommCare').id for silo in silos: for read in silo.reads.all(): if read.type.read_type != 'CommCare': print 'skipping read', read continue conf = CommCareImportConfig(silo_id=silo.id, read_id=read.id, tables_user_id=silo.owner.pk, base_url=read.read_url, update=True) try: conf.set_auth_header() except Exception as e: self.stdout.write( 'No commcare api key for silo %s, read "%s"' % (silo.pk, read.pk)) continue # get/build metadata based on download_type if 'case' in conf.base_url: last_data_retrieved = str(getNewestDataDate(silo.id))[:10] conf.base_url += "&date_modified_start=" + last_data_retrieved conf.download_type = 'case' conf.record_count = get_commcare_record_count(conf) elif 'configurablereportdata' in conf.base_url: conf.download_type = 'commcare_report' url_parts = conf.base_url.split('/') conf.project = url_parts[4] conf.report_id = url_parts[8] conf.record_count = get_commcare_record_count(conf) elif '/form/' in conf.base_url: cache_obj = CommCareCache.objects.get( form_id=read.resource_id) conf.download_type = 'commcare_form' conf.project = cache_obj.project conf.form_name = cache_obj.form_name conf.form_id = cache_obj.form_id conf.base_url = read.read_url + '&received_on_start=' + cache_obj.last_updated.isoformat( )[:-6] conf.record_count = get_commcare_record_count(conf) response = requests.get(conf.base_url, headers=conf.auth_header) if response.status_code == 401: commcare_token.delete() self.stdout.write( 'Incorrect commcare api key for silo %s, read %s' % (silo.pk, read.pk)) continue elif response.status_code != 200: self.stdout.write( 'Failure 
retrieving commcare data for silo %s, read %s' % (silo.pk, read.pk)) continue if '/form/' in conf.base_url: client = MongoClient(settings.MONGODB_URI) db = client.get_database(settings.TOLATABLES_MONGODB_NAME) db.label_value_store.delete_many({'read_id': read.id}) cache_silo = Silo.objects.get(pk=cache_obj.silo.id) copy_from_cache(cache_silo, silo, read) #Now call the update data function in commcare tasks conf.base_url = read.read_url conf.use_token = True data_raw = fetchCommCareData(conf.to_dict(), conf.base_url, 0, 50) data_collects = data_raw.apply_async() new_colnames = set() for colset in [set(v.get()) for v in data_collects]: new_colnames.update(colset) cleaned_colnames = [cleanKey(name) for name in new_colnames] addColsToSilo(silo, cleaned_colnames) self.stdout.write( 'Successfully fetched silo %s, read %s from CommCare' % (silo.pk, read.pk))
def handle(self, *args, **options): disabled_projects = [] # Get a list of projects to import data from if options['project']: project = [options['project']] else: read_urls = Read.objects.filter(type__read_type='CommCare') \ .values_list('read_url', flat=True) projects = set() for read_url in read_urls: projects.add(read_url.split('/')[4]) base_url = 'https://www.commcarehq.org/a/%s/api/v0.5/form/' \ '?limit=1&received_on_start=%s' for project in projects: if project in disabled_projects: continue print 'Downloading ' + project try: cache_obj = CommCareCache.objects.filter(project=project) \ .first() last_updated = cache_obj.last_updated.isoformat()[:-6] except (IndexError, AttributeError): last_updated = '1980-01-01' conf = CommCareImportConfig( project=project, tables_user_id=User.objects.get(email='*****@*****.**', username='******').id, base_url=base_url % (project, last_updated), download_type='commcare_form', update=True, use_token=True, for_cache=True) try: conf.set_auth_header() except ThirdPartyTokens.DoesNotExist: self.stdout.write('CommCare Token is not in the DB') continue response = requests.get(conf.base_url, headers=conf.auth_header) if response.status_code == 401: commcare_token = ThirdPartyTokens.objects.get(token=conf.token) commcare_token.delete() self.stdout.write('Incorrect CommCare api key for project %s' % project) continue elif response.status_code != 200: self.stdout.write( 'Failure retrieving CommCare data for project %s' % project) conf.record_count = get_commcare_record_count(conf) if conf.record_count == 0: self.stdout.write( 'Fetching of record counts failed. 
Skipping project %s' % project) continue # Trigger the data retrieval and retrieve the column names url = conf.base_url.replace('limit=1', 'limit=100') data_raw = fetchCommCareData(conf.to_dict(), url, 0, 100) data_collects = data_raw.apply_async() data_retrieval = [v.get() for v in data_collects] # Add columns to the Silo and max date to Cache silo_ref = {} save_date = datetime.datetime(1980, 1, 1).replace(tzinfo=pytz.UTC) for column_dict, max_date in data_retrieval: for silo_id in column_dict: cleaned_colnames = [ cleanKey(name) for name in column_dict[silo_id] ] if silo_id in silo_ref: silo = silo_ref[silo_id] addColsToSilo(silo, cleaned_colnames) else: try: silo = Silo.objects.get(pk=silo_id) silo_ref[silo_id] = silo addColsToSilo(silo, cleaned_colnames) except ValueError as e: print 'ERROR: Null value for silo_id' print 'Conf object: ', conf print 'Column dict', column_dict print 'Error message: ', e save_date = max( parse(max_date).replace(tzinfo=pytz.UTC), save_date) for cache_obj in CommCareCache.objects.filter(project=project): cache_obj.last_updated = save_date cache_obj.save() self.stdout.write('Successfully fetched project %s from CommCare' % conf.project)