def test_write_to_sql_with_checkpoints_multiple_tables(
        self, writer, checkpoint_manager, caplog):
    with open('tests/009b_expected_form_1_data.csv', 'r') as f:
        reader = csv.reader(f)
        expected_form_1_data = list(reader)[1:]

    with open('tests/009b_expected_form_2_data.csv', 'r') as f:
        reader = csv.reader(f)
        expected_form_2_data = list(reader)[1:]

    _pull_data(writer, checkpoint_manager,
               'tests/009b_integration_multiple.xlsx', None, '2012-05-01')
    self._check_checkpoints(caplog, ['forms_1', 'final', 'forms_2', 'final'])
    self._check_checkpoints(caplog, ['forms_1', 'forms_1', 'forms_2', 'forms_2'])
    self._check_data(writer, expected_form_1_data, 'forms_1')
    self._check_data(writer, expected_form_2_data, 'forms_2')

    runs = list(
        writer.engine.execute(
            'SELECT table_name, since_param from commcare_export_runs where query_file_name = %s',
            'tests/009b_integration_multiple.xlsx'))
    assert {r[0]: r[1] for r in runs} == {
        'forms_1': '2012-04-27T10:05:55',
        'forms_2': '2012-04-27T14:23:50',
    }
def _load_csv(self, path):
    with open(path, encoding='utf-8') as f:
        csv_data = list(csv.reader(f))
    headers = csv_data[0]
    for row_count, row in enumerate(csv_data):
        csv_data[row_count] = dict(zip(headers, row))
    return csv_data[1:]
def test_write_to_sql_with_checkpoints(self, writer, checkpoint_manager, caplog):
    with open('tests/009_expected_form_data.csv', 'r') as f:
        reader = csv.reader(f)
        expected_form_data = list(reader)[1:]

    _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx',
               '2012-01-01', '2012-08-01')
    self._check_checkpoints(caplog, ['forms', 'batch', 'final'])
    self._check_data(writer, expected_form_data[:16], 'forms')

    caplog.clear()
    _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx',
               None, '2012-09-01', batch_size=8)
    self._check_data(writer, expected_form_data, 'forms')
    self._check_checkpoints(caplog, ['forms', 'batch', 'final'])

    runs = list(
        writer.engine.execute(
            'SELECT * from commcare_export_runs where query_file_name = %s',
            'tests/009_integration.xlsx'))
    assert len(runs) == 2, runs
def _trim_csv_columns(path, dest, cols_to_keep):
    # Open in text mode with newline='' so csv.reader/csv.writer receive str
    # rows (the original 'rb'/'wb' modes only work on Python 2).
    with open(path, newline='') as source:
        rdr = csv.reader(source)
        with open(dest, 'w', newline='') as result:
            wtr = csv.writer(result)
            for r in rdr:
                wtr.writerow([r[i] for i in cols_to_keep])
def _get_data_from_blobdb(self, indicator, state_id, month):
    sync, _ = get_cas_data_blob_file(indicator, state_id, month)
    csv_file = sync.get_file_from_blobdb()
    csv_data = list(csv.reader(csv_file))
    headers = csv_data[0]
    rows = csv_data[1:]
    for row_count, row in enumerate(rows):
        rows[row_count] = dict(zip(headers, row))
    return rows
def _get_data_from_blobdb(self, indicator, state_id, month):
    sync, _ = get_cas_data_blob_file(indicator, state_id, month)
    with sync.get_file_from_blobdb() as fileobj:
        csv_file = io.TextIOWrapper(fileobj, encoding='utf-8')
        csv_data = list(csv.reader(csv_file))
    headers = csv_data[0]
    rows = csv_data[1:]
    for row_count, row in enumerate(rows):
        rows[row_count] = dict(zip(headers, row))
    return rows
def generate_repeater_payloads(request, domain):
    try:
        email_id = request.POST.get('email_id')
        repeater_id = request.POST.get('repeater_id')
        data = csv.reader(request.FILES['payload_ids_file'])
        payload_ids = [row[0] for row in data]
    except Exception as e:
        messages.error(request, _("Could not process the file. %s") % str(e))
    else:
        send_repeater_payloads.delay(repeater_id, payload_ids, email_id)
        messages.success(
            request,
            _("Successfully queued request. You should receive an email shortly."))
    return HttpResponseRedirect(request.META.get('HTTP_REFERER', '/'))
def get_user_data_from_csv(self):
    filepath = self.options.file
    if not os.path.isfile(filepath):
        raise Exception("Can't retrieve user data. %s does not exist" % filepath)
    user_data_list = []
    wrong_rows = []
    with io.open(filepath, encoding=self.options.encoding, newline='') as csv_file:
        self.log("Reading file %s ......" % filepath, True)
        csv_reader = csv.reader(csv_file, delimiter=',')
        for i, row in enumerate(csv_reader):
            if i == 0:
                fieldnames = row
            else:
                if len(fieldnames) != len(row):
                    self.log(
                        "Line %s of the CSV has incomplete data %s fields instead of %s"
                        % (i, len(row), len(fieldnames)))
                    wrong_rows.append(row)
                else:
                    user_data = {'data': {}, 'custom_data': {}}
                    for j, value in enumerate(row):
                        fieldname = fieldnames[j]
                        if 'custom_attribute_' in fieldname:
                            custom_attr_name = fieldname.replace(
                                'custom_attribute_', '')
                            user_data['custom_data'][custom_attr_name] = value
                        else:
                            user_data['data'][fieldname] = value
                    user_data_list.append(user_data)

    if wrong_rows:
        original_filename = os.path.basename(filepath)
        wrong_rows_filename = 'invalid_%s' % original_filename
        wrong_rows_filepath = filepath.replace(original_filename, wrong_rows_filename)
        wrong_rows_file = open(wrong_rows_filepath, 'w')
        writer = csv.writer(wrong_rows_file, dialect=csv.excel)
        writer.writerow(fieldnames)
        for row in wrong_rows:
            writer.writerow(row)
        wrong_rows_file.close()
        self.log(
            "Found %s wrong rows, will be skipped from the process so they are exported to %s"
            % (len(wrong_rows), wrong_rows_filepath), True)
    return user_data_list
def test_get_inactive_users(self):
    IcdsFile.objects.filter(data_type='inactive_dashboard_users').all().delete()
    collect_inactive_dashboard_users()
    sync = IcdsFile.objects.filter(
        data_type='inactive_dashboard_users').order_by('-file_added').first()
    with sync.get_file_from_blobdb() as fileobj:
        zip = zipfile.ZipFile(fileobj, 'r')
        for zipped_file in zip.namelist():
            items_file = zip.open(zipped_file)
            items_file = io.TextIOWrapper(io.BytesIO(items_file.read()))
            csv_reader = csv.reader(items_file)
            data = list(csv_reader)
            self.assertEqual([['Username', 'Location', 'State']], data)
def handle(self, file_paths, **options):
    for arg in file_paths:
        with open(arg, 'r', encoding='utf-8') as file:
            rows = []
            reader = csv.reader(file)
            header_row = True
            for row in reader:
                if header_row:
                    headers = row
                    header_row = False
                else:
                    rows.append({headers[index]: item
                                 for index, item in enumerate(row)})
            MALTRow.objects.bulk_create(
                [MALTRow(**malt_dict) for malt_dict in rows])
def _read_and_write_csv():
    has_temp_file = False
    try:
        with io.open(_TEST_CSV_PATH, 'w', encoding='utf-8', newline='') as csv_file:
            has_temp_file = True
            csv_writer = csv.writer(csv_file)
            for _ in range(_TEST_ROW_COUNT):
                csv_writer.writerow(_TEST_ROW)
        with io.open(_TEST_CSV_PATH, 'r', encoding='utf-8', newline='') as csv_file:
            csv_reader = csv.reader(csv_file)
            for _ in csv_reader:
                pass
    finally:
        if has_temp_file:
            os.remove(_TEST_CSV_PATH)
def handle(self, infile, outfile, *args, **options):
    self.case_accessor = CaseAccessors('icds-cas')
    with open(infile, 'r', encoding='utf-8') as old, \
            open(outfile, 'w', encoding='utf-8') as new:
        reader = csv.reader(old)
        writer = csv.writer(new)
        headers = next(reader)
        writer.writerow(headers)
        for row in reader:
            case_id = row[4]
            hh_id = row[10]
            if hh_id:
                person, hh = self.case_accessor.get_cases(
                    [case_id, hh_id], ordered=True)
            else:
                person = self.case_accessor.get_case(case_id)
                hh = None
            if hh:
                row[18] = hh.get_case_property('name')
                row[19] = hh.get_case_property('hh_num')
            row[20] = person.get_case_property('name')
            writer.writerow(row)
def test_get_inactive_users_data_added(self):
    CommCareUser(domain='icds-cas', username='******').save()
    CommCareUser(domain='icds-cas', username='******').save()
    IcdsFile.objects.filter(data_type='inactive_dashboard_users').all().delete()
    collect_inactive_dashboard_users()
    sync = IcdsFile.objects.filter(
        data_type='inactive_dashboard_users').order_by('-file_added').first()
    with sync.get_file_from_blobdb() as fileobj:
        zip = zipfile.ZipFile(fileobj, 'r')
        for zipped_file in zip.namelist():
            items_file = zip.open(zipped_file)
            items_file = io.TextIOWrapper(io.BytesIO(items_file.read()))
            csv_reader = csv.reader(items_file)
            data = list(csv_reader)
            sorted(data, key=lambda x: x[0])
            self.assertEqual(['Username', 'Location', 'State'], data[0])
            self.assertCountEqual([
                ['*****@*****.**', '', ''],
                ['*****@*****.**', '', ''],
            ], data[1:])
def handle(self, domain, infile, logfile, *args, **options):
    self.domain = domain
    self.case_accessor = CaseAccessors(self.domain)
    with open(infile, 'r', encoding='utf-8') as f, \
            open(logfile, 'w', encoding='utf-8') as log:
        reader = csv.reader(f)
        _, case_prop_name = next(reader)
        log.write('--------Successful Form Ids----------\n')
        failed_updates = []
        for rows in chunked(reader, 100):
            updates = [(case_id, {case_prop_name: prop}, False)
                       for case_id, prop in rows]
            try:
                xform, cases = bulk_update_cases(
                    self.domain, updates, self.__module__)
                log.write(xform.form_id + '\n')
            except Exception as e:
                print('error')
                print(six.text_type(e))
                failed_updates.extend(u[0] for u in updates)
        log.write('--------Failed Cases--------------\n')
        for case_id in failed_updates:
            log.write(case_id + '\n')
        log.write('--------Logging Complete--------------\n')
        print('-------------COMPLETE--------------')
def test_CsvTableWriter(self):
    with tempfile.NamedTemporaryFile() as file:
        with CsvTableWriter(file=file) as writer:
            writer.write_table(
                TableSpec(**{
                    'name': 'foo',
                    'headings': ['a', 'bjørn', 'c'],
                    'rows': [
                        [1, '2', 3],
                        [4, '日本', 6],
                    ],
                }))

        with zipfile.ZipFile(file.name, 'r') as output_zip:
            with output_zip.open('foo.csv') as csv_file:
                output = csv.reader(
                    io.TextIOWrapper(csv_file, encoding='utf-8'))
                assert [row for row in output] == [
                    ['a', 'bjørn', 'c'],
                    ['1', '2', '3'],
                    ['4', '日本', '6'],
                ]
def test_write_to_sql_with_checkpoints(self, writer, checkpoint_manager, caplog):

    def _pull_data(since, until, batch_size=10):
        args = make_args(
            query='tests/009_integration.xlsx',
            output_format='sql',
            output='',
            username=os.environ['HQ_USERNAME'],
            password=os.environ['HQ_API_KEY'],
            auth_mode='apikey',
            project='corpora',
            batch_size=batch_size,
            since=since,
            until=until,
        )

        # have to mock these to override the pool class otherwise they hold
        # the db connection open
        writer_patch = mock.patch(
            'commcare_export.cli._get_writer', return_value=writer)
        checkpoint_patch = mock.patch(
            'commcare_export.cli._get_checkpoint_manager',
            return_value=checkpoint_manager)
        with writer_patch, checkpoint_patch:
            main_with_args(args)

    with open('tests/009_expected_form_data.csv', 'r') as f:
        reader = csv.reader(f)
        expected_form_data = list(reader)[1:]

    _pull_data('2012-01-01', '2012-08-01')
    self._check_checkpoints(caplog, ['batch', 'batch', 'final'])
    self._check_data(writer, expected_form_data[:16])

    caplog.clear()
    _pull_data(None, '2012-09-01', batch_size=20)
    self._check_data(writer, expected_form_data)
    self._check_checkpoints(caplog, ['batch', 'final'])

    runs = list(writer.engine.execute('SELECT * from commcare_export_runs'))
    assert len(runs) == 2
def _data(self, name, delimiter=',', encoding='utf-8'):
    with self._open(name, encoding=encoding) as csv_file:
        result = list(csv.reader(csv_file, delimiter=delimiter))
    return result
def get_user_data_from_csv(self):
    filepath = self.options.file
    if not os.path.isfile(filepath):
        raise Exception("Can't retrieve user data. %s does not exist" % filepath)
    user_data_list = []
    wrong_rows = []
    with io.open(filepath, encoding=self.options.encoding, newline='') as csv_file:
        self.log("Reading file %s ......" % filepath, True)
        csv_reader = csv.reader(csv_file, delimiter=',')
        for i, row in enumerate(csv_reader):
            if i == 0:
                fieldnames = row
                # Attribute mapping if applies
                if ATTRIBUTE_MAPPINGS:
                    names_to_change = ATTRIBUTE_MAPPINGS.keys()
                    for j, value in enumerate(fieldnames):
                        if fieldnames[j] in names_to_change:
                            fieldnames[j] = ATTRIBUTE_MAPPINGS[fieldnames[j]]
            else:
                if len(fieldnames) != len(row):
                    self.log(
                        "Line %s of the CSV has incomplete data %s fields instead of %s"
                        % (i, len(row), len(fieldnames)))
                    wrong_rows.append(row)
                else:
                    user_data = {'data': {}, 'custom_data': {}}
                    for j, value in enumerate(row):
                        fieldname = fieldnames[j]
                        if 'custom_attribute_' in fieldname:
                            custom_attr_name = fieldname.replace(
                                'custom_attribute_', '')
                            user_data['custom_data'][custom_attr_name] = value
                        else:
                            user_data['data'][fieldname] = value
                    user_data_list.append(user_data)

    # Now apply manipulations
    if SPLIT_ATTRIBUTES_BY_FIRST_CHAR.items():
        for key, data in SPLIT_ATTRIBUTES_BY_FIRST_CHAR.items():
            if key in fieldnames:
                char = data['char']
                maxsplit = len(data['destination'])
                for j, user_data in enumerate(user_data_list.copy()):
                    if 'custom_attribute_' in key:
                        value = user_data['custom_data'][
                            key.replace('custom_attribute_', '')]
                    else:
                        value = user_data['data'][key]
                    list_values = value.split(char, maxsplit)
                    for z, dest_key in enumerate(data['destination']):
                        value = ""
                        if z < len(list_values):
                            value = list_values[z]
                        if 'custom_attribute_' in dest_key:
                            dest_key = dest_key.replace('custom_attribute_', '')
                            if value or dest_key not in user_data['custom_data'] \
                                    or not user_data['custom_data'][dest_key]:
                                user_data['custom_data'][dest_key] = value
                        else:
                            if value or dest_key not in user_data['data'] \
                                    or not user_data['data'][dest_key]:
                                user_data['data'][dest_key] = value
                    user_data_list[j] = user_data

    if wrong_rows:
        original_filename = os.path.basename(filepath)
        wrong_rows_filename = 'invalid_%s' % original_filename
        wrong_rows_filepath = filepath.replace(original_filename, wrong_rows_filename)
        wrong_rows_file = open(wrong_rows_filepath, 'w')
        writer = csv.writer(wrong_rows_file, dialect=csv.excel)
        writer.writerow(fieldnames)
        for row in wrong_rows:
            writer.writerow(row)
        wrong_rows_file.close()
        self.log(
            "Found %s wrong rows, will be skipped from the process so they are exported to %s"
            % (len(wrong_rows), wrong_rows_filepath), True)
    return user_data_list
def test_can_read_from_StringIO(self):
    with io.StringIO('ä,b\nc') as csv_stream:
        csv_reader = csv.reader(csv_stream, delimiter=',')
        actual_rows = list(csv_reader)
    self.assertEqual([['ä', 'b'], ['c']], actual_rows)
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
    f = UTF8Recoder(f, encoding)
    self.reader = csv.reader(f, dialect=dialect, **kwds)