Example #1
    def test_write_to_sql_with_checkpoints_multiple_tables(
            self, writer, checkpoint_manager, caplog):
        with open('tests/009b_expected_form_1_data.csv', 'r') as f:
            reader = csv.reader(f)
            expected_form_1_data = list(reader)[1:]

        with open('tests/009b_expected_form_2_data.csv', 'r') as f:
            reader = csv.reader(f)
            expected_form_2_data = list(reader)[1:]

        _pull_data(writer, checkpoint_manager,
                   'tests/009b_integration_multiple.xlsx', None, '2012-05-01')
        self._check_checkpoints(caplog,
                                ['forms_1', 'final', 'forms_2', 'final'])
        self._check_checkpoints(caplog,
                                ['forms_1', 'forms_1', 'forms_2', 'forms_2'])
        self._check_data(writer, expected_form_1_data, 'forms_1')
        self._check_data(writer, expected_form_2_data, 'forms_2')

        runs = list(
            writer.engine.execute(
                'SELECT table_name, since_param from commcare_export_runs where query_file_name = %s',
                'tests/009b_integration_multiple.xlsx'))
        assert {r[0]: r[1]
                for r in runs} == {
                    'forms_1': '2012-04-27T10:05:55',
                    'forms_2': '2012-04-27T14:23:50'
                }
Example #3
 def _load_csv(self, path):
     with open(path, encoding='utf-8') as f:
         csv_data = list(csv.reader(f))
         headers = csv_data[0]
         for row_count, row in enumerate(csv_data):
             csv_data[row_count] = dict(zip(headers, row))
     return csv_data[1:]
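
The manual header-zipping in Example #3 is exactly what csv.DictReader does natively. A minimal equivalent sketch (the helper name and file path are illustrative):

import csv

def load_csv_as_dicts(path):
    # DictReader pairs every data row with the header row automatically
    with open(path, encoding='utf-8', newline='') as f:
        return list(csv.DictReader(f))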
Example #4
    def test_write_to_sql_with_checkpoints(self, writer, checkpoint_manager,
                                           caplog):
        with open('tests/009_expected_form_data.csv', 'r') as f:
            reader = csv.reader(f)
            expected_form_data = list(reader)[1:]

        _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx',
                   '2012-01-01', '2012-08-01')
        self._check_checkpoints(caplog, ['forms', 'batch', 'final'])
        self._check_data(writer, expected_form_data[:16], 'forms')

        caplog.clear()
        _pull_data(writer,
                   checkpoint_manager,
                   'tests/009_integration.xlsx',
                   None,
                   '2012-09-01',
                   batch_size=8)
        self._check_data(writer, expected_form_data, 'forms')
        self._check_checkpoints(caplog, ['forms', 'batch', 'final'])

        runs = list(
            writer.engine.execute(
                'SELECT * from commcare_export_runs where query_file_name = %s',
                'tests/009_integration.xlsx'))
        assert len(runs) == 2, runs
Example #5
def _trim_csv_columns(path, dest, cols_to_keep):
    # csv needs text-mode files on Python 3; newline='' avoids extra blank rows
    with open(path, 'r', newline='') as source:
        rdr = csv.reader(source)
        with open(dest, 'w', newline='') as result:
            wtr = csv.writer(result)
            for r in rdr:
                wtr.writerow([r[i] for i in cols_to_keep])
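
A note on the text-mode fix above: the snippet originally opened the files in 'rb'/'wb', which only works on Python 2. On Python 3 the csv module wants text-mode files, and newline='' prevents spurious blank rows on Windows while keeping embedded newlines intact. A self-contained demonstration:

import csv
import os
import tempfile

# round-trip a field containing a newline; newline='' keeps it intact
fd, tmp_path = tempfile.mkstemp(suffix='.csv')
os.close(fd)
with open(tmp_path, 'w', newline='') as f:
    csv.writer(f).writerow(['a', 'line1\nline2'])
with open(tmp_path, newline='') as f:
    assert next(csv.reader(f)) == ['a', 'line1\nline2']
os.remove(tmp_path)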
Example #7
 def _get_data_from_blobdb(self, indicator, state_id, month):
     sync, _ = get_cas_data_blob_file(indicator, state_id, month)
     csv_file = sync.get_file_from_blobdb()
     csv_data = list(csv.reader(csv_file))
     headers = csv_data[0]
     rows = csv_data[1:]
     for row_count, row in enumerate(rows):
         rows[row_count] = dict(zip(headers, row))
     return rows
Example #8
 def _get_data_from_blobdb(self, indicator, state_id, month):
     sync, _ = get_cas_data_blob_file(indicator, state_id, month)
     with sync.get_file_from_blobdb() as fileobj:
         csv_file = io.TextIOWrapper(fileobj, encoding='utf-8')
         csv_data = list(csv.reader(csv_file))
     headers = csv_data[0]
     rows = csv_data[1:]
     for row_count, row in enumerate(rows):
         rows[row_count] = dict(zip(headers, row))
     return rows
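
Examples #7 and #8 differ in one important way: csv.reader needs a text stream, so the binary file coming out of blob storage has to be wrapped in io.TextIOWrapper first. A minimal sketch of the pattern, with io.BytesIO standing in for get_file_from_blobdb():

import csv
import io

binary_stream = io.BytesIO(b'name,state\nfoo,ka\n')  # stand-in for the blob file
csv_file = io.TextIOWrapper(binary_stream, encoding='utf-8')
rows = list(csv.reader(csv_file))
assert rows == [['name', 'state'], ['foo', 'ka']]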
Example #10
def generate_repeater_payloads(request, domain):
    try:
        email_id = request.POST.get('email_id')
        repeater_id = request.POST.get('repeater_id')
        data = csv.reader(request.FILES['payload_ids_file'])
        payload_ids = [row[0] for row in data]
    except Exception as e:
        messages.error(request, _("Could not process the file. %s") % str(e))
    else:
        send_repeater_payloads.delay(repeater_id, payload_ids, email_id)
        messages.success(request, _("Successfully queued request. You should receive an email shortly."))
    return HttpResponseRedirect(request.META.get('HTTP_REFERER', '/'))
Example #11
    def get_user_data_from_csv(self):
        filepath = self.options.file
        if not os.path.isfile(filepath):
            raise Exception("Can't retrieve user data. %s does not exist" %
                            filepath)

        user_data_list = []
        wrong_rows = []

        with io.open(filepath, encoding=self.options.encoding,
                     newline='') as csv_file:
            self.log("Reading file %s ......" % filepath, True)
            csv_reader = csv.reader(csv_file, delimiter=',')
            for i, row in enumerate(csv_reader):

                if i == 0:
                    fieldnames = row
                else:
                    if len(fieldnames) != len(row):
                        self.log(
                            "Line %s of the CSV has incomplete data: %s fields instead of %s"
                            % (i, len(row), len(fieldnames)))
                        wrong_rows.append(row)
                    else:
                        user_data = {'data': {}, 'custom_data': {}}
                        for j, value in enumerate(row):
                            fieldname = fieldnames[j]
                            if 'custom_attribute_' in fieldname:
                                custom_attr_name = fieldname.replace(
                                    'custom_attribute_', '')
                                user_data['custom_data'][
                                    custom_attr_name] = value
                            else:
                                user_data['data'][fieldname] = value
                        user_data_list.append(user_data)

        if wrong_rows:
            original_filename = os.path.basename(filepath)
            wrong_rows_filename = 'invalid_%s' % original_filename
            wrong_rows_filepath = filepath.replace(original_filename,
                                                   wrong_rows_filename)
            with open(wrong_rows_filepath, 'w', newline='') as wrong_rows_file:
                writer = csv.writer(wrong_rows_file, dialect=csv.excel)
                writer.writerow(fieldnames)
                for row in wrong_rows:
                    writer.writerow(row)
            self.log(
                "Found %s invalid rows; they will be skipped and exported to %s"
                % (len(wrong_rows), wrong_rows_filepath), True)

        return user_data_list
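
The column routing above (splitting custom_attribute_* fields from plain ones) can be expressed more compactly on top of csv.DictReader. A sketch: the prefix and the data/custom_data layout come from the example, the helper name is made up:

import csv

def split_user_row(row_dict, prefix='custom_attribute_'):
    # route prefixed columns into custom_data, everything else into data
    user_data = {'data': {}, 'custom_data': {}}
    for fieldname, value in row_dict.items():
        if prefix in fieldname:
            user_data['custom_data'][fieldname.replace(prefix, '')] = value
        else:
            user_data['data'][fieldname] = value
    return user_data

# usage:
#   with open(path, encoding='utf-8', newline='') as f:
#       user_data_list = [split_user_row(row) for row in csv.DictReader(f)]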
Example #12
    def test_get_inactive_users(self):
        IcdsFile.objects.filter(
            data_type='inactive_dashboard_users').all().delete()
        collect_inactive_dashboard_users()
        sync = IcdsFile.objects.filter(data_type='inactive_dashboard_users'
                                       ).order_by('-file_added').first()
        with sync.get_file_from_blobdb() as fileobj:
            zf = zipfile.ZipFile(fileobj, 'r')  # avoid shadowing the builtin zip()
            for zipped_file in zf.namelist():
                items_file = zf.open(zipped_file)
                items_file = io.TextIOWrapper(io.BytesIO(items_file.read()))
                csv_reader = csv.reader(items_file)
                data = list(csv_reader)

                self.assertEqual([['Username', 'Location', 'State']], data)
Example #13
 def handle(self, file_paths, **options):
     for arg in file_paths:
         with open(arg, 'r', encoding='utf-8') as file:
             rows = []
             reader = csv.reader(file)
             header_row = True
             for row in reader:
                 if header_row:
                     headers = row
                     header_row = False
                 else:
                     rows.append({headers[index]: item for index, item in enumerate(row)})
             MALTRow.objects.bulk_create(
                 [MALTRow(**malt_dict) for malt_dict in rows]
             )
Example #14
def _read_and_write_csv():
    has_temp_file = False
    try:
        with io.open(_TEST_CSV_PATH, 'w', encoding='utf-8', newline='') as csv_file:
            has_temp_file = True
            csv_writer = csv.writer(csv_file)
            for _ in range(_TEST_ROW_COUNT):
                csv_writer.writerow(_TEST_ROW)
        with io.open(_TEST_CSV_PATH, 'r', encoding='utf-8', newline='') as csv_file:
            csv_reader = csv.reader(csv_file)
            for _ in csv_reader:
                pass
    finally:
        if has_temp_file:
            os.remove(_TEST_CSV_PATH)
Example #17
 def handle(self, infile, outfile, *args, **options):
     self.case_accessor = CaseAccessors('icds-cas')
     with open(infile, 'r', encoding='utf-8') as old, open(outfile, 'w', encoding='utf-8') as new:
         reader = csv.reader(old)
         writer = csv.writer(new)
         headers = next(reader)
         writer.writerow(headers)
         for row in reader:
             case_id = row[4]
             hh_id = row[10]
             if hh_id:
                 person, hh = self.case_accessor.get_cases([case_id, hh_id], ordered=True)
             else:
                 person = self.case_accessor.get_case(case_id)
                 hh = None
             if hh:
                 row[18] = hh.get_case_property('name')
                 row[19] = hh.get_case_property('hh_num')
             row[20] = person.get_case_property('name')
             writer.writerow(row)
Example #18
 def test_get_inactive_users_data_added(self):
     CommCareUser(domain='icds-cas',
                  username='******').save()
     CommCareUser(domain='icds-cas',
                  username='******').save()
     IcdsFile.objects.filter(
         data_type='inactive_dashboard_users').all().delete()
     collect_inactive_dashboard_users()
     sync = IcdsFile.objects.filter(data_type='inactive_dashboard_users'
                                    ).order_by('-file_added').first()
     with sync.get_file_from_blobdb() as fileobj:
         zf = zipfile.ZipFile(fileobj, 'r')  # avoid shadowing the builtin zip()
         for zipped_file in zf.namelist():
             items_file = zf.open(zipped_file)
             items_file = io.TextIOWrapper(io.BytesIO(items_file.read()))
             csv_reader = csv.reader(items_file)
             data = list(csv_reader)
             data[1:] = sorted(data[1:], key=lambda x: x[0])  # a bare sorted() discards its result
             self.assertEqual(['Username', 'Location', 'State'], data[0])
             self.assertCountEqual([
                 ['*****@*****.**', '', ''],
                 ['*****@*****.**', '', ''],
             ], data[1:])
Example #19
 def handle(self, domain, infile, logfile, *args, **options):
     self.domain = domain
     self.case_accessor = CaseAccessors(self.domain)
     with open(infile, 'r', encoding='utf-8') as f, open(logfile, 'w', encoding='utf-8') as log:
         reader = csv.reader(f)
         _, case_prop_name = next(reader)
         log.write('--------Successful Form Ids----------\n')
         failed_updates = []
         for rows in chunked(reader, 100):
             updates = [(case_id, {case_prop_name: prop}, False) for case_id, prop in rows]
             try:
                 xform, cases = bulk_update_cases(
                     self.domain, updates, self.__module__)
                 log.write(xform.form_id + '\n')
             except Exception as e:
                 print('error')
                 print(six.text_type(e))
                 failed_updates.extend(u[0] for u in updates)
         log.write('--------Failed Cases--------------\n')
         for case_id in failed_updates:
             log.write(case_id + '\n')
         log.write('--------Logging Complete--------------\n')
         print('-------------COMPLETE--------------')
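
chunked() above is imported from a utility library in the original project (more_itertools ships the same idea as more_itertools.chunked). A minimal stand-in that batches any iterator, including a csv.reader, without loading the whole file:

import itertools

def chunked(iterable, size):
    # yield successive lists of at most `size` items
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, size))
        if not chunk:
            return
        yield chunk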
Example #20
    def test_CsvTableWriter(self):
        with tempfile.NamedTemporaryFile() as file:
            with CsvTableWriter(file=file) as writer:
                writer.write_table(
                    TableSpec(
                        **{
                            'name': 'foo',
                            'headings': ['a', 'bjørn', 'c'],
                            'rows': [
                                [1, '2', 3],
                                [4, '日本', 6],
                            ]
                        }))

            with zipfile.ZipFile(file.name, 'r') as output_zip:
                with output_zip.open('foo.csv') as csv_file:
                    output = csv.reader(
                        io.TextIOWrapper(csv_file, encoding='utf-8'))

                    assert [row for row in output] == [
                        ['a', 'bjørn', 'c'],
                        ['1', '2', '3'],
                        ['4', '日本', '6'],
                    ]
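
Example #20 reads a CSV member out of a zip through io.TextIOWrapper; the write side is the mirror image. A minimal in-memory sketch (ZipFile.open in 'w' mode needs Python 3.6+):

import csv
import io
import zipfile

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as zf:
    with zf.open('foo.csv', 'w') as member:
        text = io.TextIOWrapper(member, encoding='utf-8', newline='')
        csv.writer(text).writerow(['a', 'bjørn', 'c'])
        text.detach()  # flush and release the member before it closes

with zipfile.ZipFile(buf) as zf, zf.open('foo.csv') as member:
    row = next(csv.reader(io.TextIOWrapper(member, encoding='utf-8')))
    assert row == ['a', 'bjørn', 'c']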
Example #21
    def test_write_to_sql_with_checkpoints(self, writer, checkpoint_manager, caplog):
        def _pull_data(since, until, batch_size=10):
            args = make_args(
                query='tests/009_integration.xlsx',
                output_format='sql',
                output='',
                username=os.environ['HQ_USERNAME'],
                password=os.environ['HQ_API_KEY'],
                auth_mode='apikey',
                project='corpora',
                batch_size=batch_size,
                since=since,
                until=until
            )

            # mock these to override the pool class; otherwise they hold the DB connection open
            writer_patch = mock.patch('commcare_export.cli._get_writer', return_value=writer)
            checkpoint_patch = mock.patch('commcare_export.cli._get_checkpoint_manager', return_value=checkpoint_manager)
            with writer_patch, checkpoint_patch:
                main_with_args(args)

        with open('tests/009_expected_form_data.csv', 'r') as f:
            reader = csv.reader(f)
            expected_form_data = list(reader)[1:]

        _pull_data('2012-01-01', '2012-08-01')
        self._check_checkpoints(caplog, ['batch', 'batch', 'final'])
        self._check_data(writer, expected_form_data[:16])

        caplog.clear()
        _pull_data(None, '2012-09-01', batch_size=20)
        self._check_data(writer, expected_form_data)
        self._check_checkpoints(caplog, ['batch', 'final'])

        runs = list(writer.engine.execute('SELECT * from commcare_export_runs'))
        assert len(runs) == 2
Example #22
 def _data(self, name, delimiter=',', encoding='utf-8'):
     with self._open(name, encoding=encoding) as csv_file:
         result = list(csv.reader(csv_file, delimiter=delimiter))
     return result
Example #23
    def get_user_data_from_csv(self):
        filepath = self.options.file
        if not os.path.isfile(filepath):
            raise Exception("Can't retrieve user data. %s does not exist" %
                            filepath)

        user_data_list = []
        wrong_rows = []

        with io.open(filepath, encoding=self.options.encoding,
                     newline='') as csv_file:
            self.log("Reading file %s ......" % filepath, True)
            csv_reader = csv.reader(csv_file, delimiter=',')
            for i, row in enumerate(csv_reader):

                if i == 0:
                    fieldnames = row

                    # Attribute mapping if applies
                    if ATTRIBUTE_MAPPINGS:
                        names_to_change = ATTRIBUTE_MAPPINGS.keys()
                        for j, value in enumerate(fieldnames):
                            if fieldnames[j] in names_to_change:
                                fieldnames[j] = ATTRIBUTE_MAPPINGS[
                                    fieldnames[j]]
                else:
                    if len(fieldnames) != len(row):
                        self.log(
                            "Line %s of the CSV has incomplete data: %s fields instead of %s"
                            % (i, len(row), len(fieldnames)))
                        wrong_rows.append(row)
                    else:
                        user_data = {'data': {}, 'custom_data': {}}
                        for j, value in enumerate(row):
                            fieldname = fieldnames[j]
                            if 'custom_attribute_' in fieldname:
                                custom_attr_name = fieldname.replace(
                                    'custom_attribute_', '')
                                user_data['custom_data'][
                                    custom_attr_name] = value
                            else:
                                user_data['data'][fieldname] = value
                        user_data_list.append(user_data)
            # Now apply manipulations
            if SPLIT_ATTRIBUTES_BY_FIRST_CHAR:
                for key, data in SPLIT_ATTRIBUTES_BY_FIRST_CHAR.items():
                    if key in fieldnames:
                        char = data['char']
                        maxsplit = len(data['destination'])
                        for j, user_data in enumerate(user_data_list.copy()):
                            if 'custom_attribute_' in key:
                                value = user_data['custom_data'][key.replace(
                                    'custom_attribute_', '')]
                            else:
                                value = user_data['data'][key]
                            list_values = value.split(char, maxsplit)
                            for z, dest_key in enumerate(data['destination']):
                                value = ""
                                if z < len(list_values):
                                    value = list_values[z]
                                if 'custom_attribute_' in dest_key:
                                    dest_key = dest_key.replace(
                                        'custom_attribute_', '')
                                    if value or dest_key not in user_data[
                                            'custom_data'] or not user_data[
                                                'custom_data'][dest_key]:
                                        user_data['custom_data'][
                                            dest_key] = value
                                else:
                                    if value or dest_key not in user_data[
                                            'data'] or not user_data['data'][
                                                dest_key]:
                                        user_data['data'][dest_key] = value
                        user_data_list[j] = user_data

        if wrong_rows:
            original_filename = os.path.basename(filepath)
            wrong_rows_filename = 'invalid_%s' % original_filename
            wrong_rows_filepath = filepath.replace(original_filename,
                                                   wrong_rows_filename)
            with open(wrong_rows_filepath, 'w', newline='') as wrong_rows_file:
                writer = csv.writer(wrong_rows_file, dialect=csv.excel)
                writer.writerow(fieldnames)
                for row in wrong_rows:
                    writer.writerow(row)
            self.log(
                "Found %s invalid rows; they will be skipped and exported to %s"
                % (len(wrong_rows), wrong_rows_filepath), True)

        return user_data_list
Example #25
 def test_can_read_from_StringIO(self):
     with io.StringIO('ä,b\nc') as csv_stream:
         csv_reader = csv.reader(csv_stream, delimiter=',')
         actual_rows = list(csv_reader)
     self.assertEqual([['ä', 'b'], ['c']], actual_rows)
Example #26
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
     f = UTF8Recoder(f, encoding)
     self.reader = csv.reader(f, dialect=dialect, **kwds)
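
Example #26 is the classic Python 2 recipe: UTF8Recoder re-encodes the input so the bytes-oriented csv module of Python 2 can cope with it. On Python 3 no shim is needed, because csv.reader consumes text directly; a minimal sketch (file name hypothetical):

import csv

with open('data.csv', encoding='utf-8', newline='') as f:
    for row in csv.reader(f):
        print(row)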