Exemple #1
0
def copy_to_database(self, *, s_file_id):
    '''
    Create the raw table for a standardized file and bulk-load the file's
    required columns into it.

    This is a task method bound to the base task. The setup method of the
    base task will be fired by the task_prerun signal handler, init_task,
    before the code in this task method is run, e.g., self.s_file and
    self.import_utility are available.

    :param s_file_id: not read in this body; presumably consumed by the
        setup step that populates self.s_file (confirm against init_task).
    :returns: a message naming the trimmed file that was copied.
    '''
    # Quote the identifier once and reuse it everywhere. COPY already
    # addressed the table as a quoted identifier while CREATE TABLE and
    # CREATE INDEX used the bare name; PostgreSQL case-folds unquoted
    # names, so the statements could disagree about which relation they
    # meant. For all-lowercase names this is a no-op.
    quoted_table = '"{}"'.format(self.s_file.raw_table_name)

    # Every incoming value is loaded as text; record_id is generated by
    # the database for each row.
    columns = '''
        record_id UUID DEFAULT gen_random_uuid(),
        responding_agency VARCHAR,
        employer VARCHAR,
        last_name VARCHAR,
        first_name VARCHAR,
        title VARCHAR,
        department VARCHAR,
        base_salary VARCHAR,
        extra_pay VARCHAR,
        date_started VARCHAR,
        data_year INT
    '''

    create = 'CREATE TABLE {} ({})'.format(quoted_table, columns)

    with connection.cursor() as cursor:
        cursor.execute(create)

    meta = CsvMeta(self.s_file.standardized_file)
    formatted_data_file = meta.trim_extra_fields()

    with open(formatted_data_file, 'r', encoding='utf-8') as f:
        with connection.cursor() as cursor:
            copy_fmt = 'COPY {table} ({cols}) FROM STDIN CSV HEADER'

            copy = copy_fmt.format(table=quoted_table,
                                   cols=','.join(meta.REQUIRED_FIELDS))

            # Stream the file through COPY rather than inserting row by row.
            cursor.copy_expert(copy, f)

            # Expression index on the normalised employer name.
            cursor.execute('CREATE INDEX ON {} (TRIM(LOWER(employer)))'.format(
                quoted_table))

    self.update_status('copied to database')

    return 'Copied {} to database'.format(formatted_data_file)
Exemple #2
0
    def clean_standardized_file(self):
        '''
        Validate the 'standardized_file' entry of cleaned_data and return
        it unchanged.

        The file type and the field names reported by CsvMeta are passed
        to self._validate_filetype and self._validate_fields, in that
        order.
        '''
        upload = self.cleaned_data['standardized_file']
        csv_meta = CsvMeta(upload)

        # File type is checked first; field names are only read afterwards.
        detected_type = csv_meta.file_type
        self._validate_filetype(detected_type)

        header = csv_meta.field_names
        self._validate_fields(header)

        return upload
Exemple #3
0
    def validate(self, data_file):
        '''
        Check that data_file is a CSV containing every field listed in
        CsvMeta.REQUIRED_FIELDS.

        :param data_file: path of the file to check; opened in binary mode.
        :returns: the file name returned by meta.trim_extra_fields().
        :raises CommandError: if the file is not a CSV, or if any required
            field is absent from its field names.
        '''
        with open(data_file, 'rb') as df:
            meta = CsvMeta(File(df))

            if meta.file_type != 'csv':
                raise CommandError('Data file must be a CSV')

            # Sort the difference so the error message lists the missing
            # fields in a stable order; set iteration order is not
            # guaranteed.
            missing_fields = sorted(
                set(CsvMeta.REQUIRED_FIELDS) - set(meta.field_names))

            if missing_fields:
                message = 'Standardized file missing fields: {}'.format(
                    ', '.join(missing_fields))
                raise CommandError(message)

            # Must run while the underlying file handle is still open.
            valid_file_name = meta.trim_extra_fields()

        self.stdout.write('Validated {}'.format(data_file))

        return valid_file_name
Exemple #4
0
    def clean_standardized_file(self):
        '''
        Validate the 'standardized_file' entry of cleaned_data, prefix its
        name with the current timestamp, and return it.

        The file type and the field names reported by CsvMeta are passed
        to self._validate_filetype and self._validate_fields, in that
        order, before the file is renamed.
        '''
        upload = self.cleaned_data['standardized_file']
        csv_meta = CsvMeta(upload)

        # File type is checked first; field names are only read afterwards.
        detected_type = csv_meta.file_type
        self._validate_filetype(detected_type)

        header = csv_meta.field_names
        self._validate_fields(header)

        # Rename only after both validations have passed.
        timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S')
        upload.name = '{}-{}'.format(timestamp, upload.name)

        return upload
Exemple #5
0
def test_match_or_create_responding_agency(raw_table_setup, canned_data,
                                           employer, queue, raw_field, model,
                                           model_kwargs):
    '''
    Exercise q.match_or_create twice, first with no match and then with
    the match 'a matching agency', and check the raw table after each call.

    With no match, the raw table must still hold the original name and a
    model instance with that name must exist. With a match, the raw table
    must hold the match value and no longer hold the original name.
    '''
    s_file = raw_table_setup

    q = queue(s_file.id)

    name = canned_data[raw_field]

    item = {'id': None, 'name': name}

    if isinstance(q, ChildEmployerQueue):
        # Child employers need their parent to exist before matching.
        parent = canned_data['Employer']
        employer.build(name=parent, vintage=s_file.upload)
        item['parent'] = parent

    # Only the identifiers are formatted into the statement. The compared
    # values are bound as query parameters so that names containing quote
    # characters cannot break the SQL.
    select = '''
        SELECT EXISTS(
          SELECT 1
          FROM {raw_payroll}
          WHERE {processed_field} = %s
        ),
        EXISTS(
          SELECT 1
          FROM {raw_payroll}
          WHERE {processed_field} = %s
        )
    '''.format(raw_payroll=s_file.raw_table_name,
               processed_field=CsvMeta._clean_field(raw_field))

    for match in (None, 'a matching agency'):
        q.match_or_create(item.copy(), match)

        with connection.cursor() as cursor:
            # A match of None is bound as NULL, which equals no row, so
            # match_exists is False on the first pass.
            cursor.execute(select, [name, match])

            item_exists, match_exists = cursor.fetchone()

            if match:
                assert match_exists and not item_exists

            else:
                assert item_exists and not match_exists
                assert model.objects.get(name=name, **model_kwargs)