def _get_context_arguments(self):
    # Read the header row from the input file, generate the map-columns
    # file, and assemble the argument map used by downstream steps.
    with CsvReader(
            '{0}/Input/{1}'.format(self.merge_root_path, self.file_name),
            self.delimiter) as file:
        header_columns = file.header_columns

    rds_provider.create_map_columns_file(
        '{}/map_file.txt'.format(self.merge_root_path), self.map_id,
        self.delimiter)
    map_columns = self._get_map_columns(header_columns)

    return ArgumentMap({
        'entity_type': self.entity_type,
        'delimiter': self.delimiter,
        'map_id': self.map_id,
        'control_file_id': self.control_file_id,
        'file_location': '{0}/Input/{1}'.format(self.merge_root_path,
                                                self.file_name),
        'file_name': self.file_name,
        'merge_root_path': self.merge_root_path,
        'is_debug_mode': self.is_debug_mode,
        'map_columns': map_columns,
        'header_columns': header_columns
    })
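# Every method in this module depends on a project-specific CsvReader context
# manager whose implementation is not shown. The sketch below is a hedged,
# minimal stand-in for the interface these methods appear to assume
# (header_columns, read_records, batch_with_filter_records); the class name,
# batch_size default, and buffer handling are assumptions, not the real code.
import csv


class CsvReaderSketch:
    """Illustrative stand-in for the CsvReader used throughout this module."""

    def __init__(self, path, delimiter=',', buffer_size=None,
                 has_header_columns=True):
        # buffer_size is accepted only for signature compatibility here.
        self._path = path
        self._delimiter = delimiter
        self._has_header = has_header_columns
        self.header_columns = []

    def __enter__(self):
        self._file = open(self._path, newline='')
        self._reader = csv.reader(self._file, delimiter=self._delimiter)
        if self._has_header:
            self.header_columns = next(self._reader, [])
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._file.close()
        return False

    def read_records(self):
        # Yield each remaining row as a list of column values.
        yield from self._reader

    def batch_with_filter_records(self, filter_func, batch_size=1000):
        # Yield batches of rows that pass filter_func.
        batch = []
        for row in self._reader:
            if filter_func(row):
                batch.append(row)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:
            yield batch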
def _load_data(self):
    # Index every unloaded member row by its member id for fast lookup.
    for file in os.listdir('{}/MemberData'.format(self.temp_folder)):
        with CsvReader('{}/MemberData/{}'.format(self.temp_folder, file),
                       '|',
                       has_header_columns=False) as csv_reader:
            for row in csv_reader.read_records():
                self.member_dict[row[self.unload_columns_index[
                    'member_id_member']]] = row
def _load_npi_data(self):
    # Build the NPI lookup keyed by the NPI column of each unloaded row.
    for npi_file in os.listdir('{}/NpiValues'.format(self.temp_folder)):
        with CsvReader('{}/NpiValues/{}'.format(self.temp_folder, npi_file),
                       '|',
                       has_header_columns=False) as csv_reader:
            for row in csv_reader.read_records():
                self.npi_lookup_dict[row[self.npi_header_columns[
                    'npi']]] = self._get_npi_value(row)
async def _reader_task(self, queue, context):
    # Producer: stream filtered batches from the input file onto the queue;
    # a trailing None tells the consumer that all work has been enqueued.
    batch_filter_func = self._batch_filter_func(context)
    with CsvReader(
            '{0}/Input/{1}'.format(self.merge_root_path, self.file_name),
            self.delimiter, buffer_size) as reader:
        for batch_records in reader.batch_with_filter_records(
                batch_filter_func):
            await queue.put(batch_records)
    await queue.put(None)  # poison pill to signal all the work is done
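# _reader_task is the producer half of an asyncio producer/consumer pipeline.
# A hedged sketch of a matching consumer coroutine for the poison-pill
# protocol above; process_batch is a hypothetical per-batch worker, and the
# real consumer in this codebase is not shown.
async def _consumer_task_sketch(queue, process_batch):
    while True:
        batch_records = await queue.get()
        if batch_records is None:
            # Poison pill: the reader has finished, so stop consuming.
            queue.task_done()
            break
        process_batch(batch_records)
        queue.task_done()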
def _load_npi_lookup_data(self):
    # Resolve the column index of the 'npi' field from the field maps, then
    # key every unloaded row by that value.
    npi_index = [
        x['index'] for x in self.npi_field_maps if x['field'] == 'npi'
    ][0]
    for file in os.listdir('{}/NpiValues'.format(self.temp_folder)):
        with CsvReader('{0}/NpiValues/{1}'.format(self.temp_folder, file),
                       '|',
                       has_header_columns=False) as csv_reader:
            for line in csv_reader.read_records():
                self.npi_lookup_dict[line[npi_index]] = line
def _prepare_base_data(self, context, pre_process_scripts):
    # Stream the raw input batches to any pre-process script that implements
    # read_row_file_rows, then let every script prepare its data.
    need_row_file_process_scripts = [
        x for x in pre_process_scripts if 'read_row_file_rows' in dir(x)
    ]
    if len(need_row_file_process_scripts) > 0:
        batch_filter_func = self._batch_filter_func(context)
        with CsvReader(
                '{0}/Input/{1}'.format(self.merge_root_path, self.file_name),
                self.delimiter, buffer_size) as reader:
            for batch_records in reader.batch_with_filter_records(
                    batch_filter_func):
                for pre_process_script in need_row_file_process_scripts:
                    pre_process_script.read_row_file_rows(batch_records)
    for pre_process_script in pre_process_scripts:
        pre_process_script.prepare_data()
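# _prepare_base_data discovers which scripts want raw input rows purely by
# duck typing ('read_row_file_rows' in dir(x)). A hedged sketch of a script
# satisfying that implicit protocol; the class name and its row counting are
# illustrative only, not taken from the source.
class RowCountingPreProcessScriptSketch:
    """Example pre-process script that counts input rows before preparing."""

    def __init__(self):
        self.row_count = 0

    def read_row_file_rows(self, batch_records):
        # Called once per filtered batch streamed from the input file.
        self.row_count += len(batch_records)

    def prepare_data(self):
        # Called for every script after all batches have been read.
        print('read {} input rows'.format(self.row_count))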
def _download_npi_data(self):
    # Query the public NPI registry for every NPI number in parallel and let
    # the background callback persist each response as a JSON file.
    file_utility.create_folder('{}/NPIJsonFiles'.format(self.temp_folder))
    for npi_file in os.listdir('{}/NpiValues'.format(self.temp_folder)):
        with CsvReader('{}/NpiValues/{}'.format(self.temp_folder, npi_file),
                       '|',
                       has_header_columns=False) as csv_reader:
            with FuturesSession(max_workers=10) as session:
                for number in csv_reader.read_records():
                    parameters = {
                        "number": number[0],
                        "version": "2.1",
                        "pretty": "on",
                        "address_purpose": ""
                    }
                    session.get(
                        "https://npiregistry.cms.hhs.gov/api",
                        params=parameters,
                        background_callback=self._create_npi_json_file)
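# The background_callback above points at self._create_npi_json_file, which
# is not shown. A hedged sketch of what such an instance method might do,
# assuming it writes each registry response into the NPIJsonFiles folder; the
# file naming, response fields used, and error handling are guesses, not the
# real method. (requests-futures releases prior to 1.0 invoke
# background_callback as callback(session, response) on a worker thread.)
import json
import os


def _create_npi_json_file_sketch(self, session, response):
    if response.status_code != 200:
        return
    payload = response.json()
    results = payload.get('results') or [{}]
    number = results[0].get('number', 'unknown')
    target = os.path.join(self.temp_folder, 'NPIJsonFiles',
                          '{}.json'.format(number))
    with open(target, 'w') as json_file:
        json.dump(payload, json_file)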
def _update_npi_database(self):
    # Upload the exported CSV to S3 and COPY it into the npilookupvalues
    # table, quoting the header columns for the column list.
    with CsvReader('{0}/Export.csv'.format(self.temp_folder),
                   delimiter=',') as file:
        columns = file.header_columns

    s3_provider.upload_file('{0}/Export.csv'.format(self.temp_folder),
                            '{}/Export.csv'.format(self.temp_s3_folder))
    redshift_provider.copy_data_from_s3({
        'table_name': 'npilookupvalues',
        'ignore_header': True,
        'columns': '"{}"'.format('","'.join(columns)),
        's3_location': '{}/Export.csv'.format(self.temp_s3_folder),
        'delimiter': ',',
        'csv_quote': '"'
    })
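# copy_data_from_s3 is an internal wrapper whose implementation is not shown.
# As a hedged sketch, the options passed above map naturally onto a Redshift
# COPY statement along these lines; the exact SQL the provider emits, and its
# credentials handling (omitted here), are assumptions.
def _build_copy_statement_sketch(options):
    return ("COPY {table} ({columns}) "
            "FROM '{s3_location}' "
            "DELIMITER '{delimiter}' "
            "CSV QUOTE '{quote}'"
            "{ignore_header}").format(
                table=options['table_name'],
                columns=options['columns'],
                s3_location=options['s3_location'],
                delimiter=options['delimiter'],
                quote=options['csv_quote'],
                ignore_header=' IGNOREHEADER 1'
                if options.get('ignore_header') else '')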
def _load_linked_claim_replacement_data(self):
    # Map each previous claim number to its replacement linked claim id.
    for file in os.listdir('{}/LinkedClaimIds'.format(self.temp_folder)):
        with CsvReader('{0}/LinkedClaimIds/{1}'.format(
                self.temp_folder, file),
                       '|',
                       has_header_columns=False) as csv_reader:
            for line in csv_reader.read_records():
                self.linked_claim_id_by_previous_claim_number[
                    line[0]] = line[1]