def test_values_dump(self):
    """The values dump starts each row with the catalog id and carries the daily periodicity."""
    dump = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_VALUES).file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    first_row = next(rows)
    self.assertEqual(first_row[0], self.catalog_id)
    self.assertEqual(first_row[6], 'R/P1D')
def test_sources_csv(self):
    """The sources CSV has exactly one data row per source."""
    dump = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_SOURCES).file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    data_rows = list(rows)
    self.assertEqual(len(data_rows), 1)  # one row per source
def write_sources_table(self):
    """Load the most recent sources CSV dump for this node into the Fuentes table.

    Reads the latest FILENAME_SOURCES dump, skips its header, and bulk-inserts
    one ``Fuentes`` row per CSV row. Does nothing if no dump (or no file) exists.
    """
    sources = DumpFile.objects.filter(file_name=DumpFile.FILENAME_SOURCES,
                                      file_type=DumpFile.TYPE_CSV,
                                      node=self.node).last()
    if sources is None or sources.file is None:
        return

    reader = read_file_as_csv(sources.file)
    next(reader)  # Skip header

    # Hoist the column-index lookups out of the loop: index() is O(n) per call.
    index = self.sources_rows.index
    fuente_col = index('dataset_fuente')
    series_col = index('series_cant')
    valores_col = index('valores_cant')
    primer_col = index('fecha_primer_valor')
    ultimo_col = index('fecha_ultimo_valor')

    actions = [
        Fuentes(
            fuente=row[fuente_col],
            series_cant=row[series_col],
            valores_cant=row[valores_col],
            fecha_primer_valor=row[primer_col],
            fecha_ultimo_valor=row[ultimo_col],
        )
        for row in reader
    ]
    if actions:
        # BUG FIX: the original never appended to `actions` and saved each
        # instance individually (one INSERT per row); its bulk_create branch
        # was unreachable dead code and called bulk_create on the model class
        # instead of its manager. Insert everything in a single query here.
        Fuentes.objects.bulk_create(actions)
def test_metadata_csv(self):
    """The metadata CSV has exactly one data row per series."""
    dump = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_METADATA).file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    self.assertEqual(len(list(rows)), 3)  # one row per series
def write_values_table(self):
    """Load the most recent values CSV dump for this node into the Valores table."""
    dump = DumpFile.objects.filter(file_name=DumpFile.FILENAME_VALUES,
                                   file_type=DumpFile.TYPE_CSV,
                                   node=self.node).last()
    if dump is None or dump.file is None:
        return

    rows = read_file_as_csv(dump.file)
    next(rows)  # discard header
    # Insert in batches of 1000 to keep individual queries bounded.
    Valores.bulk_create(self.generate_values_rows(rows), batch_size=1000)
def write_metadata_tables(self):
    """Load the most recent metadata CSV dump for this node into the Metadatos table."""
    dump = DumpFile.objects.filter(node=self.node,
                                   file_name=DumpFile.FILENAME_METADATA,
                                   file_type=DumpFile.TYPE_CSV).last()
    if dump is None or dump.file is None:
        return

    rows = read_file_as_csv(dump.file)
    next(rows)  # discard header
    # Insert in batches of 100 to keep individual queries bounded.
    Metadatos.bulk_create(self.generate_series_rows(rows), batch_size=100)
def test_full_csv_identifier_fields(self):
    """The full CSV carries catalog, dataset, distribution ids and the periodicity."""
    dump = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                      file_type=DumpFile.TYPE_CSV).file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    first_row = next(rows)
    field = Field.objects.get(identifier=first_row[3])
    distribution = field.distribution
    self.assertEqual(first_row[0], self.catalog_id)
    self.assertEqual(first_row[1], distribution.dataset.identifier)
    self.assertEqual(first_row[2], distribution.identifier)
    periodicity = distribution.enhanced_meta.get(key=meta_keys.PERIODICITY).value
    self.assertEqual(first_row[5], periodicity)
def test_full_csv_dataset_metadata_fields(self):
    """The full CSV carries the dataset's publisher, source and title columns."""
    dump = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                      file_type=DumpFile.TYPE_CSV).file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    first_row = next(rows)
    field = Field.objects.get(identifier=first_row[3])
    dataset = field.distribution.dataset
    dataset_meta = json.loads(dataset.metadata)
    self.assertEqual(first_row[12], dataset_meta['publisher']['name'])
    self.assertEqual(first_row[13], dataset_meta['source'])
    self.assertEqual(first_row[14], dataset.title)
def test_entity_identifiers(self):
    """The values dump columns match the entities the series id belongs to."""
    dump = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_VALUES).file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    first_row = next(rows)
    field = Field.objects.get(identifier=first_row[3])
    distribution = field.distribution
    self.assertEqual(self.catalog_id, first_row[0])
    self.assertEqual(distribution.dataset.identifier, first_row[1])
    self.assertEqual(distribution.identifier, first_row[2])
    periodicity = distribution.enhanced_meta.get(key=meta_keys.PERIODICITY).value
    self.assertEqual(first_row[6], periodicity)
def test_leading_nulls_distribution(self):
    """A distribution with leading nulls dumps a single row for its single value."""
    path = os.path.join(samples_dir, 'leading_nulls_distribution.json')
    index_catalog('leading_null', path, self.index)

    self.task = GenerateDumpTask()
    self.task.save()
    generator = DumpGenerator(self.task, 'leading_null')
    generator.generate()

    dump = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                      file_type=DumpFile.TYPE_CSV,
                                      node__catalog_id='leading_null').file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    # A single row is expected: the CSV has exactly one non-null value.
    self.assertEqual(len(list(rows)), 1)
def test_full_csv_metadata_fields(self):
    """The full CSV carries field title/units/description and the distribution description."""
    dump = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                      file_type=DumpFile.TYPE_CSV).file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    first_row = next(rows)
    field = Field.objects.get(identifier=first_row[3])
    field_meta = json.loads(field.metadata)
    distribution_meta = json.loads(field.distribution.metadata)
    self.assertEqual(first_row[7], field.title)
    self.assertEqual(first_row[8], field_meta['units'])
    self.assertEqual(first_row[9], field_meta['description'])
    self.assertEqual(first_row[10], distribution_meta['description'])
def test_sources_csv_columns(self):
    """Each sources CSV column aggregates the expected per-series metadata."""
    dataset = Field.objects.first().distribution.dataset
    dataset_meta = json.loads(dataset.metadata)
    dump = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_SOURCES).file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    first_row = next(rows)
    series = Field.objects.exclude(title='indice_tiempo')

    self.assertEqual(first_row[0], dataset_meta['source'])  # source name
    self.assertEqual(int(first_row[1]), 3)  # series count
    total_values = sum(int(meta_keys.get(s, meta_keys.INDEX_SIZE)) for s in series)
    self.assertEqual(int(first_row[2]), total_values)
    earliest = min(meta_keys.get(s, meta_keys.INDEX_START) for s in series)
    self.assertEqual(first_row[3], earliest)
    latest = max(meta_keys.get(s, meta_keys.INDEX_END) for s in series)
    self.assertEqual(first_row[4], latest)
def test_full_csv_dataset_theme_field(self):
    """The full CSV theme column is the label from the catalog's theme taxonomy."""
    dump = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                      file_type=DumpFile.TYPE_CSV).file
    rows = read_file_as_csv(dump)
    next(rows)  # discard header
    first_row = next(rows)
    field = Field.objects.get(identifier=first_row[3])
    dataset_meta = json.loads(field.distribution.dataset.metadata)

    catalog = json.loads(Node.objects.get(catalog_id=self.catalog_id).catalog)
    taxonomy = catalog['themeTaxonomy']
    # Look up the label matching the dataset's first theme id; '' if absent.
    theme_label = ''
    for theme in taxonomy:
        if theme['id'] == dataset_meta['theme'][0]:
            theme_label = theme['label']
            break

    self.assertEqual(theme_label, first_row[11])
def test_values_length(self):
    """The values dump header has exactly seven columns."""
    dump = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_VALUES).file
    rows = read_file_as_csv(dump)
    header = next(rows)
    self.assertEqual(len(header), 7)