def test_values_dump(self):
    """First data row of the values dump carries this catalog's id and a daily ('R/P1D') periodicity."""
    dump_file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_VALUES).file
    rows = read_file_as_csv(dump_file)
    next(rows)  # skip header
    first_data_row = next(rows)
    self.assertEqual(first_data_row[0], self.catalog_id)
    self.assertEqual(first_data_row[6], 'R/P1D')
def csv_to_xlsx(self):
    """Write the CSV dump as XLSX to a temporary local file, save it into
    the storage backend, then delete the temporary file.

    The intermediate local file is required to perform an "upload" to a
    distributed storage backend (the workbook writer needs a real path).
    """
    xlsx = self.xlsx_file_name()
    try:
        with self.csv_dump_file.file as f:
            reader = read_file_as_csv(f)
            header_row = next(reader)
            multiple_sheets = self.multiple_sheets[
                self.csv_dump_file.file_name]
            workbook = self.workbook_class(
                xlsx,
                header_row=header_row,
                split_by_frequency=multiple_sheets,
                formats=formats[self.csv_dump_file.file_name])
            for row in reader:
                workbook.write_row(row)
            if multiple_sheets:
                # Keep worksheets in a deterministic, meaningful order.
                workbook.worksheets_objs.sort(key=sort_key)
            workbook.close()

        # Re-open the finished workbook in binary mode and hand it to the
        # storage via a Django File wrapper.
        with open(xlsx, 'rb') as f:
            self.task.dumpfile_set.create(
                file_name=self.csv_dump_file.file_name,
                file_type=DumpFile.TYPE_XLSX,
                node=self.csv_dump_file.node,
                file=File(f))
    finally:
        # Always clean up the temp file, even if workbook generation or the
        # storage upload raises — otherwise failed runs leak files on disk.
        if os.path.exists(xlsx):
            os.remove(xlsx)
def test_run_catalog_unavailable_fields(self):
    """A field whose AVAILABLE meta key was removed must not appear in the metadata dump."""
    unavailable_field = Field.objects.last()
    unavailable_field.enhanced_meta.get(key=meta_keys.AVAILABLE).delete()

    dump_task = GenerateDumpTask.objects.create()
    DumpGenerator(dump_task, self.catalog_id).generate()

    metadata_file = dump_task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_METADATA,
        file_type=DumpFile.TYPE_CSV).file
    for csv_row in read_file_as_csv(metadata_file):
        self.assertNotEqual(csv_row[5], unavailable_field.title)
def test_metadata_csv_hits(self):
    """Hits columns (25-28) of the metadata CSV mirror the field's enhanced-meta hit counters."""
    metadata_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_METADATA,
        file_type=DumpFile.TYPE_CSV).file
    rows = read_file_as_csv(metadata_file)
    next(rows)  # Header
    data_row = next(rows)
    field = Field.objects.get(identifier=data_row[3])

    expected_columns = (
        (25, meta_keys.HITS_TOTAL),
        (26, meta_keys.HITS_30_DAYS),
        (27, meta_keys.HITS_90_DAYS),
        (28, meta_keys.HITS_180_DAYS),
    )
    for column, key in expected_columns:
        self.assertEqual(data_row[column], meta_keys.get(field, key))
def test_full_csv_identifier_fields(self):
    """Catalog, dataset, distribution ids and periodicity in the full CSV match the model data."""
    full_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_FULL,
        file_type=DumpFile.TYPE_CSV).file
    rows = read_file_as_csv(full_file)
    next(rows)  # Header
    data_row = next(rows)

    field = Field.objects.get(identifier=data_row[3])
    distribution = field.distribution
    self.assertEqual(data_row[0], self.catalog_id)
    self.assertEqual(data_row[1], distribution.dataset.identifier)
    self.assertEqual(data_row[2], distribution.identifier)
    expected_periodicity = distribution.enhanced_meta.get(
        key=meta_keys.PERIODICITY).value
    self.assertEqual(data_row[5], expected_periodicity)
def test_entity_identifiers(self):
    """Values dump rows identify catalog, dataset and distribution of the series, plus its periodicity."""
    values_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_VALUES).file
    rows = read_file_as_csv(values_file)
    next(rows)
    data_row = next(rows)

    field = Field.objects.get(identifier=data_row[3])
    distribution = field.distribution
    self.assertEqual(self.catalog_id, data_row[0])
    self.assertEqual(distribution.identifier, data_row[2])
    self.assertEqual(distribution.dataset.identifier, data_row[1])
    expected_periodicity = distribution.enhanced_meta.get(
        key=meta_keys.PERIODICITY).value
    self.assertEqual(data_row[6], expected_periodicity)
def test_full_csv_dataset_metadata_fields(self):
    """Publisher name, source and dataset title in the full CSV come from the dataset's JSON metadata."""
    full_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_FULL,
        file_type=DumpFile.TYPE_CSV).file
    rows = read_file_as_csv(full_file)
    next(rows)  # Header
    data_row = next(rows)

    dataset = Field.objects.get(identifier=data_row[3]).distribution.dataset
    dataset_meta = json.loads(dataset.metadata)
    self.assertEqual(data_row[12], dataset_meta['publisher']['name'])
    self.assertEqual(data_row[13], dataset_meta['source'])
    self.assertEqual(data_row[14], dataset.title)
def test_leading_nulls_distribution(self):
    """A distribution with leading null values yields exactly two data rows in the full CSV."""
    catalog_path = os.path.join(samples_dir, 'leading_nulls_distribution.json')
    index_catalog('leading_null', catalog_path, self.index)

    self.task = GenerateDumpTask()
    self.task.save()
    DumpGenerator(self.task, 'leading_null').generate()

    full_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_FULL,
        file_type=DumpFile.TYPE_CSV,
        node__catalog_id='leading_null').file
    rows = read_file_as_csv(full_file)
    next(rows)  # discard header before counting data rows
    self.assertEqual(len(list(rows)), 2)
def test_sources_csv_columns(self):
    """Each sources CSV row aggregates its series: name, count, total index
    size, and the min/max of the series' index start/end dates.
    """
    dataset = Field.objects.first().distribution.dataset
    meta = json.loads(dataset.metadata)
    file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_SOURCES).file
    reader = read_file_as_csv(file)
    next(reader)  # Header
    row = next(reader)
    # Every field except the index column is a series of this source.
    series = Field.objects.exclude(title='indice_tiempo')
    self.assertEqual(row[0], meta['source'])  # source name
    self.assertEqual(int(row[1]), 3)  # number of series
    # Generator instead of an intermediate list: sum() consumes it directly.
    self.assertEqual(
        int(row[2]),
        sum(int(meta_keys.get(x, meta_keys.INDEX_SIZE)) for x in series))
    self.assertEqual(
        row[3],
        min(meta_keys.get(x, meta_keys.INDEX_START) for x in series))
    self.assertEqual(
        row[4],
        max(meta_keys.get(x, meta_keys.INDEX_END) for x in series))
def test_full_csv_metadata_fields(self):
    """Title, units and descriptions in the full CSV match the field's and distribution's JSON metadata."""
    full_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_FULL,
        file_type=DumpFile.TYPE_CSV).file
    rows = read_file_as_csv(full_file)
    next(rows)  # Header
    data_row = next(rows)

    field = Field.objects.get(identifier=data_row[3])
    field_meta = json.loads(field.metadata)
    distribution_meta = json.loads(field.distribution.metadata)
    self.assertEqual(data_row[7], field.title)
    self.assertEqual(data_row[8], field_meta['units'])
    self.assertEqual(data_row[9], field_meta['description'])
    self.assertEqual(data_row[10], distribution_meta['description'])
def test_full_csv_dataset_theme_field(self):
    """Column 11 of the full CSV is the label of the dataset's first theme, looked up in the catalog's theme taxonomy."""
    full_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_FULL,
        file_type=DumpFile.TYPE_CSV).file
    rows = read_file_as_csv(full_file)
    next(rows)  # Header
    data_row = next(rows)

    field = Field.objects.get(identifier=data_row[3])
    dataset_meta = json.loads(field.distribution.dataset.metadata)
    catalog = json.loads(Node.objects.get(catalog_id=self.catalog_id).catalog)
    theme_id = dataset_meta['theme'][0]
    # First matching theme label, or '' when the id is missing from the taxonomy.
    theme_label = next(
        (theme['label'] for theme in catalog['themeTaxonomy']
         if theme['id'] == theme_id),
        '')
    self.assertEqual(theme_label, data_row[11])
def test_values_length(self):
    """The values dump header has exactly seven columns."""
    values_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_VALUES).file
    header_row = next(read_file_as_csv(values_file))
    self.assertEqual(len(header_row), 7)
def test_sources_csv(self):
    """The sources dump has one data row per source (a single source here)."""
    sources_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_SOURCES).file
    rows = read_file_as_csv(sources_file)
    next(rows)  # Header
    self.assertEqual(len(list(rows)), 1)  # one row per source
def test_metadata_csv(self):
    """The metadata dump has one data row per series (three series indexed)."""
    metadata_file = self.task.dumpfile_set.get(
        file_name=DumpFile.FILENAME_METADATA).file
    rows = read_file_as_csv(metadata_file)
    next(rows)  # skip header
    self.assertEqual(len(list(rows)), 3)  # one row per series