    def test_values_dump(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_VALUES).file
        reader = read_file_as_csv(file)
        next(reader)  # skip header
        row = next(reader)
        self.assertEqual(row[0], self.catalog_id)
        self.assertEqual(row[6], 'R/P1D')  # daily periodicity, ISO 8601 repeating interval
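# Every test in this file reads its dump through read_file_as_csv. The helper
# itself is not shown here; a minimal sketch, assuming the argument is a
# binary file-like object (e.g. a Django FieldFile), could look like this:
import csv
import io


def read_file_as_csv(file):
    # Hypothetical implementation: decode the stored file as UTF-8 text and
    # return a row iterator over it.
    return csv.reader(io.TextIOWrapper(file, encoding='utf-8'))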
    def csv_to_xlsx(self):
        """Escribe el dump en XLSX en un archivo temporal, luego lo guarda en el storage,
        por Ășltimo borra el archivo temporal. Se debe hacer asĂ­ para hacer un "upload" al
        storage distribuido.
        """
        xlsx = self.xlsx_file_name()
        with self.csv_dump_file.file as f:
            reader = read_file_as_csv(f)
            header_row = next(reader)
            multiple_sheets = self.multiple_sheets[
                self.csv_dump_file.file_name]
            workbook = self.workbook_class(
                xlsx,
                header_row=header_row,
                split_by_frequency=multiple_sheets,
                formats=formats[self.csv_dump_file.file_name])

            for row in reader:
                workbook.write_row(row)

        if multiple_sheets:
            workbook.worksheets_objs.sort(key=sort_key)

        workbook.close()

        with open(xlsx, 'rb') as f:
            self.task.dumpfile_set.create(
                file_name=self.csv_dump_file.file_name,
                file_type=DumpFile.TYPE_XLSX,
                node=self.csv_dump_file.node,
                file=File(f))

        os.remove(xlsx)
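# csv_to_xlsx delegates the actual XLSX writing to self.workbook_class, which
# is not shown here. A minimal single-sheet sketch built on xlsxwriter (whose
# Workbook exposes the worksheets_objs list sorted above) could look like this;
# the class name and the handling of split_by_frequency/formats are assumptions:
import xlsxwriter


class DumpWorkbook(xlsxwriter.Workbook):
    # Hypothetical wrapper: writes the header row on creation and appends data
    # rows sequentially. split_by_frequency and formats are accepted for
    # interface compatibility but ignored in this sketch.
    def __init__(self, filename, header_row, split_by_frequency=False, formats=None):
        super().__init__(filename)
        self.worksheet = self.add_worksheet()
        self.worksheet.write_row(0, 0, header_row)
        self.next_row = 1

    def write_row(self, row):
        self.worksheet.write_row(self.next_row, 0, row)
        self.next_row += 1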
    def test_run_catalog_unavailable_fields(self):
        field = Field.objects.last()
        field.enhanced_meta.get(key=meta_keys.AVAILABLE).delete()
        task = GenerateDumpTask.objects.create()
        DumpGenerator(task, self.catalog_id).generate()
        file = task.dumpfile_set.get(file_name=DumpFile.FILENAME_METADATA,
                                     file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        for row in reader:
            self.assertNotEqual(row[5], field.title)
    def test_metadata_csv_hits(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_METADATA,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)

        field = Field.objects.get(identifier=row[3])
        self.assertEqual(row[25], meta_keys.get(field, meta_keys.HITS_TOTAL))
        self.assertEqual(row[26], meta_keys.get(field, meta_keys.HITS_30_DAYS))
        self.assertEqual(row[27], meta_keys.get(field, meta_keys.HITS_90_DAYS))
        self.assertEqual(row[28], meta_keys.get(field, meta_keys.HITS_180_DAYS))
    def test_full_csv_identifier_fields(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)

        field = Field.objects.get(identifier=row[3])
        self.assertEqual(row[0], self.catalog_id)
        self.assertEqual(row[1], field.distribution.dataset.identifier)
        self.assertEqual(row[2], field.distribution.identifier)
        self.assertEqual(row[5], field.distribution.enhanced_meta.get(key=meta_keys.PERIODICITY).value)
    def test_entity_identifiers(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_VALUES).file
        reader = read_file_as_csv(file)
        next(reader)

        row = next(reader)

        field_id = row[3]
        field = Field.objects.get(identifier=field_id)

        self.assertEqual(self.catalog_id, row[0])
        self.assertEqual(field.distribution.identifier, row[2])
        self.assertEqual(field.distribution.dataset.identifier, row[1])
        self.assertEqual(row[6], field.distribution.enhanced_meta.get(key=meta_keys.PERIODICITY).value)
    def test_full_csv_dataset_metadata_fields(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)

        field = Field.objects.get(identifier=row[3])

        dataset_meta = json.loads(field.distribution.dataset.metadata)
        self.assertEqual(row[12], dataset_meta['publisher']['name'])
        self.assertEqual(row[13], dataset_meta['source'])
        self.assertEqual(row[14], field.distribution.dataset.title)
    def test_leading_nulls_distribution(self):
        path = os.path.join(samples_dir, 'leading_nulls_distribution.json')
        index_catalog('leading_null', path, self.index)
        self.task = GenerateDumpTask()
        self.task.save()
        gen = DumpGenerator(self.task, 'leading_null')
        gen.generate()

        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV,
                                          node__catalog_id='leading_null').file
        reader = read_file_as_csv(file)

        next(reader)  # Header
        self.assertEqual(len(list(reader)), 2)
    def test_sources_csv_columns(self):
        dataset = Field.objects.first().distribution.dataset
        meta = json.loads(dataset.metadata)

        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_SOURCES).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)
        series = Field.objects.exclude(title='indice_tiempo')
        self.assertEqual(row[0], meta['source'])  # source name
        self.assertEqual(int(row[1]), 3)  # number of series
        self.assertEqual(int(row[2]), sum([int(meta_keys.get(x, meta_keys.INDEX_SIZE))
                                           for x in series]))
        self.assertEqual(row[3], min(meta_keys.get(x, meta_keys.INDEX_START) for x in series))
        self.assertEqual(row[4], max(meta_keys.get(x, meta_keys.INDEX_END) for x in series))
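# test_sources_csv_columns implies the sources dump aggregates one row per
# dataset source. A hypothetical sketch of that rollup, assuming meta_keys.get
# returns string values for the index metadata keys:
def build_sources_row(source_name, series):
    # Columns: source name, series count, total indexed values,
    # earliest index start, latest index end.
    return [
        source_name,
        str(len(series)),
        str(sum(int(meta_keys.get(s, meta_keys.INDEX_SIZE)) for s in series)),
        min(meta_keys.get(s, meta_keys.INDEX_START) for s in series),
        max(meta_keys.get(s, meta_keys.INDEX_END) for s in series),
    ]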
    def test_full_csv_metadata_fields(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)

        field = Field.objects.get(identifier=row[3])

        field_meta = json.loads(field.metadata)
        distribution_meta = json.loads(field.distribution.metadata)
        self.assertEqual(row[7], field.title)
        self.assertEqual(row[8], field_meta['units'])
        self.assertEqual(row[9], field_meta['description'])
        self.assertEqual(row[10], distribution_meta['description'])
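# Taken together, the test_full_csv_* tests above and the theme test below pin
# down the full.csv columns asserted in this suite: catalog id (0), dataset id
# (1), distribution id (2), series id (3), periodicity (5), series title (7),
# units (8), series description (9), distribution description (10), theme
# label (11), publisher name (12), dataset source (13) and dataset title (14).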
    def test_full_csv_dataset_theme_field(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header
        row = next(reader)

        field = Field.objects.get(identifier=row[3])

        dataset_meta = json.loads(field.distribution.dataset.metadata)

        themes = json.loads(Node.objects.get(catalog_id=self.catalog_id).catalog)['themeTaxonomy']

        theme_label = ''
        for theme in themes:
            if theme['id'] == dataset_meta['theme'][0]:
                theme_label = theme['label']
                break

        self.assertEqual(theme_label, row[11])
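# The linear scan over themeTaxonomy above mirrors what the dump writer must do
# to resolve a dataset's theme label. A hypothetical equivalent using a dict
# lookup, convenient when resolving many datasets against one taxonomy:
def theme_label(theme_taxonomy, theme_id):
    labels = {theme['id']: theme['label'] for theme in theme_taxonomy}
    return labels.get(theme_id, '')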
    def test_values_length(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_VALUES).file
        reader = read_file_as_csv(file)
        header = next(reader)
        self.assertEqual(len(header), 7)
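# Together with the identifier tests above, test_values_length pins down the
# values.csv layout: 7 columns, with catalog id, dataset id, distribution id
# and series (field) id in columns 0-3, the periodicity in column 6, and two
# columns (4-5, not asserted in these tests) in between.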
    def test_sources_csv(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_SOURCES).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        self.assertEqual(len(list(reader)), 1)  # One row per source
    def test_metadata_csv(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_METADATA).file
        reader = read_file_as_csv(file)
        next(reader)

        self.assertEqual(len(list(reader)), 3)  # One row per series