Code example #1
    def test_values_dump(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_VALUES).file
        reader = read_file_as_csv(file)
        next(reader)  # skip header
        row = next(reader)
        self.assertEqual(row[0], self.catalog_id)
        self.assertEqual(row[6], 'R/P1D')
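
Every example below feeds a Django file object into read_file_as_csv. The helper itself is not shown in this section; a minimal sketch, assuming it does nothing more than decode the stored bytes and wrap them in the standard library's CSV reader, could look like this (the real helper may handle encodings or streaming differently):

import csv
import io

def read_file_as_csv(file):
    # Assumed behaviour: read the whole (binary) file, decode it,
    # and return an iterator of parsed CSV rows.
    return csv.reader(io.StringIO(file.read().decode('utf-8')))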
Code example #2
    def csv_to_xlsx(self):
        """Escribe el dump en XLSX en un archivo temporal, luego lo guarda en el storage,
        por último borra el archivo temporal. Se debe hacer así para hacer un "upload" al
        storage distribuido.
        """
        xlsx = self.xlsx_file_name()
        with self.csv_dump_file.file as f:
            reader = read_file_as_csv(f)
            header_row = next(reader)
            multiple_sheets = self.multiple_sheets[
                self.csv_dump_file.file_name]
            workbook = self.workbook_class(
                xlsx,
                header_row=header_row,
                split_by_frequency=multiple_sheets,
                formats=formats[self.csv_dump_file.file_name])

            for row in reader:
                workbook.write_row(row)

        if multiple_sheets:
            workbook.worksheets_objs.sort(key=sort_key)

        workbook.close()

        with open(xlsx, 'rb') as f:
            self.task.dumpfile_set.create(
                file_name=self.csv_dump_file.file_name,
                file_type=DumpFile.TYPE_XLSX,
                node=self.csv_dump_file.node,
                file=File(f))

        os.remove(xlsx)
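
The tail of csv_to_xlsx is a generic write-locally-then-upload pattern: the XLSX writer needs a real local path, while the DumpFile storage backend may be remote. A stripped-down sketch of just that step, using a hypothetical helper name (the field names mirror the method above; nothing else is confirmed):

import os
from django.core.files import File

def upload_and_discard(task, local_path, **dump_file_fields):
    # Upload the finished local file to the (possibly distributed)
    # storage through the DumpFile relation, then drop the temp copy.
    with open(local_path, 'rb') as f:
        task.dumpfile_set.create(file=File(f), **dump_file_fields)
    os.remove(local_path)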
Code example #3
    def test_run_catalog_unavailable_fields(self):
        field = Field.objects.last()
        field.enhanced_meta.get(key=meta_keys.AVAILABLE).delete()
        task = GenerateDumpTask.objects.create()
        DumpGenerator(task, self.catalog_id).generate()
        file = task.dumpfile_set.get(file_name=DumpFile.FILENAME_METADATA,
                                     file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        for row in reader:
            self.assertNotEqual(row[5], field.title)
Code example #4
    def test_metadata_csv_hits(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_METADATA,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)

        field = Field.objects.get(identifier=row[3])
        self.assertEqual(row[25], meta_keys.get(field, meta_keys.HITS_TOTAL))
        self.assertEqual(row[26], meta_keys.get(field, meta_keys.HITS_30_DAYS))
        self.assertEqual(row[27], meta_keys.get(field, meta_keys.HITS_90_DAYS))
        self.assertEqual(row[28], meta_keys.get(field, meta_keys.HITS_180_DAYS))
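
meta_keys.get appears throughout these tests but is never defined in this section. Judging from calls like field.distribution.enhanced_meta.get(key=meta_keys.PERIODICITY).value in the other examples, a plausible sketch is a one-line lookup into the per-series enhanced_meta table; this is an inference, not the project's confirmed code:

def get(field, key):
    # Assumed: enhanced_meta stores one (key, value) row per series,
    # and this helper returns the stored string value for a key.
    return field.enhanced_meta.get(key=key).value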
Code example #5
    def test_full_csv_identifier_fields(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)

        field = Field.objects.get(identifier=row[3])
        self.assertEqual(row[0], self.catalog_id)
        self.assertEqual(row[1], field.distribution.dataset.identifier)
        self.assertEqual(row[2], field.distribution.identifier)
        self.assertEqual(row[5], field.distribution.enhanced_meta.get(key=meta_keys.PERIODICITY).value)
Code example #6
    def test_entity_identifiers(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_VALUES).file
        reader = read_file_as_csv(file)
        next(reader)  # skip header

        row = next(reader)

        field_id = row[3]
        field = Field.objects.get(identifier=field_id)

        self.assertEqual(self.catalog_id, row[0])
        self.assertEqual(field.distribution.identifier, row[2])
        self.assertEqual(field.distribution.dataset.identifier, row[1])
        self.assertEqual(row[6], field.distribution.enhanced_meta.get(key=meta_keys.PERIODICITY).value)
Code example #7
    def test_full_csv_dataset_metadata_fields(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)

        field = Field.objects.get(identifier=row[3])

        dataset_meta = json.loads(field.distribution.dataset.metadata)
        self.assertEqual(row[12], dataset_meta['publisher']['name'])
        self.assertEqual(row[13], dataset_meta['source'])
        self.assertEqual(row[14], field.distribution.dataset.title)
Code example #8
    def test_leading_nulls_distribution(self):
        path = os.path.join(samples_dir, 'leading_nulls_distribution.json')
        index_catalog('leading_null', path, self.index)
        self.task = GenerateDumpTask()
        self.task.save()
        gen = DumpGenerator(self.task, 'leading_null')
        gen.generate()

        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV,
                                          node__catalog_id='leading_null').file
        reader = read_file_as_csv(file)

        next(reader)  # skip header
        self.assertEqual(len(list(reader)), 2)
Code example #9
    def test_sources_csv_columns(self):
        dataset = Field.objects.first().distribution.dataset
        meta = json.loads(dataset.metadata)

        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_SOURCES).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)
        series = Field.objects.exclude(title='indice_tiempo')
        self.assertEqual(row[0], meta['source'])  # source name
        self.assertEqual(int(row[1]), 3)  # number of series
        self.assertEqual(int(row[2]), sum([int(meta_keys.get(x, meta_keys.INDEX_SIZE))
                                           for x in series]))
        self.assertEqual(row[3], min(meta_keys.get(x, meta_keys.INDEX_START) for x in series))
        self.assertEqual(row[4], max(meta_keys.get(x, meta_keys.INDEX_END) for x in series))
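
The assertions above effectively document how a sources row is built: one row per source carrying the series count, the summed index sizes, and the earliest start / latest end dates across that source's series. A hedged plain-Python sketch of that aggregation (hypothetical helper, mirroring the test's expectations; CSV cells are strings, hence the str() calls):

def build_sources_row(source_name, series):
    # series: the Field instances that share this source.
    series = list(series)
    return [
        source_name,
        str(len(series)),
        str(sum(int(meta_keys.get(s, meta_keys.INDEX_SIZE)) for s in series)),
        min(meta_keys.get(s, meta_keys.INDEX_START) for s in series),
        max(meta_keys.get(s, meta_keys.INDEX_END) for s in series),
    ]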
Code example #10
    def test_full_csv_metadata_fields(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        row = next(reader)

        field = Field.objects.get(identifier=row[3])

        field_meta = json.loads(field.metadata)
        distribution_meta = json.loads(field.distribution.metadata)
        self.assertEqual(row[7], field.title)
        self.assertEqual(row[8], field_meta['units'])
        self.assertEqual(row[9], field_meta['description'])
        self.assertEqual(row[10], distribution_meta['description'])
Code example #11
    def test_full_csv_dataset_theme_field(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_FULL,
                                          file_type=DumpFile.TYPE_CSV).file
        reader = read_file_as_csv(file)
        next(reader)  # Header
        row = next(reader)

        field = Field.objects.get(identifier=row[3])

        dataset_meta = json.loads(field.distribution.dataset.metadata)

        themes = json.loads(Node.objects.get(catalog_id=self.catalog_id).catalog)['themeTaxonomy']

        theme_label = ''
        for theme in themes:
            if theme['id'] == dataset_meta['theme'][0]:
                theme_label = theme['label']
                break

        self.assertEqual(theme_label, row[11])
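
The linear scan over the theme taxonomy is equivalent to a single next() over a generator expression, keeping the same empty-string default for a missing theme:

theme_label = next((t['label'] for t in themes
                    if t['id'] == dataset_meta['theme'][0]), '')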
Code example #12
    def test_values_length(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_VALUES).file
        reader = read_file_as_csv(file)
        header = next(reader)
        self.assertEqual(len(header), 7)
Code example #13
    def test_sources_csv(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_SOURCES).file
        reader = read_file_as_csv(file)
        next(reader)  # Header

        self.assertEqual(len(list(reader)), 1)  # one row per source
Code example #14
    def test_metadata_csv(self):
        file = self.task.dumpfile_set.get(file_name=DumpFile.FILENAME_METADATA).file
        reader = read_file_as_csv(file)
        next(reader)  # skip header

        self.assertEqual(len(list(reader)), 3)  # one row per series