Example 1
0
 def create_test_dump_file(self):
     """Write a single object followed by a batched queryset dump into one test JSON file.

     Produces ``read-dump-test.json`` in the dumps directory for the mock
     resource, with the id=1 object first and objects 2-6 serialized in
     batches of two.
     """
     instance = HttpResourceMock.objects.get(id=1)
     batch = HttpResourceMock.objects.filter(id__in=[2, 3, 4, 5, 6])
     target = os.path.join(get_dumps_path(instance), "read-dump-test.json")
     with open(target, "w") as dump_file:
         object_to_disk(instance, dump_file)
         queryset_to_disk(batch, dump_file, batch_size=2)
Example 2
0
    def handle_label(self, dataset_label, **options):
        """Dump a Dataset and all of its related objects to a JSON file and sync to AWS.

        Looks up the Dataset by name, writes it (plus harvest sources, harvests,
        current versions with their indices/collections/documents, and all
        Extensions) to ``<name>.<id>.json`` under the dumps directory, dumps
        resources, and uploads everything to S3 unless running on localhost.

        :param dataset_label: name of the Dataset to dump
        :raises Dataset.DoesNotExist: when no Dataset matches the label
        """
        dataset = Dataset.objects.get(name=dataset_label)

        destination = get_dumps_path(dataset)
        if not os.path.exists(destination):
            os.makedirs(destination)
        dataset_file = os.path.join(
            destination, "{}.{}.json".format(dataset.name, dataset.id))
        # Build the queryset once: reusing the same QuerySet object lets Django
        # cache the results, so the dump and the per-version loop below don't
        # hit the database twice for the same rows.
        current_versions = dataset.versions.filter(is_current=True)
        with open(dataset_file, "w") as json_file:
            object_to_disk(dataset, json_file)
            queryset_to_disk(dataset.harvestsource_set, json_file)
            queryset_to_disk(dataset.harvest_set, json_file)
            queryset_to_disk(current_versions, json_file)
            for version in current_versions:
                queryset_to_disk(version.indices, json_file)
                queryset_to_disk(version.collection_set, json_file)
                queryset_to_disk(version.document_set, json_file)
            queryset_to_disk(Extension.objects.all(), json_file)

        resource_files = self.dump_resources()

        # Sync files with AWS (skipped for local development environments)
        if environment.env != "localhost":
            logger.info("Uploading files to AWS")
            ctx = Context(environment)
            harvester_data_bucket = f"s3://{environment.aws.harvest_content_bucket}/datasets/harvester"
            for file in [dataset_file] + resource_files:
                # Map the local data dir prefix onto the bucket prefix.
                remote_file = harvester_data_bucket + file.replace(
                    settings.DATAGROWTH_DATA_DIR, "", 1)
                ctx.run(f"aws s3 cp {file} {remote_file}", echo=True)
Example 3
0
 def handle_dataset(self, dataset, *args, **options):
     """Serialize a dataset together with its growths, collections and documents.

     Resets the dataset's cached growth first, then writes everything to
     ``<signature>.<id>.json`` inside the dumps directory, creating that
     directory when necessary.
     """
     dataset.current_growth = None  # resets the dataset
     dump_dir = get_dumps_path(dataset)
     if not os.path.exists(dump_dir):
         os.makedirs(dump_dir)
     dump_file_path = os.path.join(
         dump_dir, f"{dataset.signature}.{dataset.id}.json")
     with open(dump_file_path, "w") as json_file:
         object_to_disk(dataset, json_file)
         queryset_to_disk(dataset.growth_set, json_file)
         queryset_to_disk(dataset.collections, json_file)
         queryset_to_disk(dataset.documents, json_file)
Example 4
0
 def test_object_to_disk(self, serialize_mock):
     """Assert that object_to_disk serializes one model as a single newline-terminated JSON line.

     Mocks the file's ``write`` so we can inspect exactly what was written:
     one call, one serializer invocation, and a JSON array holding a single
     model with ``pk`` 1.

     :param serialize_mock: patched serializer injected by the test decorator
     """
     obj = HttpResourceMock.objects.get(id=1)
     # Use a context manager so the file handle is closed even on assertion
     # failure (the original leaked the descriptor).
     with open(self.get_file_path("write"), "w") as fd:
         fd.write = Mock()
         object_to_disk(obj, fd)
         # assertEquals is a deprecated alias removed in Python 3.12;
         # assertEqual is the supported spelling.
         self.assertEqual(fd.write.call_count, 1)
         self.assertEqual(serialize_mock.call_count, 1)
         call = fd.write.call_args_list[0]
         args, kwargs = call
         line = args[0]
         self.assertTrue(line.endswith("\n"))
         models = json.loads(line)
         self.assertEqual(len(models), 1)
         model = models[0]
         self.assertEqual(model["pk"], 1)
Example 5
0
    def handle_label(self, freeze_label, **options):
        """Dump a Freeze and all of its related sets to a single JSON file.

        Looks the Freeze up by name, writes it followed by its sources,
        harvests, indices, collections, arrangements and documents to
        ``<name>.<id>.json`` in the dumps directory (created on demand),
        then dumps the associated resources.

        :param freeze_label: name of the Freeze to dump
        :raises Freeze.DoesNotExist: when no Freeze matches the label
        """
        freeze = Freeze.objects.get(name=freeze_label)

        dump_dir = get_dumps_path(freeze)
        if not os.path.exists(dump_dir):
            os.makedirs(dump_dir)
        dump_path = os.path.join(dump_dir,
                                 f"{freeze.name}.{freeze.id}.json")

        # Dump the freeze itself first, then every related queryset in order.
        related = (
            freeze.edurepsource_set,
            freeze.edurepharvest_set,
            freeze.indices,
            freeze.collection_set,
            freeze.arrangement_set,
            freeze.document_set,
        )
        with open(dump_path, "w") as json_file:
            object_to_disk(freeze, json_file)
            for queryset in related:
                queryset_to_disk(queryset, json_file)

        self.dump_resources()