def create_test_dump_file(self):
    """Write the ``read-dump-test.json`` fixture into the dumps directory.

    The file receives the ``HttpResourceMock`` with id 1 through
    ``object_to_disk``, followed by the mocks with ids 2 through 6 through
    ``queryset_to_disk`` using a batch size of 2.
    """
    obj = HttpResourceMock.objects.get(id=1)
    queryset = HttpResourceMock.objects.filter(id__in=[2, 3, 4, 5, 6])
    dump_path = get_dumps_path(obj)
    # The dumps directory may not exist yet (e.g. on a fresh checkout);
    # create it so that opening the file below cannot fail for that reason.
    os.makedirs(dump_path, exist_ok=True)
    with open(os.path.join(dump_path, "read-dump-test.json"), "w") as fd:
        object_to_disk(obj, fd)
        queryset_to_disk(queryset, fd, batch_size=2)
def handle_label(self, dataset_label, **options):
    """Dump a Dataset and its related objects to disk and sync them to AWS.

    The Dataset whose ``name`` equals ``dataset_label`` is written to
    ``<dumps path>/<name>.<id>.json`` together with its harvest sources,
    harvests, current versions (including each version's indices,
    collections and documents) and all Extension objects. Afterwards
    ``self.dump_resources()`` is called and, unless the environment is
    "localhost", every produced file is copied to the harvest content
    bucket with ``aws s3 cp``.

    :param dataset_label: name of the Dataset to dump
    :param options: additional command options (unused here)
    """
    from shlex import quote  # local import: only needed for the AWS upload

    dataset = Dataset.objects.get(name=dataset_label)

    destination = get_dumps_path(dataset)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(destination, exist_ok=True)
    dataset_file = os.path.join(destination, f"{dataset.name}.{dataset.id}.json")

    current_versions = dataset.versions.filter(is_current=True)
    with open(dataset_file, "w") as json_file:
        object_to_disk(dataset, json_file)
        queryset_to_disk(dataset.harvestsource_set, json_file)
        queryset_to_disk(dataset.harvest_set, json_file)
        queryset_to_disk(current_versions, json_file)
        for version in current_versions:
            queryset_to_disk(version.indices, json_file)
            queryset_to_disk(version.collection_set, json_file)
            queryset_to_disk(version.document_set, json_file)
        # Extension.objects.all() does not depend on the version, so it is
        # dumped once, outside the loop.
        queryset_to_disk(Extension.objects.all(), json_file)

    # NOTE(review): presumably a list of dumped file paths; it is
    # concatenated with [dataset_file] below - confirm against dump_resources.
    resource_files = self.dump_resources()

    # Sync files with AWS
    if environment.env == "localhost":
        return
    logger.info("Uploading files to AWS")
    ctx = Context(environment)
    harvester_data_bucket = f"s3://{environment.aws.harvest_content_bucket}/datasets/harvester"
    for local_file in [dataset_file] + resource_files:
        # The remote key mirrors the local path relative to the data directory
        remote_file = harvester_data_bucket + local_file.replace(
            settings.DATAGROWTH_DATA_DIR, "", 1)
        # Quote both paths so spaces or shell metacharacters in a name
        # cannot break (or alter) the command line.
        ctx.run(f"aws s3 cp {quote(local_file)} {quote(remote_file)}", echo=True)
def handle_dataset(self, dataset, *args, **options):
    """Dump a dataset and its related objects to a JSON file on disk.

    The file is written to ``<dumps path>/<signature>.<id>.json`` and
    contains, in order: the dataset itself, its ``growth_set``, its
    ``collections`` and its ``documents``.

    :param dataset: the dataset instance to dump
    :param args: additional positional arguments (unused here)
    :param options: additional command options (unused here)
    """
    dataset.current_growth = None  # resets the dataset

    destination = get_dumps_path(dataset)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(destination, exist_ok=True)
    file_name = os.path.join(
        destination, "{}.{}.json".format(dataset.signature, dataset.id))

    with open(file_name, "w") as json_file:
        object_to_disk(dataset, json_file)
        queryset_to_disk(dataset.growth_set, json_file)
        queryset_to_disk(dataset.collections, json_file)
        queryset_to_disk(dataset.documents, json_file)
def test_object_to_disk(self, serialize_mock):
    """Check that ``object_to_disk`` writes one newline-terminated JSON line.

    The file's ``write`` method is replaced by a Mock so the written line
    can be inspected. That line must decode to a list holding exactly one
    serialized model, whose ``pk`` is 1.

    :param serialize_mock: mock of the serializer, expected to be called once
    """
    obj = HttpResourceMock.objects.get(id=1)
    write_mock = Mock()
    # The context manager guarantees the handle is closed even when
    # object_to_disk raises.
    with open(self.get_file_path("write"), "w") as fd:
        fd.write = write_mock
        object_to_disk(obj, fd)

    self.assertEqual(write_mock.call_count, 1)
    self.assertEqual(serialize_mock.call_count, 1)

    # A call object unpacks into (positional args, keyword args)
    args, _ = write_mock.call_args_list[0]
    line = args[0]
    self.assertTrue(line.endswith("\n"))

    models = json.loads(line)
    self.assertEqual(len(models), 1)
    self.assertEqual(models[0]["pk"], 1)
def handle_label(self, freeze_label, **options):
    """Dump a Freeze and its related objects to a JSON file on disk.

    The Freeze whose ``name`` equals ``freeze_label`` is written to
    ``<dumps path>/<name>.<id>.json`` followed by its Edurep sources,
    Edurep harvests, indices, collections, arrangements and documents.
    Afterwards ``self.dump_resources()`` is called.

    :param freeze_label: name of the Freeze to dump
    :param options: additional command options (unused here)
    """
    freeze = Freeze.objects.get(name=freeze_label)

    destination = get_dumps_path(freeze)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(destination, exist_ok=True)
    file_name = os.path.join(destination, "{}.{}.json".format(freeze.name, freeze.id))

    with open(file_name, "w") as json_file:
        object_to_disk(freeze, json_file)
        # The related sets are written in this fixed order
        related_querysets = (
            freeze.edurepsource_set,
            freeze.edurepharvest_set,
            freeze.indices,
            freeze.collection_set,
            freeze.arrangement_set,
            freeze.document_set,
        )
        for related in related_querysets:
            queryset_to_disk(related, json_file)

    self.dump_resources()