def save_aggregation(self, filename, boundaries, impactcode, boundarycode,
                     categories, fields, use_parallel=True):
    """
    Save data aggregated to geospatial regions

    :param str filename: Destination filename
    :param str boundaries: File containing the geospatial boundaries
        to aggregate to
    :param str impactcode: Field in the exposure data that links each
        record to a boundary region
    :param str boundarycode: Corresponding field in the boundaries file
        that identifies each region
    :param bool categories: Whether to also aggregate categorical fields
    :param dict fields: Fields and the statistics to aggregate for each
    :param bool use_parallel: True for parallel behaviour, in which only
        node 0 writes to file
    """
    LOGGER.info("Saving aggregated data")
    boundaries = misc.download_file_from_s3_if_needed(boundaries)
    [filename, bucket_name, bucket_key] = \
        misc.create_temp_file_path_for_s3(filename)
    write_dict = self.exposure_att.copy()
    dt = datetime.now().strftime(DATEFMT)

    atts = {"prov:type": "void:Dataset",
            "prov:atLocation": os.path.basename(boundaries),
            "prov:generatedAtTime": misc.get_file_mtime(boundaries),
            "void:boundary_code": boundarycode}
    bdyent = self.prov.entity(":Aggregation boundaries", atts)

    aggact = self.prov.activity(":AggregationByRegions", dt, None,
                                {'prov:type': "Spatial aggregation",
                                 'void:functions': repr(fields)})
    aggatts = {"prov:type": "void:Dataset",
               "prov:atLocation": os.path.basename(filename),
               "prov:generatedAtTime": dt}
    aggfileent = self.prov.entity(":AggregationFile", aggatts)

    self.prov.used(aggact, bdyent)
    self.prov.wasInformedBy(aggact, self.provlabel)
    self.prov.wasGeneratedBy(aggfileent, aggact)

    if parallel.STATE.rank == 0 or not use_parallel:
        aggregate.choropleth(write_dict, boundaries, impactcode,
                             boundarycode, filename, fields, categories)
        misc.upload_to_s3_if_applicable(filename, bucket_name, bucket_key)
        if (bucket_name is not None and
                bucket_key is not None and
                bucket_key.endswith('.shp')):
            # Shapefiles are written as several sidecar files; upload each one.
            [rootname, ext] = os.path.splitext(filename)
            base_bucket_key = bucket_key[:-len(ext)]
            misc.upload_to_s3_if_applicable(rootname + '.dbf', bucket_name,
                                            base_bucket_key + '.dbf')
            misc.upload_to_s3_if_applicable(rootname + '.shx', bucket_name,
                                            base_bucket_key + '.shx')
            misc.upload_to_s3_if_applicable(rootname + '.prj', bucket_name,
                                            base_bucket_key + '.prj')
            misc.upload_to_s3_if_applicable(rootname + '.cpg', bucket_name,
                                            base_bucket_key + '.cpg', True)
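# Hypothetical usage sketch for the method above. `context` is assumed to be
# a populated HazImp Context instance; the file names, field names and the
# shape of the `fields` mapping are illustrative assumptions, not values
# taken from this codebase.
context.save_aggregation(
    'aggregated_loss.shp',                       # destination (or an s3:// URI)
    'SA1_boundaries.geojson',                    # boundaries (local or on S3)
    'SA1_CODE',                                  # impactcode: field in the exposure data
    'SA1_CODE21',                                # boundarycode: field in the boundary file
    True,                                        # categories
    {'structural_loss_ratio': ['mean', 'max']},  # fields and statistics
    use_parallel=False)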
def test_upload_to_s3_if_applicable(self):
    s3 = get_s3_client()
    s3.create_bucket(Bucket='bucket')

    directory_path = get_temporary_directory()
    file_path = os.path.join(directory_path, 'file.ext')
    # Use a context manager so the file is closed (and flushed) before upload.
    with open(file_path, 'w') as test_file:
        print('Test file', file=test_file)

    upload_to_s3_if_applicable(file_path, 'bucket', 'subdir/file.ext')

    response = s3.head_object(Bucket='bucket', Key='subdir/file.ext')
    self.assertTrue(response['ContentLength'] > 0)
def __call__(self, context, file_name=None):
    """
    Save provenance information. By default we save to xml format,
    and also write a PNG rendering of the provenance graph.

    :param context: The HazImp context instance
    :param str file_name: Destination file name
    """
    [file_name, bucket_name, bucket_key] = \
        misc.create_temp_file_path_for_s3(file_name)
    [basename, ext] = os.path.splitext(file_name)

    dot = prov_to_dot(context.prov)
    dot.write_png(basename + '.png')
    context.prov.serialize(file_name, format='xml')

    if bucket_key is not None:
        misc.upload_to_s3_if_applicable(file_name, bucket_name, bucket_key)
        misc.upload_to_s3_if_applicable(basename + '.png',
                                        bucket_name,
                                        bucket_key[:-len(ext)] + '.png')
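# Hypothetical usage sketch: this __call__ is assumed to live on a
# save-provenance job class (named SaveProvenance here purely for
# illustration), and `context` is assumed to be a populated HazImp Context
# carrying a `prov` document.
saver = SaveProvenance()
saver(context, file_name='impact_provenance.xml')
# Writes impact_provenance.xml and impact_provenance.png, uploading both
# when file_name points at an S3 location.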
def save_exposure_atts(self, filename, use_parallel=True):
    """
    Save the exposure attributes, including latitude and longitude.
    The file type saved is based on the filename extension.
    Options:
        '.npz': Save the arrays into a single file in uncompressed
            .npz format.
        '.csv': Save the data to a CSV file.

    :param filename: The file to be written.
    :param use_parallel: Set to True for parallel behaviour, in which
        only node 0 writes to file.
    :return write_dict: The whole dictionary, returned for testing.
    """
    [filename, bucket_name, bucket_key] = \
        misc.create_temp_file_path_for_s3(filename)

    s1 = self.prov.entity(
        ":HazImp output file",
        {"prov:label": "Full HazImp output file",
         "prov:type": "void:Dataset",
         "prov:atLocation": os.path.basename(filename)})
    a1 = self.prov.activity(":SaveImpactData",
                            datetime.now().strftime(DATEFMT),
                            None)
    self.prov.wasGeneratedBy(s1, a1)
    self.prov.wasInformedBy(a1, self.provlabel)

    write_dict = self.exposure_att.copy()
    write_dict[EX_LAT] = self.exposure_lat
    write_dict[EX_LONG] = self.exposure_long

    if use_parallel:
        assert misc.INTID in write_dict
        write_dict = parallel.gather_dict(write_dict,
                                          write_dict[misc.INTID])

    if parallel.STATE.rank == 0 or not use_parallel:
        if filename[-4:] == '.csv':
            save_csv(write_dict, filename)
        else:
            numpy.savez(filename, **write_dict)
        misc.upload_to_s3_if_applicable(filename, bucket_name, bucket_key)

    # The write_dict is returned for testing.
    # When running in parallel this is a way of getting all
    # of the context info.
    return write_dict
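# Usage sketch for save_exposure_atts: the output format follows the file
# extension. File names are illustrative and `context` is assumed to be a
# populated Context instance.
context.save_exposure_atts('impact.csv', use_parallel=False)   # CSV table
atts = context.save_exposure_atts('impact.npz', use_parallel=False)
# The returned dictionary mirrors what was written, which is handy in tests:
assert numpy.allclose(atts[EX_LAT], context.exposure_lat)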
def test_upload_to_s3_missing_local_file(self):
    with self.assertRaises(FileNotFoundError):
        upload_to_s3_if_applicable('invalid.zip', 'bucket', 'key')
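# A minimal sketch (not the actual HazImp implementation) of the contract
# the two tests above and the save_* methods rely on: do nothing when no
# bucket is given, upload via boto3 otherwise, and raise FileNotFoundError
# for a missing local file. The name and meaning of the optional fourth
# argument (used for the '.cpg' sidecar upload above) are assumptions.
import os

import boto3


def upload_to_s3_if_applicable(local_path, bucket_name, bucket_key,
                               ignore_missing=False):
    if bucket_name is None or bucket_key is None:
        return  # purely local output, nothing to upload
    if not os.path.isfile(local_path):
        if ignore_missing:
            return
        raise FileNotFoundError(local_path)
    boto3.client('s3').upload_file(local_path, bucket_name, bucket_key)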