import os

# Project-local modules assumed by this file: cloud_helpers wraps the S3
# calls, extractor reports the feature set, hdf handles the HDF output files.
import cloud_helpers
import extractor
import hdf

# `bucket` is assumed to be a module-level S3 bucket name. Retry the download
# once before giving up; on the second failure the exception object is
# returned (not raised) so the caller can inspect it.
def download(k):
    try:
        return cloud_helpers.download_file_from_s3(bucket, k)
    except Exception:
        try:
            return cloud_helpers.download_file_from_s3(bucket, k)
        except Exception, e:
            return e
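# A minimal wrapper sketch (an assumption, not part of the original code):
# because download() returns the exception object instead of raising it,
# callers must check the result type before treating it as a file path.
def download_or_raise(k):
    result = download(k)
    if isinstance(result, Exception):
        raise result  # surface the failure once both attempts are exhausted
    return result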
def process_s3_file(input_bucket_name, input_key_name,
                    output_bucket_name_1ms=None,
                    output_bucket_name_100ms=None,
                    overwrite=False):
    input_filename = cloud_helpers.download_file_from_s3(input_bucket_name, input_key_name)
    dest_1ms, dest_100ms = output_filenames(input_filename, cloud_helpers.scratch_dir())
    out_key_name_1ms = os.path.split(dest_1ms)[1]
    out_key_name_100ms = os.path.split(dest_100ms)[1]

    # Default output buckets are derived from the input bucket's name
    if output_bucket_name_1ms is None:
        output_bucket_name_1ms = input_bucket_name + "-hdf-1ms"
    if output_bucket_name_100ms is None:
        output_bucket_name_100ms = input_bucket_name + "-hdf"

    # Skip inputs whose outputs are already on S3 with the same feature set
    feature_names = extractor.feature_names()
    if (not overwrite and
            cloud_helpers.hdf_already_on_s3(output_bucket_name_1ms, out_key_name_1ms, feature_names) and
            cloud_helpers.hdf_already_on_s3(output_bucket_name_100ms, out_key_name_100ms, feature_names)):
        print "HDFs on S3 have same features, so skipping this input..."
        return
    else:
        print "HDFs either not on S3 or have different features..."

    # In some weird situation we might have a local copy of the HDF already
    # finished but it just might not have been uploaded yet
    if (not overwrite and
            hdf.complete_hdf_exists(dest_1ms, feature_names) and
            hdf.complete_hdf_exists(dest_100ms, feature_names)):
        print "Found finished HDFs on local storage..."
        header = hdf.header_from_hdf_filename(dest_1ms)
    else:
        print "Running feature generator..."
        header = process_local_file(input_filename, dest_1ms, dest_100ms)
    print "Header:", header

    print "Uploading 1ms feature file", dest_1ms, "to", output_bucket_name_1ms, "/", out_key_name_1ms
    cloud_helpers.upload_file_to_s3(dest_1ms, output_bucket_name_1ms, out_key_name_1ms, header)
    print "Uploading 100ms feature file", dest_100ms, "to", output_bucket_name_100ms, "/", out_key_name_100ms
    cloud_helpers.upload_file_to_s3(dest_100ms, output_bucket_name_100ms, out_key_name_100ms, header)
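# A minimal driver sketch; "tick-data" and the key name are hypothetical
# placeholders, and the output buckets fall back to the "-hdf-1ms" / "-hdf"
# defaults derived above.
if __name__ == '__main__':
    process_s3_file("tick-data", "2012-01-05.csv.gz", overwrite=False)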