def lambda_handler(event, context):
    print("Lambda or NAT IP Address:", instance_id)
    logger.info(json.dumps(event, default=str))
    for trigger_record in event['Records']:
        trigger_body = trigger_record['body']
        job = json.loads(trigger_body)
        logger.info(json.dumps(job, default=str))

        # Skip the access-test record that S3 automatically writes to SQS during initial setup
        if 'Event' in job:
            if job['Event'] == 's3:TestEvent':
                logger.info('Skip s3:TestEvent')
                continue

        # If the message came from S3 rather than from jobsender, convert it into a job
        if 'Records' in job:  # messages coming from S3 carry 'Records'
            for One_record in job['Records']:
                if 's3' in One_record:
                    Src_bucket = One_record['s3']['bucket']['name']
                    Src_key = One_record['s3']['object']['key']
                    Src_key = urllib.parse.unquote_plus(Src_key)
                    Size = One_record['s3']['object']['size']
                    Des_bucket, Des_prefix = Des_bucket_default, Des_prefix_default
                    Des_key = str(PurePosixPath(Des_prefix) / Src_key)
                    if Src_key[-1] == '/':  # handle empty-folder objects
                        Des_key += '/'
                    job = {
                        'Src_bucket': Src_bucket,
                        'Src_key': Src_key,
                        'Size': Size,
                        'Des_bucket': Des_bucket,
                        'Des_key': Des_key
                    }

        if 'Des_bucket' not in job:  # malformed message structure
            logger.warning(f'Wrong sqs job: {json.dumps(job, default=str)}')
            logger.warning('Try to handle next message')
            raise WrongRecordFormat

        # TODO: concurrent handling of multiple jobs per message is not implemented yet, and
        # partial failures are not handled either, so do not enable SQS Batch for now
        if job['Size'] > ResumableThreshold:
            upload_etag_full = step_function(job, table, s3_src_client, s3_des_client, instance_id,
                                             StorageClass, ChunkSize, MaxRetry, MaxThread, JobTimeout,
                                             ifVerifyMD5Twice, CleanUnfinishedUpload)
        else:
            upload_etag_full = step_fn_small_file(job, table, s3_src_client, s3_des_client,
                                                  instance_id, StorageClass, MaxRetry)
        if upload_etag_full != "TIMEOUT" and upload_etag_full != "ERR":
            continue
        else:
            raise TimeoutOrMaxRetry

    return {'statusCode': 200, 'body': 'Jobs completed'}
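
# --- Illustrative only: a minimal sketch (not part of the worker module) of the outer
# --- SQS-triggered Lambda event that lambda_handler above receives. Each record's 'body'
# --- arrives as a JSON string and is parsed by json.loads() above; this sample uses a
# --- jobsender-style body whose keys match the job dict the handler expects. All bucket,
# --- key, and size values are hypothetical placeholders.
import json

sample_sqs_event = {
    "Records": [
        {
            "body": json.dumps({
                "Src_bucket": "my-source-bucket",       # hypothetical source bucket
                "Src_key": "data/file1.bin",            # hypothetical object key
                "Size": 123456,                         # object size in bytes
                "Des_bucket": "my-destination-bucket",  # hypothetical destination bucket
                "Des_key": "prefix/data/file1.bin"
            })
        }
    ]
}
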
def lambda_handler(event, context):
    print("Lambda or NAT IP Address:", instance_id)
    logger.info(json.dumps(event, default=str))
    for trigger_record in event['Records']:
        trigger_body = trigger_record['body']
        job = json.loads(trigger_body)
        logger.info(json.dumps(job, default=str))

        # Skip the access-test record that S3 automatically writes to SQS during initial setup
        if 'Event' in job:
            if job['Event'] == 's3:TestEvent':
                logger.info('Skip s3:TestEvent')
                continue

        # If the message came from S3 rather than from jobsender, convert it into a job
        if 'Records' in job:  # messages coming from S3 carry 'Records'
            for One_record in job['Records']:
                if 's3' in One_record:
                    Src_bucket = One_record['s3']['bucket']['name']
                    Src_key = One_record['s3']['object']['key']
                    Src_key = urllib.parse.unquote_plus(Src_key)
                    Size = One_record['s3']['object']['size']
                    if "versionId" in One_record['s3']['object']:
                        versionId = One_record['s3']['object']['versionId']
                    else:
                        versionId = 'null'
                    Des_bucket, Des_prefix = Des_bucket_default, Des_prefix_default
                    Des_key = str(PurePosixPath(Des_prefix) / Src_key)
                    if Src_key[-1] == '/':  # handle empty-folder objects
                        Des_key += '/'
                    job = {
                        'Src_bucket': Src_bucket,
                        'Src_key': Src_key,
                        'Size': Size,
                        'Des_bucket': Des_bucket,
                        'Des_key': Des_key,
                        'versionId': versionId
                    }

        if 'Des_bucket' not in job:  # malformed message structure
            logger.warning(f'Wrong sqs job: {json.dumps(job, default=str)}')
            logger.warning('Try to handle next message')
            raise WrongRecordFormat
        if 'versionId' not in job:
            job['versionId'] = 'null'

        # TODO: partial failures when multiple jobs arrive in one message are not handled,
        # so SQS Batch is currently set to 1
        if job['Size'] > ResumableThreshold:
            upload_etag_full = step_function(
                job, table, s3_src_client, s3_des_client, instance_id, StorageClass, ChunkSize,
                MaxRetry, MaxThread, JobTimeout, ifVerifyMD5Twice, CleanUnfinishedUpload,
                UpdateVersionId, GetObjectWithVersionId)
        else:
            upload_etag_full = step_fn_small_file(job, table, s3_src_client, s3_des_client,
                                                  instance_id, StorageClass, MaxRetry,
                                                  UpdateVersionId, GetObjectWithVersionId)
        if upload_etag_full != "TIMEOUT" and upload_etag_full != "ERR":
            # Keep the SQS message on TIMEOUT or ERR; on normal completion let it be deleted.
            # Large files report TIMEOUT when the transfer thread exits after exhausting MaxRetry,
            # while small files return MaxRetry. For small files there is no point letting the
            # next worker retry: the failure comes from permission settings, not from the file
            # being too large to download, so the SQS message is deleted and DynamoDB records no
            # finished state. To let small files also keep the SQS message and be retried by the
            # next worker, add upload_etag_full != "MaxRetry" to the condition above.
            continue
        else:
            raise TimeoutOrMaxRetry

    return {'statusCode': 200, 'body': 'Jobs completed'}
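
# --- Illustrative only: a minimal sketch (not part of the worker module) of the other
# --- message body shape the handler above accepts: an S3 event notification delivered
# --- via SQS. It carries 'Records' and is converted into a job dict using
# --- Des_bucket_default / Des_prefix_default; a missing 'versionId' falls back to the
# --- string 'null'. All names and values below are hypothetical placeholders.
s3_notification_body_example = {
    "Records": [
        {
            "s3": {
                "bucket": {"name": "my-source-bucket"},  # hypothetical source bucket
                "object": {
                    "key": "data/file1.bin",             # URL-decoded by unquote_plus above
                    "size": 123456,                      # drives the ResumableThreshold branch
                    "versionId": "example-version-id"    # hypothetical; optional in practice
                }
            }
        }
    ]
}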