async def download_data():
    """Download the three fixed CSV datasets from the OBS bucket.

    Every object is written to a local path identical to its object key.
    The body contains no ``await``: the downloads are ordinary blocking
    calls made one after another. Run with ``asyncio.run(download_data())``.

    A failed download is reported on stdout and does not stop the
    remaining downloads.

    NOTE(security): the access key / secret key below are hard-coded, as in
    the original code. They should be rotated and loaded from configuration
    or the environment instead of being kept in source control.
    """
    AK = 'IJKOCBW6LYDVI6Y1WW0Q'
    SK = '7SCcFD6ROXwCAN1B5ios2tapwhvsStZMG1qxrOxy'
    server = 'https://obs.ap-southeast-2.myhuaweicloud.com'
    bucketName = 'obs-mapper-cie'

    # Object key in the bucket == local destination path.
    object_keys = (
        'Data/Weather/Bangkok.csv',
        'Data/TrafficCondition/AverageSpeed/AverageSpeed.csv',
        'Data/Road/RoadInformation.csv',
    )

    obsClient = ObsClient(access_key_id=AK, secret_access_key=SK, server=server)
    try:
        for object_key in object_keys:
            resp = obsClient.getObject(bucketName, object_key, downloadPath=object_key)
            if resp.status >= 300:
                # Surface the failure instead of dropping the response unseen.
                print('errorCode:', resp.errorCode)
                print('errorMessage:', resp.errorMessage)
    finally:
        # Release the client even when a download raises.
        obsClient.close()
def download_files_from_obs(access_key, secret_key, end_point, bucket_name, prefix, download_path):
    """Download every object listed under *prefix* and return the local location.

    Args:
        access_key: OBS access key id.
        secret_key: OBS secret access key.
        end_point: OBS server endpoint.
        bucket_name: Bucket to read from.
        prefix: Prefix handed to ``list_obs_files`` to select the objects.
        download_path: Local path prefix; each object is stored at
            ``download_path + <object key>``.

    Returns:
        ``LOCAL_FILE_PREFIX + download_path + prefix``.
    """
    obsClient = ObsClient(access_key_id=access_key, secret_access_key=secret_key,
                          server=end_point, long_conn_mode=True)
    try:
        files = list_obs_files(obsClient, bucket_name, prefix)
        print(len(files))
        for file in files:
            # Third positional argument kept exactly as in the original call;
            # presumably the SDK's downloadPath parameter - confirm against the SDK.
            obsClient.getObject(bucket_name, file, download_path + file)
    finally:
        # Close the client even if listing or a download raises.
        obsClient.close()
    return LOCAL_FILE_PREFIX + download_path + prefix
def doGetObject(lock, completedBlocks, bucketName, objectKey, startPos, endPos, i):
    """Download one byte range of an object and write it into the local file.

    The range ``startPos``-``endPos`` is requested through a ``range`` header
    and written into ``localFilePath`` starting at offset ``startPos``. The
    file is opened in ``'rb+'`` mode, so it must already exist.

    Args:
        lock: Lock guarding ``completedBlocks``.
        completedBlocks: Shared counter (``.value``) of finished parts.
        bucketName: Source bucket.
        objectKey: Source object key.
        startPos: First byte offset of the part.
        endPos: Last byte offset of the part, as sent in the range header.
        i: Zero-based part index, used only for the progress messages.
    """
    # A ``global`` statement applies to the whole function, so this matches
    # the original branch-local declaration: on Windows the existing module
    # level client is reused, elsewhere a new one is created and bound to
    # the module-level name.
    global obsClient
    if not IS_WINDOWS:
        obsClient = ObsClient(access_key_id=AK, secret_access_key=SK, server=server)

    resp = obsClient.getObject(bucketName, objectKey,
                               headers=GetObjectHeader(range='%d-%d' % (startPos, endPos)))
    if resp.status >= 300:
        print('\tPart#' + str(i + 1) + ' failed\n')
        return

    response = resp.body.response
    chunk_size = 65536
    if response is not None:
        try:
            with open(localFilePath, 'rb+') as f:
                f.seek(startPos, 0)
                while True:
                    chunk = response.read(chunk_size)
                    if not chunk:
                        break
                    f.write(chunk)
        finally:
            # Always release the HTTP stream, also when opening or writing
            # the local file fails.
            response.close()

    print('Part#' + str(i + 1) + 'done\n')
    with lock:
        completedBlocks.value += 1
def read_obs_files(key, secret, end_point, bucket_name, prefix, downloadPath):
    """Download every object listed under *prefix*, printing coarse progress.

    Args:
        key: OBS access key id.
        secret: OBS secret access key.
        end_point: OBS server endpoint.
        bucket_name: Bucket to read from.
        prefix: Prefix handed to ``list_obs_files`` to select the objects.
        downloadPath: Local path prefix; each object is stored at
            ``downloadPath + <object key>``.
    """
    obsClient = ObsClient(
        access_key_id=key,
        secret_access_key=secret,
        server=end_point,
        long_conn_mode=True,
    )
    try:
        files = list_obs_files(obsClient, bucket_name, prefix)
        numOfFiles = len(files)
        print(numOfFiles)

        # Report roughly every 10% of the files. The original used
        # ``num % (numOfFiles / 10)``, which is a float modulus in Python 3
        # (erratic output) and a modulo by zero under integer division when
        # there are fewer than 10 files; clamp the step to at least 1.
        progress_step = max(1, numOfFiles // 10)

        for num, file in enumerate(files, start=1):
            obsClient.getObject(bucket_name, file, downloadPath=downloadPath + file)
            if num % progress_step == 0:
                print(str(num) + ":" + file)
    finally:
        # Close the client even if listing or a download raises.
        obsClient.close()
def download_obs(local_path, obs_path):
    """Download a single OBS object to *local_path*.

    Args:
        local_path: Local destination passed to the SDK as ``downloadPath``.
        obs_path: ``"<bucket>/<object key>"``; it is split at the first ``/``.

    Raises:
        IndexError: if *obs_path* contains no ``/``.
    """
    obs_client = ObsClient(AK, SK, is_secure=True, server=obs_endpoint)
    try:
        parts = obs_path.split("/", 1)
        bucket_name = parts[0]
        object_key = parts[1]
        resp = obs_client.getObject(bucket_name, object_key, downloadPath=local_path)
        if resp.status < 300:
            print('Succeeded to download training dataset')
        else:
            print('Failed to download ')
    finally:
        # The original never closed the client; release it on every path.
        obs_client.close()
def download_dataset():
    """Fetch ``ratings.csv`` from OBS into ``LOCAL_DATA_DIR``.

    ``data_path`` is read as ``"<bucket>/<prefix>"`` and the object key is
    ``<prefix>/ratings.csv``. The outcome is only printed; a failed download
    does not raise. The client is closed on every path.
    """
    print("Start to download dataset from OBS")
    client = ObsClient(AK, SK, is_secure=True, server=obs_endpoint)
    try:
        path_parts = data_path.split("/", 1)
        bucket = path_parts[0]
        ratings_key = path_parts[1] + "/ratings.csv"
        result = client.getObject(bucket, ratings_key, downloadPath=LOCAL_DATA_DIR)
        succeeded = result.status < 300
        print('Succeeded to download training dataset' if succeeded else 'Failed to download ')
    finally:
        client.close()
def download_dataset():
    """Fetch ``ratings.csv`` from OBS into ``LOCAL_DATA_PATH``.

    ``data_path`` is read as ``"<bucket>/<prefix>"`` and the object key is
    ``<prefix>/ratings.csv``. The client is closed on every path.

    Raises:
        Exception: if the download response status is not below 300.
    """
    print("Start to download dataset from OBS")
    client = ObsClient(AK, SK, is_secure=True, server=obs_endpoint)
    try:
        path_parts = data_path.split("/", 1)
        bucket = path_parts[0]
        ratings_key = path_parts[1] + "/ratings.csv"
        result = client.getObject(bucket, ratings_key, downloadPath=LOCAL_DATA_PATH)
        if not result.status < 300:
            print('Failed to download ')
            raise Exception('Failed to download training dataset from OBS !')
        print('Succeeded to download training dataset')
    finally:
        client.close()
def download_dataset(file_name, local_path):
    """Fetch one file of the dataset from OBS to *local_path*.

    ``data_path`` is read as ``"<bucket>/<prefix>"`` and the object key is
    ``<prefix>/<file_name>``. The client is closed on every path.

    Args:
        file_name: Name of the object below the dataset prefix.
        local_path: Local destination passed to the SDK as ``downloadPath``.

    Raises:
        Exception: if the download response status is not below 300.
    """
    print("Start to download dataset from OBS")
    client = ObsClient(AK, SK, is_secure=True, server=obs_endpoint)
    try:
        path_parts = data_path.split("/", 1)
        bucket = path_parts[0]
        object_key = path_parts[1] + "/" + file_name
        result = client.getObject(bucket, object_key, downloadPath=local_path)
        if not result.status < 300:
            print('failed to download file ')
            raise Exception('download file from OBS fail.')
        print('succeeded to download file')
    finally:
        client.close()
async def download_file(objectKey, localFile):
    """Download one object from the fixed OBS bucket to *localFile*.

    The body contains no ``await``: the download is an ordinary blocking
    call. A failed download is reported on stdout and does not raise.

    Args:
        objectKey: Key of the object in the bucket.
        localFile: Local destination passed to the SDK as ``downloadPath``.

    NOTE(security): the access key / secret key below are hard-coded, as in
    the original code. They should be rotated and loaded from configuration
    or the environment instead of being kept in source control.
    """
    AK = 'IJKOCBW6LYDVI6Y1WW0Q'
    SK = '7SCcFD6ROXwCAN1B5ios2tapwhvsStZMG1qxrOxy'
    server = 'https://obs.ap-southeast-2.myhuaweicloud.com'
    bucketName = 'obs-mapper-cie'

    obsClient = ObsClient(access_key_id=AK, secret_access_key=SK, server=server)
    try:
        print('Downloading an object to :' + localFile + '\n')
        resp = obsClient.getObject(bucketName, objectKey, downloadPath=localFile)
        if resp.status >= 300:
            # Surface the failure instead of dropping the response unseen.
            print('errorCode:', resp.errorCode)
            print('errorMessage:', resp.errorMessage)
    finally:
        # Release the client even when the download raises.
        obsClient.close()
filename1 = "podserver_ip.csv" filename2 = "platform_ip.csv" filename3 = "cdn_ip.csv" for filename in [filename1, filename2, filename3]: try: try: print(filename) # 创建ObsClient实例 obsClient = ObsClient( access_key_id='', secret_access_key='', server='https://obs.cn-north-4.myhuaweicloud.com') resp = obsClient.getObject( '1', "basedata/" + filename.replace(".csv", "") + "/" + filename, downloadPath="1/" + filename) if resp.status < 300: print('requestId:', resp.requestId) print('url:', resp.body.url) insertMysql(filename) else: print('errorCode:', resp.errorCode) print('errorMessage:', resp.errorMessage) os.remove("D:\\python\\1\\" + filename) except: time.sleep(10) print(filename) # 创建ObsClient实例 obsClient = ObsClient(
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 3 14:19:17 2018

@author: yy

Download one reference-genome part from OBS to a local file.
"""
from obs import ObsClient

obsClient = ObsClient(
    access_key_id='XXXXXXXXXXXXX',
    secret_access_key='XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
    server='https://obs.cn-north-1.myhuaweicloud.com'
)
try:
    resp = obsClient.getObject('hs-grch38.p12-genome',
                               'genome/GRCh38.p12_part1.fna',
                               downloadPath='/home/webservice/ref/GRCh38.p12_part1.fna')
    if resp.status < 300:
        # Print the request id
        print('requestId:', resp.requestId)
    else:
        # Print the error code
        print('errorCode:', resp.errorCode)
finally:
    # Close the client even if the download raises.
    obsClient.close()
class modelarts_handler():
    """Drive a ModelArts training job whose code and output live in OBS.

    ``run_job`` is the entry point: it determines the next version number
    for the job name, creates the versioned output directory in OBS, uploads
    the code, creates the job (or a new version of an existing job) and then
    waits for it.

    ``create_obs_handler`` and ``create_session`` must be called first; they
    set ``self.obsClient`` and ``self.session``, which the other methods use.
    """

    # JOB_STATE names for which a job version is still active and is stopped
    # by ``stop_new_versions``.
    _STOPPABLE_STATES = frozenset((
        "JOBSTAT_INIT",
        "JOBSTAT_IMAGE_CREATING",
        "JOBSTAT_SUBMIT_TRYING",
        "JOBSTAT_DEPLOYING",
        "JOBSTAT_RUNNING",
    ))

    def __init__(self):
        # OBS output URL of the current run; set by run_job().
        self.output_url = None

    def _first_matching_job(self, session_config):
        """Return the first job found for ``session_config.job_name``, or None.

        Queries the first page (10 entries, ascending order) of the job list
        and returns ``jobs[0]`` of the result; returns None when the query
        yields nothing or reports a total count of 0.
        """
        job_list_info = Estimator.get_job_list(
            modelarts_session=self.session,
            per_page=10,
            page=1,
            order="asc",
            search_content=session_config.job_name)
        if job_list_info is None or job_list_info.get("job_total_count", 0) == 0:
            return None
        return job_list_info["jobs"][0]

    def create_obs_output_dirs(self, output_url):
        """Create the OBS "directory" object for *output_url*.

        Args:
            output_url: URL of the form ``s3://<bucket>/<sub dir>``.

        Raises:
            RuntimeError: if the put request status is not below 300.
        """
        # Skip the 5-character "s3://" scheme; the first segment is the bucket.
        bucket_name = output_url[5:].split('/')[0]
        sub_dir = output_url.replace(f"s3://{bucket_name}/", "", 1)
        logger.debug('create obs output{} subdir:{} bucket:{}'.format(
            output_url, sub_dir, bucket_name))
        resp = self.obsClient.putContent(bucket_name, sub_dir, content=None)
        if resp.status >= 300:
            logger.warning('errorCode:{} msg:{}'.format(resp.errorCode,
                                                        resp.errorMessage))
            raise RuntimeError('failed')
        logger.debug('obs put content request ok')

    def create_obs_handler(self, access_config):
        """Create the OBS client handle and store it in ``self.obsClient``."""
        self.obsClient = ObsClient(
            access_key_id=access_config.access_key,
            secret_access_key=access_config.secret_access_key,
            server=access_config.server)

    def create_session(self, access_config):
        """Create the ModelArts session and store it in ``self.session``."""
        # The endpoint settings below are for dedicated clouds such as
        # computing centers; the public cloud does not need them.
        if (access_config.get("iam_endpoint") != ""
                and access_config.get("obs_endpoint") != ""
                and access_config.get("modelarts_endpoint") != ""):
            Session.set_endpoint(
                iam_endpoint=access_config.iam_endpoint,
                obs_endpoint=access_config.obs_endpoint,
                modelarts_endpoint=access_config.modelarts_endpoint,
                region_name=access_config.region_name)
        # Create the ModelArts session handle.
        self.session = Session(access_key=access_config.access_key,
                               secret_key=access_config.secret_access_key,
                               project_id=access_config.project_id,
                               region_name=access_config.region_name)

    def print_train_instance_types(self):
        """Print the training instance types reported for this session."""
        algo_info = Estimator.get_train_instance_types(
            modelarts_session=self.session)
        print("get valid train_instance_types:{}".format(algo_info))

    def stop_new_versions(self, session_config):
        """Stop the first job version found for the job name if it is active.

        Nothing is stopped when no job matches or when the job state is not
        one of ``_STOPPABLE_STATES``; the decision is printed either way.
        """
        job = self._first_matching_job(session_config)
        if job is None:
            print("find no match version return")
            return

        pre_version_id = job.get("version_id")
        job_id = job.get("job_id")
        job_status = job.get("status")
        estimator = Estimator(modelarts_session=self.session,
                              job_id=job_id,
                              version_id=pre_version_id)
        state_name = JOB_STATE[job_status]
        if state_name in self._STOPPABLE_STATES:
            status = estimator.stop_job_version()
            print(
                "jobname:{} jobid:{} preversionid:{} jobstatus:{} stop status:{}"
                .format(session_config.job_name, job_id, pre_version_id,
                        state_name, status))
        else:
            print(
                "jobname:{} jobid:{} preversionid:{} jobstatus:{} no need stop"
                .format(session_config.job_name, job_id, pre_version_id,
                        state_name))

    def get_job_name_next_new_version(self, session_config):
        """Return the next version number for ``session_config.job_name``.

        Returns 1 when no job matches. Otherwise the ``version_name`` of the
        first matching job (default ``"V0"``) is parsed by dropping its first
        character, and that number plus one is returned.
        """
        job = self._first_matching_job(session_config)
        if job is None:
            return 1

        estimator = Estimator(modelarts_session=self.session,
                              job_id=job.get("job_id"),
                              version_id=job.get("version_id"))
        job_info = estimator.get_job_info()
        version_number = job_info.get("version_name", "V0")[1:]
        return int(version_number) + 1

    def get_obs_url_content(self, obs_url):
        """Return the UTF-8 decoded content of the object at *obs_url*.

        Args:
            obs_url: URL of the form ``s3://<bucket>/<object key>``.

        Raises:
            RuntimeError: if the get request status is not below 300.
        """
        # Skip the 5-character "s3://" scheme; the first segment is the bucket.
        bucket_name = obs_url[5:].split('/')[0]
        obs_sub_path = obs_url.replace(f"s3://{bucket_name}/", "", 1)
        resp = self.obsClient.getObject(bucket_name,
                                        obs_sub_path,
                                        loadStreamInMemory=True)
        if resp.status >= 300:
            raise RuntimeError(
                'obs get object ret:{} url:{} bucket:{} path:{}'.format(
                    resp.status, obs_url, bucket_name, obs_sub_path))
        logger.debug('request ok')
        return resp.body.buffer.decode("utf-8")

    def update_code_to_obs(self, session_config, localpath):
        """Upload *localpath* to the OBS location named by ``code_dir``.

        ``session_config.code_dir`` is read as ``/<bucket>/<sub dir>``: the
        bucket is the segment after the leading slash and the remaining
        segments form the object key.
        """
        bucket_name = session_config.code_dir.split('/')[1]
        sub_dir = "/".join(session_config.code_dir.strip("/").split('/')[1:])
        logger.info("update code codepath:{} bucket:{} subdir:{}".format(
            session_config.code_dir, bucket_name, sub_dir))
        # TODO: the upload result is not verified yet (marked "to be
        # improved / verified" by the original author).
        self.obsClient.putFile(bucket_name, sub_dir, localpath)

    def create_modelarts_job(self, session_config, output_url):
        """Create the training job, or a new version of an existing job.

        Args:
            session_config: Job settings (framework, code dir, inputs, ...).
            output_url: ``s3://...`` URL; its first 4 characters (``s3:/``)
                are dropped before it is used as log and output path.

        Returns:
            The job instance returned by the estimator.

        Raises:
            RuntimeError: if the created job reports ``is_success`` as false.
        """
        jobdesc = session_config.job_description_prefix + "_jobname_" + session_config.job_name + "_" + str(
            session_config.train_instance_type) + "_" + str(
                session_config.train_instance_count)

        estimator = Estimator(
            modelarts_session=self.session,
            framework_type=session_config.framework_type,
            framework_version=session_config.framework_version,
            code_dir=session_config.code_dir,
            boot_file=session_config.boot_file,
            log_url=output_url[4:],
            hyperparameters=session_config.hyperparameters,
            output_path=output_url[4:],
            pool_id=get_config_value(session_config, "pool_id"),
            train_instance_type=get_config_value(session_config,
                                                 "train_instance_type"),
            train_instance_count=session_config.train_instance_count,
            nas_type=get_config_value(session_config, "nas_type"),
            nas_share_addr=get_config_value(session_config, "nas_share_addr"),
            nas_mount_path=get_config_value(session_config, "nas_mount_path"),
            job_description=jobdesc,
            user_command=None)

        job = self._first_matching_job(session_config)
        if job is None:
            # No job with this name yet: create a brand-new job.
            logger.debug("new create inputs:{} job_name:{}".format(
                session_config.inputs, session_config.job_name))
            job_instance = estimator.fit(inputs=session_config.inputs,
                                         wait=False,
                                         job_name=session_config.job_name)
        else:
            # The job exists: add a new version on top of the found one.
            job_id = job.get("job_id")
            pre_version_id = job.get("version_id")
            logger.debug("new versions job_id:{} pre_version_id:{}".format(
                job_id, pre_version_id))
            job_instance = estimator.create_job_version(
                job_id=job_id,
                pre_version_id=pre_version_id,
                inputs=session_config.inputs,
                wait=False,
                job_desc=jobdesc)

        print("inputs:{} job_name:{} ret instance:{}".format(
            session_config.inputs, session_config.job_name, job_instance))
        job_info = job_instance.get_job_info()
        if not job_info['is_success']:
            logger.error("failed to run job on modelarts, msg %s" %
                         (job_info['error_msg']))
            raise RuntimeError('failed')
        print(
            "create sucess job_id:{} resource_id:{} version_name:{} create_time:{}"
            .format(job_info["job_id"], job_info["resource_id"],
                    job_info["version_name"], job_info["create_time"]))
        return job_instance

    def run_job(self, session_config, localpath):
        """Run the full flow: prepare OBS, upload code, create the job, wait.

        Args:
            session_config: Job settings; ``out_base_url`` is the base of the
                versioned output directory.
            localpath: Local code path uploaded to OBS.
        """
        # OBS URLs always use forward slashes; os.path.join would insert
        # backslashes on Windows, so join with posixpath explicitly.
        import posixpath

        logger.debug("session config:{}".format(session_config))
        self.print_train_instance_types()

        # Get the next version number for job_name.
        next_version_id = self.get_job_name_next_new_version(session_config)

        # Build the output path; the trailing "" makes it end with a slash.
        self.output_url = posixpath.join(
            "s3:/{}".format(session_config.out_base_url),
            "V{}".format(next_version_id), "")
        logger.debug("output_url:{}".format(self.output_url))
        self.create_obs_output_dirs(self.output_url)

        # Upload the code to OBS.
        self.update_code_to_obs(session_config, localpath)
        job_instance = self.create_modelarts_job(session_config,
                                                 self.output_url)
        wait_for_job(job_instance)
class bucket():
    """Small convenience wrapper around an OBS client bound to one bucket.

    ``base_folder`` is the prefix used by ``ls``; it starts as ``"/"``,
    which ``ls`` treats as "list the whole bucket".
    """

    def __init__(self,
                 access_key_id=None,
                 secret_access_key=None,
                 server=None,
                 bucketName=None):
        """Create the OBS client and print a configuration banner.

        Args:
            access_key_id: OBS access key id (required).
            secret_access_key: OBS secret access key (required).
            server: OBS server endpoint (required).
            bucketName: Bucket to operate on. When None, ``self.bucketName``
                and ``self.bucketClient`` are not set.

        Raises:
            SystemExit: if any of the three required values is None. The
                exit status 0 of the original ``exit(0)`` call is kept.
        """
        self.base_folder = "/"
        if access_key_id is None or secret_access_key is None or server is None:
            print(
                "# ----- Error invalid OBS server config please check parameter of obs server"
            )
            # Same effect as the original exit(0), without relying on the
            # interactive helper injected by the site module.
            raise SystemExit(0)

        # Create the ObsClient instance.
        self.obsClient = ObsClient(access_key_id=access_key_id,
                                   secret_access_key=secret_access_key,
                                   server=server)
        if bucketName is not None:
            self.bucketName = bucketName
            self.bucketClient = self.obsClient.bucketClient(bucketName)

        print(
            "# ---------------------------------------------------------------------------- #"
        )
        print(
            "# Bucket ToolKit #"
        )
        print(
            "# ---------------------------------------------------------------------------- #"
        )
        print("# ----access key (AK) : ", access_key_id)
        # Never echo the secret key: the banner may end up in logs.
        print("# ----secret key (SK): ", "********")
        print("# ----server : ", server)
        print("# ----bucket name : ", bucketName)
        print("# ----root : ", self.base_folder)
        print(
            "# ---------------------------------------------------------------------------- #"
        )

    def getBucketMetadata(self):
        """Print the bucket metadata, then delete the bucket CORS settings.

        NOTE(review): despite its name this method also calls
        ``deleteBucketCors()`` and returns THAT response, not the metadata
        response. This looks copied from an SDK sample - confirm whether
        the deletion is intended before relying on this method.

        Returns:
            The response of the ``deleteBucketCors`` call.
        """
        print('Getting bucket metadata\n')
        resp = self.bucketClient.getBucketMetadata(
            origin='http://www.b.com', requestHeaders='Authorization1')
        print('storageClass:', resp.body.storageClass)
        print('accessContorlAllowOrigin:', resp.body.accessContorlAllowOrigin)
        print('accessContorlMaxAge:', resp.body.accessContorlMaxAge)
        print('accessContorlExposeHeaders:',
              resp.body.accessContorlExposeHeaders)
        print('accessContorlAllowMethods:',
              resp.body.accessContorlAllowMethods)
        print('accessContorlAllowHeaders:',
              resp.body.accessContorlAllowHeaders)
        print('Deleting bucket CORS\n')
        resp = self.bucketClient.deleteBucketCors()
        print('status' + str(resp.status))
        return resp

    def upload(self, remote_path, local_path):
        """Upload *local_path* to the object key *remote_path*."""
        self.obsClient.putFile(self.bucketName, remote_path, local_path)

    def download(self, key, download):
        """Download the object *key* to the local path *download*."""
        self.obsClient.getObject(self.bucketName, key, downloadPath=download)

    def cd(self, folder_key):
        """Set the base folder (prefix) used by ``ls``."""
        self.base_folder = folder_key
        print("# ===== Base Folder", self.base_folder)

    def delete(self, key):
        """Delete the object *key* from the bucket."""
        print('# ===== Deleting object ' + key + '\n')
        self.obsClient.deleteObject(self.bucketName, key)

    def check(self, key):
        """Fail if *key* already exists, because an upload would overwrite it.

        Only the object metadata is requested, so no download stream is
        opened. The error is raised explicitly instead of through an
        ``assert`` statement so the check also runs under ``python -O``.

        Raises:
            AssertionError: if an object with this key already exists.
        """
        resp = self.obsClient.getObjectMetadata(self.bucketName, key)
        if resp.status < 300:
            raise AssertionError(
                "\n# ===== ERROR : \n# ===== bucket : ({bucketname})\n# ===== key : ({key}) & local upload flow try to overwrite same key".format(
                    bucketname=self.bucketName, key=key))

    def ls(self, show_item_count=10):
        """Print and return the keys found under the base folder.

        Args:
            show_item_count: Maximum number of keys to print and return.

        Returns:
            List of the first *show_item_count* object keys.
        """
        print("# ===== list ({path}): ".format(path=self.base_folder))
        if self.base_folder == "/":
            resp = self.obsClient.listObjects(self.bucketName)
        else:
            resp = self.obsClient.listObjects(self.bucketName,
                                              self.base_folder)
        keylist = []
        print("# ===== object count : ", len(resp.body.contents))
        for content in resp.body.contents[:show_item_count]:
            keylist.append(content.key)
            print(' |--- : ' + content.key + ' etag[' + content.etag + ']')
        return keylist
# Demo of the cold-storage life cycle: create a COLD bucket, put an object,
# restore it, read it back and delete it. The statements rely on obsClient,
# bucketName and objectKey being defined earlier in the script.

# Create a bucket whose storage class is COLD
print('Create a new cold bucket for demo\n')
obsClient.createBucket(bucketName, CreateBucketHeader(storageClass=StorageClass.COLD))

# Create a cold object
print('Create a new cold object for demo\n')
obsClient.putContent(bucketName, objectKey, 'Hello OBS')

# Restore the cold object
# NOTE(review): the positional argument 1 is presumably the number of days
# the restored copy is kept - confirm against the SDK documentation.
print('Restore the cold object')
obsClient.restoreObject(bucketName, objectKey, 1, tier=RestoreTier.EXPEDITED)

# Wait 6 minutes before trying to get the object
time.sleep(6 * 60)

# Get the cold object status: the restore request is issued a second time
# and the status code of that response is printed
print('Get the cold object status')
resp = obsClient.restoreObject(bucketName, objectKey, 1, tier=RestoreTier.EXPEDITED)
print('\tstatus code ' + str(resp.status))

# Get the cold object; the content is loaded into memory (resp.body.buffer)
print('Get the cold object')
resp = obsClient.getObject(bucketName, objectKey, loadStreamInMemory=True)
print('\tobject content:%s' % resp.body.buffer)

# Delete the cold object
obsClient.deleteObject(bucketName, objectKey)
# Construct an OBS client instance with your account for accessing OBS
obsClient = ObsClient(access_key_id=AK, secret_access_key=SK, server=server)

# Create bucket
print('Create a new bucket for demo\n')
obsClient.createBucket(bucketName)

sampleFilePath = '/temp/test.txt'
# Upload an object to your bucket
print('Uploading a new object to OBS from a file\n')
obsClient.putFile(bucketName, objectKey, createSampleFile(sampleFilePath))

print('Downloading an object as a socket stream\n')
# Download the object as a socket stream and display it directly
resp = obsClient.getObject(bucketName, objectKey, downloadPath=None)
if resp.status < 300:
    response = resp.body.response
    chunk_size = 65536
    if response is not None:
        try:
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                print(chunk)
        finally:
            # Release the stream even if reading or printing fails.
            response.close()

# Download the object to a file
print('Downloading an object to :' + localFile + '\n')
obsClient.getObject(bucketName, objectKey, downloadPath=localFile)