import os

import df_util
import pipe_util
import time_util


def get_s3_objects(uuid, bucket, name, destination, s3cfg_dir, engine, logger):
    if pipe_util.already_have(destination, name, logger):
        logger.info('already have object(s) %s in %s' % (name, destination))
    else:
        logger.info('downloading object(s) %s to %s' % (name, destination))
        # Sync on the extensionless base name so companion objects that share
        # it (e.g. the reference.dict needed by GATK UG/HC) are fetched too.
        base_name = os.path.splitext(name)[0]
        s3_path = os.path.join('s3://', bucket, base_name)
        home_dir = os.path.expanduser('~')
        s3cmd_path = os.path.join(home_dir, '.local', 'bin', 's3cmd')
        cmd = [s3cmd_path, '-c', os.path.join(s3cfg_dir, '.s3cfg'), 'sync', s3_path, destination]
        output = pipe_util.do_command(cmd, logger)
        # Record the sync's time/memory usage and persist it, keyed on (bucket, name).
        df = time_util.store_time(uuid, cmd, output, logger)
        df['bucket'] = bucket
        df['name'] = name
        table_name = 'time_mem_s3_sync'
        unique_key_dict = {'bucket': bucket, 'name': name}
        df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger)
        # Leave a sentinel so subsequent calls skip the download.
        pipe_util.create_have(destination, name, logger)
        logger.info('finished downloading object(s) %s to %s' % (name, destination))
    return
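# A minimal usage sketch, not part of the original module: it assumes the
# project's pipe_util/time_util/df_util helpers are importable, that s3cmd is
# installed at ~/.local/bin/s3cmd, and that `s3cfg_dir` contains a valid
# .s3cfg. The bucket, object name, and paths below are hypothetical
# placeholders chosen for illustration.
if __name__ == '__main__':
    import logging

    import sqlalchemy

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('s3_sync')

    # Timing rows land in the 'time_mem_s3_sync' table of this database.
    engine = sqlalchemy.create_engine('sqlite:///time_mem.db')

    get_s3_objects(
        uuid='run-0001',                # hypothetical pipeline run id
        bucket='example-bucket',        # hypothetical bucket name
        name='reference.fa',            # object to fetch; companions sharing
                                        # the base name 'reference' sync too
        destination='/tmp/reference/',  # local sync target
        s3cfg_dir='/home/user/secure',  # directory containing .s3cfg
        engine=engine,
        logger=logger,
    )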