def get_s3_objects(uuid, bucket, name, destination, s3cfg_dir, engine, logger):
    """Download object(s) ``name`` from S3 ``bucket`` into ``destination`` via s3cmd.

    Skips the transfer when a "have" marker for (destination, name) already
    exists.  Otherwise runs ``s3cmd sync``, stores the command's timing/memory
    stats in the ``time_mem_s3_sync`` table keyed by (bucket, name), and writes
    the "have" marker so later runs short-circuit.

    Parameters:
        uuid        -- run identifier, passed through to time_util.store_time
        bucket      -- S3 bucket name
        name        -- object filename; its extension is stripped before sync
        destination -- local directory to sync into
        s3cfg_dir   -- directory containing the ``.s3cfg`` credentials file
        engine      -- sqlalchemy engine used to persist the timing dataframe
        logger      -- logging.Logger for progress messages

    Returns: None
    """
    if pipe_util.already_have(destination, name, logger):
        logger.info('already have object(s) %s in %s' % (name, destination))
    else:
        logger.info('downloading object(s) %s to %s' % (name, destination))
        # Sync on the extension-less prefix so sibling files sharing the stem
        # come along too (presumably e.g. reference.dict next to reference.fa
        # — see the "temp hack" note on the other variant; TODO confirm).
        base_name = os.path.splitext(name)[0]
        s3_path = os.path.join('s3://', bucket, base_name)
        # Use the user-local s3cmd install with an explicit config file.
        home_dir = os.path.expanduser('~')
        s3cmd_path = os.path.join(home_dir, '.local', 'bin', 's3cmd')
        cmd = [s3cmd_path, '-c', os.path.join(s3cfg_dir, '.s3cfg'),
               'sync', s3_path, destination]
        output = pipe_util.do_command(cmd, logger)
        # Persist timing/memory stats for this sync, keyed by (bucket, name).
        df = time_util.store_time(uuid, cmd, output, logger)
        df['bucket'] = bucket
        df['name'] = name
        table_name = 'time_mem_s3_sync'
        unique_key_dict = {'bucket': bucket, 'name': name}
        df_util.save_df_to_sqlalchemy(df, unique_key_dict, table_name, engine, logger)
        # Marker file lets subsequent runs skip the download entirely.
        pipe_util.create_have(destination, name, logger)
        logger.info('finished downloading object(s) %s to %s' % (name, destination))
    return
# NOTE(review): `args` is not defined in this visible chunk — presumably a
# parsed-arguments dict from earlier in the file; confirm against the caller.
scratch_dir = args['scratch_dir']
thread_count = args['thread_count']


def get_s3_objects(uuid, bucket, name, destination, logger):
    """Download object(s) ``name`` from S3 ``bucket`` into ``destination``.

    Simpler variant using the PATH-resolved ``s3cmd`` with its default config.
    NOTE(review): this redefines a same-named function declared elsewhere in
    this file (the variant taking s3cfg_dir/engine); whichever is defined last
    wins at module level — confirm which one callers intend.

    Parameters:
        uuid        -- run identifier, passed through to time_util.store_time
        bucket      -- S3 bucket name
        name        -- object filename; its extension is stripped before sync
        destination -- local directory to sync into
        logger      -- logging.Logger for progress messages

    Returns: None
    """
    # sync_name=name.split('.')[0]  # temp hack to get reference.dict needed by GATK UG/HC
    if pipe_util.already_have(destination, name, logger):
        logger.info('already have object(s) %s in %s' % (name, destination))
    else:
        logger.info('downloading object(s) %s to %s' % (name, destination))
        # Sync on the extension-less prefix so sibling files sharing the stem
        # come along too (see commented "temp hack" above).
        base_name = os.path.splitext(name)[0]
        s3_path = os.path.join('s3://', bucket, base_name)
        cmd = ['s3cmd', 'sync', s3_path, destination]
        output = pipe_util.do_command(cmd, logger)
        # NOTE(review): marker is written before the timing df is stored, and
        # the df is never persisted (unlike the other variant) — confirm intent.
        pipe_util.create_have(destination, name, logger)
        df = time_util.store_time(uuid, cmd, output, logger)


def main():
    # NOTE(review): this function continues beyond the visible chunk; only the
    # visible prefix is reconstructed here, unaltered.
    ##logging
    # NOTE(review): `bam_analysis_id` is not defined in this visible chunk.
    uuid = pipe_util.get_uuid_from_path(bam_analysis_id)
    logging.basicConfig(filename='vcf_' + uuid + '.log', level=logging.DEBUG, filemode='a',
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%Y-%m-%d_%H:%M:%S_%Z')
    logger = logging.getLogger(__name__)
    ##open stats and timing db
    home_dir = os.path.expanduser('~')
    db_path = os.path.join(home_dir, 'vcf_pipe.sqlite')
    logger.info('db_path=%s' % db_path)