예제 #1
0
파일: aln.py 프로젝트: Shenglai/apipe
def get_s3_objects(uuid, bucket, name, destination, s3cfg_dir, engine, logger):
    """Sync the S3 object(s) for ``name`` into ``destination`` unless already present.

    When ``pipe_util.already_have(destination, name, logger)`` is truthy the
    function only logs that fact. Otherwise it runs ``s3cmd sync`` against
    ``s3://<bucket>/<name without its last extension>``, stores the timing
    frame returned by ``time_util.store_time`` in the ``time_mem_s3_sync``
    table, and finally calls ``pipe_util.create_have``.

    Args:
        uuid: Identifier forwarded to ``time_util.store_time``.
        bucket: S3 bucket name; also recorded in the timing row.
        name: Object name; its final extension is removed to build the
            remote path.
        destination: Local target passed to ``s3cmd sync``.
        s3cfg_dir: Directory containing the ``.s3cfg`` file given to
            ``s3cmd -c``.
        engine: Database handle forwarded to
            ``df_util.save_df_to_sqlalchemy``.
        logger: Logger used here and forwarded to every helper.

    Returns:
        None.
    """
    if pipe_util.already_have(destination, name, logger):
        logger.info('already have object(s) %s in %s' % (name, destination))
        return

    logger.info('downloading object(s) %s to %s' % (name, destination))

    # Only the last extension is dropped; presumably this lets the sync pick
    # up every object sharing the prefix -- confirm against bucket layout.
    stem, _ = os.path.splitext(name)
    remote_prefix = os.path.join('s3://', bucket, stem)

    # s3cmd is taken from the current user's ~/.local/bin, not from PATH.
    s3cmd_binary = os.path.join(os.path.expanduser('~'), '.local', 'bin', 's3cmd')
    config_file = os.path.join(s3cfg_dir, '.s3cfg')
    sync_cmd = [s3cmd_binary, '-c', config_file, 'sync', remote_prefix, destination]

    sync_output = pipe_util.do_command(sync_cmd, logger)

    # Tag the timing frame with the bucket/name pair that also serves as
    # the unique key handed to the save helper.
    timing = time_util.store_time(uuid, sync_cmd, sync_output, logger)
    timing['bucket'] = bucket
    timing['name'] = name
    df_util.save_df_to_sqlalchemy(
        timing,
        {'bucket': bucket, 'name': name},
        'time_mem_s3_sync',
        engine,
        logger,
    )

    # Called only after the timing row was saved.
    pipe_util.create_have(destination, name, logger)
    logger.info('finished downloading object(s) %s to %s' % (name, destination))
예제 #2
0
파일: vcf.py 프로젝트: Shenglai/apipe
# Pull run-time settings out of ``args`` (defined outside this excerpt --
# presumably the parsed command-line options; confirm against the caller).
scratch_dir = args['scratch_dir']
thread_count = args['thread_count']



def get_s3_objects(uuid, bucket, name, destination, logger):
    """Sync the S3 object(s) for ``name`` into ``destination`` unless already present.

    When ``pipe_util.already_have(destination, name, logger)`` is truthy the
    function only logs that fact. Otherwise it runs ``s3cmd sync`` (resolved
    via PATH) against ``s3://<bucket>/<name without its last extension>``,
    calls ``pipe_util.create_have``, and then passes the command and its
    output to ``time_util.store_time``.

    Args:
        uuid: Identifier forwarded to ``time_util.store_time``.
        bucket: S3 bucket name.
        name: Object name; its final extension is removed to build the
            remote path.
        destination: Local target passed to ``s3cmd sync``.
        logger: Logger used here and forwarded to every helper.

    Returns:
        None.
    """
    if pipe_util.already_have(destination, name, logger):
        logger.info('already have object(s) %s in %s' % (name, destination))
        return

    logger.info('downloading object(s) %s to %s' % (name, destination))

    # NOTE(review): an earlier note in this function described dropping the
    # suffix as a temporary hack to also fetch reference.dict (needed by
    # GATK UG/HC) -- confirm that rationale still applies.
    stem, _ = os.path.splitext(name)
    sync_cmd = ['s3cmd', 'sync', os.path.join('s3://', bucket, stem), destination]
    sync_output = pipe_util.do_command(sync_cmd, logger)

    pipe_util.create_have(destination, name, logger)

    # The value returned by store_time is not used in this function.
    time_util.store_time(uuid, sync_cmd, sync_output, logger)
        


def main():
    ##logging
    uuid=pipe_util.get_uuid_from_path(bam_analysis_id)
    logging.basicConfig(filename='vcf_'+uuid+'.log',level=logging.DEBUG,filemode='a',
                        format='%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d_%H:%M:%S_%Z')
    logger=logging.getLogger(__name__)

    ##open stats and timing db
    home_dir=os.path.expanduser('~')
    db_path=os.path.join(home_dir,'vcf_pipe.sqlite')
    logger.info('db_path=%s' % db_path)