Ejemplo n.º 1
0
def test_old():
    """Batch-download replica files listed in a local SQLite replica DB.

    Pulls files from the replica DB in batches of 1000, downloads each
    batch with a DownloadManager (3-4 worker threads), records per-file
    inbound speed into a benchmark DB, and logs any failed transfers.
    Hard-coded paths; intended as a manual/legacy test driver.
    """
    replica_dbstr = 'replica.db'
    target_dir = '/gpfs_750/transfer/replication_cmip5/mohc'
    db = ReplicaDB('sqlite:///'+ replica_dbstr)
    log.debug('Querying DB for files...')
    start = 0
    size = 1000

    files = db.getQuery()[start:start+size]
    try:
        os.makedirs(target_dir)
    except OSError:
        # BUGFIX: was a bare "except: pass" which hid every error.
        # Only ignore the expected case (directory already exists /
        # cannot be created); the first download will fail loudly if
        # the target is genuinely unusable.
        pass

    db_bench = BenchmarkDB('sqlite:///bench_' + replica_dbstr)
    while files:

        log.info('%s files retrieved', len(files))

        def bench_callback(speed, **other):
            # persist the measured inbound transfer speed of each file
            db_bench.add(utils.Struct(inbound=speed))

        dm = DownloadManager()
        dm.startThreads = 3
        dm.maxThreads = 4
        dm.start()

        # genexp instead of a throwaway list; message typo fixed
        log.debug('Endpoints per file: %s', max(len(f.endpoints) for f in files))

        for f in files:
            # first endpoint only in this legacy driver (no type filtering)
            url = f.endpoints[0].url
            local = os.path.join(target_dir, f.path, f.name)
            dm.download(url, local, size=f.size)

        log.debug('Starting dm manage function')
        dm.manage(benchmark_callback=bench_callback, verbose=True)
        dm.stop()

        if dm.results.failed_data:
            log.error('Failed files **************')
            for data in dm.results.failed_data:
                log.error('%s -> %s', data['url'], data['file'])

        # next batch
        start += size
        files = db.getQuery()[start:start+size]
Ejemplo n.º 2
0
def main(argv=None):
    from benchmark_db import BenchmarkDB
    from replica_db import ReplicaDB

    if argv is None: argv = sys.argv[1:]

    import getopt
    try:
        args, lastargs = getopt.getopt(argv, "h:D:B:t:e:c:dvqhf:", ['help', 'db-name=', 'db-benchmark-name=', 'target=', 'verbose-benchmark',
                            'start-threads=', 'max-threads=', 'endpoint=', 'batch-size='])
    except getopt.error:
        print sys.exc_info()[:3]
        return 1

    #init values
    db_name = 'replica.db'
    db_benchmark_name = 'benchmark.' + db_name
    endpoint_type = 'GridFTP'
    start_threads = 1
    max_threads = 5
    bench_verbose = False
    batch_size = 20
    target_dir ='.'
    failures = input_file=None
    
    #parse arguments
    for flag, arg in args:
        if flag=='-h' or flag=='--help': return 1
        elif flag=='-D' or flag=='--db-name':               db_name = arg
        elif flag=='-B' or flag=='--db-benchmark-name':     db_benchmark_name = arg
        elif flag=='-t' or flag=='--target':                target_dir = arg
        elif flag=='-e' or flag=='--endpoint':              endpoint_type=arg
        elif flag=='--verbose-benchmark':                   bench_verbose = True
        elif flag=='--start-threads':                       start_threads = int(arg)
        elif flag=='--max-threads':                         max_threads = int(arg)
        elif flag=='--batch-size':                          batch_size = int(arg)
        elif flag=='-f':            input_file = arg

        elif flag=='-d':            log.setLevel(logging.DEBUG)
        elif flag=='-v':            log.setLevel(logging.INFO)
        elif flag=='-q':            log.setLevel(logging.NONE)
        elif flag=='-c':            comment=arg  # comment is ignored
    
    if input_file:
        #no DB here, behave different (I must change the concept, this is a fast workaround)
        file=None
        try:
            file = open(input_file, 'r')
            dm = DownloadManager()
            dm.startThreads = start_threads
            dm.maxThreads = max_threads
            dm.start()
            try:
                for line in file:
                    data = line.split('\t')
                    if len(data)<3: continue   # bad line, probably blank
                    url, local, size = data[:3]
                    if len(data)>5:   # control flags, presently 0 or -1
                        flags=data[5]
                    else:
                        flags=0
                    dm.download(url, local, size=int(size), flags=flags)
            except:
                print "jfp exception caught in Download.main() line loop",line
                print sys.exc_info()[:3]
                raise
            try:
                dm.manage( verbose=bench_verbose)
            except:
                print "jfp exception caught in Download.main() call of dm.manage()"
                print sys.exc_info()[:3]
                raise
            try:
                dm.stop()
            except:
                print "jfp exception caught in Download.main() call of dm.stop()"
                print sys.exc_info()[:3]
                raise

            if dm.results.failed_data:
                if failures is None: failures=[]
                failures.extend(dm.results.failed_data)
                log.error('%s files failed in this batch', len(dm.results.failed_data))

    
        except:
            print "jfp exception caught in Download.main()"
            print sys.exc_info()[:3]
            return 1
        finally:
            if file: file.close()
        return 0
            

    #some checks
    print "jfp db_name=",db_name
    if not os.path.isfile(db_name):
        log.error('Replica DB not found: %s', db_name)
        return 1
    if not os.path.isdir(target_dir):
        log.warn('Target directory does not exists. Creating new: %s', target_dir)
        os.makedirs(target_dir)

    
    db = ReplicaDB('sqlite:///'+ db_name)
    db_bench = BenchmarkDB('sqlite:///' + db_benchmark_name)

    log.debug('Querying DB for files...')
    start = 0

    #jfp: I'm not sure when this second download section would really happen.
    #     So I haven't added any provision for control flags here.
    files = db.getQuery()[start:start+batch_size]
    
    failures = []

    while len(files) > 0:
        log.info('%s files retrieved', len(files))

        def bench_callback(speed, **other):
            db_bench.add(utils.Struct(inbound=speed))

        dm = DownloadManager()
        dm.startThreads = start_threads
        dm.maxThreads = max_threads
        dm.start()

        log.info('Enpoints per file: %s', max([len(f.endpoints) for f in files[:5]]))

        for f in files:
            #for each file we set a source url and a target path to the destination file
            url = None
            
            for ep in f.endpoints:
                if ep.type == endpoint_type:
                    url = ep.url
                    break
            if not url:
                log.error('No endpoint for file: %s', f)
                continue

            local = os.path.join(target_dir, f.path, f.name)
            dm.download(url, local, size=f.size)

        log.debug('Starting dm manage function')
        dm.manage( benchmark_callback=bench_callback, verbose=bench_verbose)
        dm.stop()

        if dm.results.failed_data:
            failures.extend(dm.results.failed_data)
            log.error('%s files failed in this batch', len(dm.results.failed_data))

        #next batch
        start += batch_size
        files = db.getQuery()[start:start+batch_size]


    if failures:
        log.error('A total of %s files failed:', len(failures))
        for data in failures:
            log.error('%s -> %s', data['url'], data['file'])
    else:
        log.info('All files completely transfered')