def test_old():
    """Smoke-test the batch download pipeline against a local replica DB.

    Pulls file records out of ``replica.db`` in batches of 1000, queues the
    first endpoint URL of every file on a DownloadManager, and records each
    reported inbound speed sample in a companion benchmark DB.  Failed
    transfers are logged.

    NOTE(review): target path is hard-coded for a specific GPFS host, and
    this predates endpoint-type selection (compare with main()).
    """
    replica_dbstr = 'replica.db'
    target_dir = '/gpfs_750/transfer/replication_cmip5/mohc'
    db = ReplicaDB('sqlite:///' + replica_dbstr)
    log.debug('Querying DB for files...')
    start = 0
    size = 1000
    files = db.getQuery()[start:start+size]
    # The target directory may already exist; that is fine.  Any other
    # OSError (permissions, bad path) is a genuine failure, but the original
    # bare "except: pass" hid *everything* -- narrow it to OSError so
    # unrelated exceptions (e.g. TypeError) are no longer swallowed.
    try:
        os.makedirs(target_dir)
    except OSError:
        pass
    db_bench = BenchmarkDB('sqlite:///bench_' + replica_dbstr)
    while len(files) > 0:
        log.info('%s files retrieved', len(files))

        def bench_callback(speed, **other):
            # Persist each speed sample reported by the download manager.
            db_bench.add(utils.Struct(inbound=speed))

        dm = DownloadManager()
        dm.startThreads = 3
        dm.maxThreads = 4
        dm.start()
        log.debug('Enpoints per file: %s', max([len(f.endpoints) for f in files]))
        for f in files:
            # Always use the first endpoint, whatever its type.
            url = f.endpoints[0].url
            local = os.path.join(target_dir, f.path, f.name)
            dm.download(url, local, size=f.size)
        log.debug('Starting dm manage function')
        dm.manage(benchmark_callback=bench_callback, verbose=True)
        dm.stop()
        if dm.results.failed_data:
            log.error('Failed files **************')
            for data in dm.results.failed_data:
                log.error('%s -> %s', data['url'], data['file'])
        # next batch
        start += size
        files = db.getQuery()[start:start+size]
def main(argv=None): from benchmark_db import BenchmarkDB from replica_db import ReplicaDB if argv is None: argv = sys.argv[1:] import getopt try: args, lastargs = getopt.getopt(argv, "h:D:B:t:e:c:dvqhf:", ['help', 'db-name=', 'db-benchmark-name=', 'target=', 'verbose-benchmark', 'start-threads=', 'max-threads=', 'endpoint=', 'batch-size=']) except getopt.error: print sys.exc_info()[:3] return 1 #init values db_name = 'replica.db' db_benchmark_name = 'benchmark.' + db_name endpoint_type = 'GridFTP' start_threads = 1 max_threads = 5 bench_verbose = False batch_size = 20 target_dir ='.' failures = input_file=None #parse arguments for flag, arg in args: if flag=='-h' or flag=='--help': return 1 elif flag=='-D' or flag=='--db-name': db_name = arg elif flag=='-B' or flag=='--db-benchmark-name': db_benchmark_name = arg elif flag=='-t' or flag=='--target': target_dir = arg elif flag=='-e' or flag=='--endpoint': endpoint_type=arg elif flag=='--verbose-benchmark': bench_verbose = True elif flag=='--start-threads': start_threads = int(arg) elif flag=='--max-threads': max_threads = int(arg) elif flag=='--batch-size': batch_size = int(arg) elif flag=='-f': input_file = arg elif flag=='-d': log.setLevel(logging.DEBUG) elif flag=='-v': log.setLevel(logging.INFO) elif flag=='-q': log.setLevel(logging.NONE) elif flag=='-c': comment=arg # comment is ignored if input_file: #no DB here, behave different (I must change the concept, this is a fast workaround) file=None try: file = open(input_file, 'r') dm = DownloadManager() dm.startThreads = start_threads dm.maxThreads = max_threads dm.start() try: for line in file: data = line.split('\t') if len(data)<3: continue # bad line, probably blank url, local, size = data[:3] if len(data)>5: # control flags, presently 0 or -1 flags=data[5] else: flags=0 dm.download(url, local, size=int(size), flags=flags) except: print "jfp exception caught in Download.main() line loop",line print sys.exc_info()[:3] raise try: dm.manage( 
verbose=bench_verbose) except: print "jfp exception caught in Download.main() call of dm.manage()" print sys.exc_info()[:3] raise try: dm.stop() except: print "jfp exception caught in Download.main() call of dm.stop()" print sys.exc_info()[:3] raise if dm.results.failed_data: if failures is None: failures=[] failures.extend(dm.results.failed_data) log.error('%s files failed in this batch', len(dm.results.failed_data)) except: print "jfp exception caught in Download.main()" print sys.exc_info()[:3] return 1 finally: if file: file.close() return 0 #some checks print "jfp db_name=",db_name if not os.path.isfile(db_name): log.error('Replica DB not found: %s', db_name) return 1 if not os.path.isdir(target_dir): log.warn('Target directory does not exists. Creating new: %s', target_dir) os.makedirs(target_dir) db = ReplicaDB('sqlite:///'+ db_name) db_bench = BenchmarkDB('sqlite:///' + db_benchmark_name) log.debug('Querying DB for files...') start = 0 #jfp: I'm not sure when this second download section would really happen. # So I haven't added any provision for control flags here. 
files = db.getQuery()[start:start+batch_size] failures = [] while len(files) > 0: log.info('%s files retrieved', len(files)) def bench_callback(speed, **other): db_bench.add(utils.Struct(inbound=speed)) dm = DownloadManager() dm.startThreads = start_threads dm.maxThreads = max_threads dm.start() log.info('Enpoints per file: %s', max([len(f.endpoints) for f in files[:5]])) for f in files: #for each file we set a source url and a target path to the destination file url = None for ep in f.endpoints: if ep.type == endpoint_type: url = ep.url break if not url: log.error('No endpoint for file: %s', f) continue local = os.path.join(target_dir, f.path, f.name) dm.download(url, local, size=f.size) log.debug('Starting dm manage function') dm.manage( benchmark_callback=bench_callback, verbose=bench_verbose) dm.stop() if dm.results.failed_data: failures.extend(dm.results.failed_data) log.error('%s files failed in this batch', len(dm.results.failed_data)) #next batch start += batch_size files = db.getQuery()[start:start+batch_size] if failures: log.error('A total of %s files failed:', len(failures)) for data in failures: log.error('%s -> %s', data['url'], data['file']) else: log.info('All files completely transfered')