def check_md5_and_size(file_name, md5_ref_dictionary, debug=True, stats=None,
                       action='download', key_name=''):
    """Hash a local file and optionally compare it with a reference md5.

    The md5 and size come from ``generate_file_md5(file_name)``, which is
    unpacked as an ``(md5, size)`` pair.  When ``md5_ref_dictionary`` is not
    the sentinel ``0``, the reference md5 is fetched with ``get_value`` using
    ``key_name`` and compared with the computed md5.

    Args:
        file_name: Path of the file to hash.
        md5_ref_dictionary: Reference lookup handed to ``get_value``; pass
            ``0`` to skip the comparison.
        debug: When equal to ``True``, print the computed and reference md5.
        stats: Optional dict updated in place with the keys
            ``'reference_md5'``, ``'<action>_md5'`` and ``'<action>_size'``.
            A fresh dict is used when omitted.
        action: Prefix for the md5/size keys written to ``stats``.
        key_name: Key to look up in the reference; defaults to ``file_name``.

    Returns:
        ``"md5_PASS"`` or ``"md5_FAIL"`` when a reference was checked,
        ``"md5_NA"`` when the comparison was skipped.
    """
    # A ``{}`` default would be one dict shared (and mutated) by every call.
    if stats is None:
        stats = {}
    if not key_name:
        key_name = file_name

    print("SUB :: calculating dl md5 :: " + file_name)

    dlFileMd5, dlSize = generate_file_md5(file_name)
    if debug == True:
        print("SUB :: " + file_name + " :: FTP_MD5 :: " + dlFileMd5)

    # ``0`` is the "no reference available" sentinel; an empty dict still
    # goes through the lookup, exactly as before.
    if md5_ref_dictionary != 0:
        ref_md5 = get_value(my_key=key_name, my_dictionary=md5_ref_dictionary)
        if dlFileMd5 == ref_md5:
            dl_md5_check = "md5_PASS"
        else:
            dl_md5_check = "md5_FAIL"
    else:
        ref_md5 = "NA"
        dl_md5_check = "md5_NA"

    if debug == True:
        # str() because the looked-up reference is not guaranteed to be text.
        print("SUB :: " + file_name + " :: REF_MD5 :: " + str(ref_md5))

    stats['reference_md5'] = ref_md5
    stats['{}_md5'.format(action)] = dlFileMd5
    stats['{}_size'.format(action)] = dlSize

    # Call-form print with a single string behaves the same on Python 2 and 3.
    print("md5_check :: " + dl_md5_check)
    return dl_md5_check
# Download every file named in ``f`` from the S3 bucket, time the transfer and
# the md5/size calculation, append one tab-separated record per file to
# LOGFILE, and delete the local copy afterwards.

# The connection and bucket handle do not depend on the file being processed,
# so they are created once instead of once per line.
conn = boto.connect_s3(
    aws_access_key_id=args.accessKey,
    aws_secret_access_key=args.secretKey,
    host=args.gateway,
    #proxy = 'http://cloud-proxy:3128',
    #proxy_port = 3128,
    #is_secure=True,               # uncomment if you are not using ssl
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)
bucket = conn.get_bucket(args.bucketName)

for my_line in f:
    file_count += 1

    ### get a filename from the list
    my_file_name = my_line.strip()
    if not my_file_name:
        # Blank line in the list: nothing to download.
        continue
    print('Processing ' + str(my_file_name) + ' ( ' + str(file_count) + ' )')

    ### download file
    key = bucket.get_key(my_file_name)
    if key is None:
        # get_key returns None for a key that does not exist; skip it rather
        # than failing on the attribute access below.
        print('Skipping ' + str(my_file_name) + ' :: key not found in bucket')
        continue

    try:
        tic = time.time()
        key.get_contents_to_filename(my_file_name)
        dlTime = time.time() - tic

        ### calculate md5 & size
        tic = time.time()
        dlFileMd5, dlSize = generate_file_md5(my_file_name)
        psTime = time.time() - tic

        ### write stats to log
        log_str = '\t'.join(
            str(field)
            for field in (my_file_name, dlFileMd5, dlSize, dlTime, psTime)
        )
        LOGFILE.write(log_str)
        LOGFILE.write('\n')
        LOGFILE.flush()
    finally:
        # Remove the local copy even when the download, hashing or logging
        # failed, so partial files do not pile up.
        if os.path.exists(my_file_name):
            os.remove(my_file_name)