def store_s3(tdir, name):
    """Store in S3"""
    # Use the .boto config for credentials
    bucket_name = 'flextrip-db-dumps'
    tnow = datetime.datetime.now()
    day = int(tnow.day)
    if "full" in name:
        if day >= 1 and day <= 7:
            kname = "monthly/"
        else:
            kname = "weekly/"
    else:
        kname = "daily/"
    key_name = kname + name
    fname = tdir + "/" + name
    #print kname, fname
    s3_multipart_upload.main(fname, bucket_name,
                             s3_key_name=key_name,
                             use_rr=False,
                             make_public=False)
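# Hypothetical usage (file names are illustrative only, not taken from the
# real backup job): a "full" dump made during the first seven days of the
# month is filed under monthly/, a later full dump under weekly/, and any
# other dump under daily/.
#
#   store_s3("/var/backups/mysql", "flextrip_full_2013-03-05.sql.gz")    # -> monthly/...
#   store_s3("/var/backups/mysql", "flextrip_full_2013-03-21.sql.gz")    # -> weekly/...
#   store_s3("/var/backups/mysql", "flextrip_hourly_2013-03-22.sql.gz")  # -> daily/...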
        files = files + glob.glob(path)
    else:
        print "Invalid file specified: ", file

print "Processing files: ", files

s3cxn = boto.connect_s3()

# Check that all buckets exist or create them if needed
for f in files:
    basefile = os.path.basename(f)
    try:
        print "Checking bucket: ", bucket_name
        bucket = check_s3_bucket_exists(s3cxn, bucket_name)
    except Exception:
        if options.create_buckets:
            print "Creating bucket: ", bucket_name
            bucket = s3cxn.create_bucket(bucket_name)
            bucket = check_s3_bucket_exists(s3cxn, bucket_name)
        else:
            sys.exit(errno.ENFILE)
    bucket = s3cxn.get_bucket(bucket_name)
    key = bucket.get_key(basefile)
    if key is not None:
        print "Key exists - skipping upload"
    else:
        print "Uploading: ", f
        s3_multipart_upload.main(f, bucket_name)
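# The check_s3_bucket_exists() helper used above is defined elsewhere in the
# repo and not shown in this excerpt. A minimal sketch, assuming it simply
# looks the bucket up with boto and lets the lookup error propagate so the
# caller's except clause can decide whether to create the bucket:
def check_s3_bucket_exists(s3cxn, bucket_name):
    """Return the bucket if it exists; raise if the lookup fails."""
    return s3cxn.get_bucket(bucket_name)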
for f in files:
    basefile = os.path.basename(f)
    # The bucket is chosen per file from the leading token of its name
    # (everything before the first underscore), lower-cased.
    mic = basefile.split("_")[0]
    bucket_name = BUCKET_PREFIX + mic.lower()
    try:
        print "Checking bucket: ", bucket_name
        bucket = check_s3_bucket_exists(s3cxn, bucket_name)
    except Exception:
        if options.create_buckets:
            print "Creating bucket: ", bucket_name
            s3cxn.create_bucket(bucket_name)
        else:
            sys.exit(errno.ENFILE)
    bucket = s3cxn.get_bucket(bucket_name)
    key = bucket.get_key(basefile)
    if key is not None:
        print "Key exists - skipping upload"
    else:
        print "Uploading: ", f
        s3_multipart_upload.main(f, bucket_name)
    # Queue a message so downstream workers know a new file has landed
    if not options.donotqueue:
        m = MHMessage()
        m['input_file'] = os.path.basename(f)
        m['bucket'] = bucket_name
        print "Queueing message ", m.get_body(), " ==> ", options.queue
        q.write(m)
    else:
        print "Skipping message queueing"
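# The queue object `q` written to above is set up elsewhere in the script,
# presumably from options.queue. A minimal sketch of that setup, assuming a
# standard boto SQS connection; MHMessage stores key/value pairs, which is
# why m['input_file'] and m['bucket'] work in the loop above:
import boto.sqs
from boto.sqs.message import MHMessage

def connect_queue(queue_name, region='us-east-1'):
    """Return an SQS queue whose messages are parsed as key/value pairs."""
    sqs = boto.sqs.connect_to_region(region)
    q = sqs.get_queue(queue_name)
    q.set_message_class(MHMessage)
    return q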
        full_input_path = STORAGE_PREFIX + input_file
        hdf_file = input_file.replace('csv.gz', 'hdf')
        full_hdf_path = FEATURE_DIR + hdf_file
        if os.path.isfile(full_hdf_path) and hdf_complete(full_hdf_path):
            print "HDF generated and complete - skipping feature extraction"
        else:
            print "Processing file: ", input_file
            command = "python %s -d %s %s" % \
                (feature_extractor, FEATURE_DIR, full_input_path)
            (code, string) = commands.getstatusoutput(command)
            if options.debug:
                print "Processing returned: ", code, string

        print "Moving processed file to bucket"
        # KTK - TODO - should wrap in a try/except block to catch failed uploads
        s3_multipart_upload.main(full_hdf_path, bucket)
        retries = 0

        # Tag the message with where and when the work was done, pass it to
        # the output queue, acknowledge it on the input queue, and clean up.
        md = boto.utils.get_instance_metadata()
        m['instance-id'] = md['instance-id']
        m['public-hostname'] = md['public-hostname']
        m['completion-time'] = time.asctime(time.gmtime())
        qout.write(m)
        qin.delete_message(m)
        os.remove(full_hdf_path)
        os.remove(full_input_path)
    else:
        # No message available - wait before polling the input queue again
        time.sleep(options.retry_wait)
        retries += 1
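# The hdf_complete() check used above is not shown in this excerpt. A minimal
# sketch, assuming the feature extractor marks a file as finished with a
# top-level attribute written as its last step (the 'complete' attribute name
# is an assumption for illustration, not the extractor's documented behavior):
import h5py

def hdf_complete(path):
    """Return True only if the HDF5 file opens cleanly and carries the marker."""
    try:
        with h5py.File(path, 'r') as f:
            return bool(f.attrs.get('complete', False))
    except IOError:
        # Truncated or corrupt output fails to open and counts as incomplete
        return False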