Example #1
import datetime

import s3_multipart_upload  # local helper module wrapping boto's multipart upload


def store_s3(tdir, name):
    """Upload a database dump to S3 under a daily/weekly/monthly prefix."""
    # Use the .boto config for credentials
    bucket_name = 'flextrip-db-dumps'

    tnow = datetime.datetime.now()
    day = tnow.day
    if "full" in name:
        # A full dump taken in the first week of the month is kept as the
        # monthly backup; any other full dump counts as a weekly backup.
        if 1 <= day <= 7:
            kname = "monthly/"
        else:
            kname = "weekly/"
    else:
        kname = "daily/"

    key_name = kname + name
    fname = tdir + "/" + name
    s3_multipart_upload.main(fname,
                             bucket_name,
                             s3_key_name=key_name,
                             use_rr=False,
                             make_public=False)
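
A minimal usage sketch for store_s3 (the directory and dump file names are hypothetical): dumps with "full" in the name land under monthly/ or weekly/ depending on the day of the month, and everything else under daily/.

store_s3('/var/backups', 'flextrip_full_2013-01-05.sql.gz')   # day 1-7: monthly/
store_s3('/var/backups', 'flextrip_full_2013-01-15.sql.gz')   # later in month: weekly/
store_s3('/var/backups', 'flextrip_daily_2013-01-16.sql.gz')  # no "full": daily/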
            files += glob.glob(path)
        else:
            print "Invalid file specified: ", file

    print "Processing files: ", files

    s3cxn = boto.connect_s3()

    # Check that all buckets exist or create them if needed
    for f in files:
        basefile = os.path.basename(f)
        try:
            print "Checking bucket: ", bucket_name
            bucket = check_s3_bucket_exists(s3cxn, bucket_name)
        except Exception:
            if options.create_buckets:
                print "Creating bucket: ", bucket_name
                bucket = s3cxn.create_bucket(bucket_name)
                bucket = check_s3_bucket_exists(s3cxn, bucket_name)
            else:
                sys.exit(errno.ENFILE)

        bucket = s3cxn.get_bucket(bucket_name)
        key = bucket.get_key(basefile)
        if key is not None:
            print "Key exists - skipping upload"
        else:
            print "Uploading: ", f
            s3_multipart_upload.main(f, bucket_name)
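
# The loop above relies on a check_s3_bucket_exists helper that is not shown
# in the fragment. A minimal sketch of what it might look like with classic
# boto (an assumption, not the original helper): fetch the bucket and let
# boto's S3ResponseError propagate, so the caller's except branch can decide
# whether to create the missing bucket.
def check_s3_bucket_exists(s3cxn, bucket_name):
    # get_bucket raises S3ResponseError when the bucket does not exist
    return s3cxn.get_bucket(bucket_name)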
    for f in files:
        basefile = os.path.basename(f)
        mic = basefile.split("_")[0]
        bucket_name = BUCKET_PREFIX + mic.lower()
        try:
            print "Checking bucket: ", bucket_name
            bucket = check_s3_bucket_exists(s3cxn, bucket_name)
        except Exception:
            if options.create_buckets:
                print "Creating bucket: ", bucket_name
                s3cxn.create_bucket(bucket_name)
            else:
                sys.exit(errno.ENFILE)

        bucket = s3cxn.get_bucket(bucket_name)
        key = bucket.get_key(basefile)
        if key is not None:
            print "Key exists - skipping upload"
        else:
            print "Uploading: ", f
            s3_multipart_upload.main(f, bucket_name)
            if not options.donotqueue:
                m = MHMessage()
                m['input_file'] = os.path.basename(f)
                m['bucket'] = bucket_name
                print "Queueing message: ", m.get_body(), " ==> ", options.queue
                q.write(m)
            else:
                print "Skipping message queueing"
Example #5
            full_input_path = STORAGE_PREFIX + input_file
            hdf_file = input_file.replace('csv.gz', 'hdf')
            full_hdf_path = FEATURE_DIR + hdf_file
            if os.path.isfile(full_hdf_path) and hdf_complete(full_hdf_path):
                print "HDF generated and complete - skipping feature extraction"
            else:
                print "Processing file: ", input_file
                command = "python %s -d %s %s" % \
                  (feature_extractor, FEATURE_DIR, full_input_path)
                (code, string) = commands.getstatusoutput(command)
                if options.debug:
                    print "Processing returned: ", code, string

            print "Moving processed file to bucket"
            # KTK - TODO - should wrap in try except block - to catch failed upload
            s3_multipart_upload.main(full_hdf_path, bucket)

            # Record completion metadata from the EC2 instance, acknowledge the
            # input message, and clean up the local files.
            retries = 0
            md = boto.utils.get_instance_metadata()
            m['instance-id'] = md['instance-id']
            m['public-hostname'] = md['public-hostname']
            m['completion-time'] = time.asctime(time.gmtime())
            qout.write(m)
            qin.delete_message(m)
            os.remove(full_hdf_path)
            os.remove(full_input_path)

        else:
            time.sleep(options.retry_wait)
            retries += 1
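
The trailing else branch implies an enclosing receive loop that polls an input queue and backs off when it is empty. A minimal sketch of that loop under the fragment's own names (the loop structure and the MAX_RETRIES bound are assumptions, not part of the original):

retries = 0
while retries < MAX_RETRIES:
    m = qin.read()  # classic boto: returns one message, or None if the queue is empty
    if m is not None:
        input_file = m['input_file']
        # ... body of Example #5 runs here, then retries is reset to 0 ...
    else:
        time.sleep(options.retry_wait)
        retries += 1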