Example no. 1
def predict():
    files_all = ''  # initialized so a plain GET request does not raise NameError
    if request.method == 'POST':
        try:
            s3 = boto3.resource('s3')
            bucket = s3.Bucket('1673-assignment-1')
            folder_name = request.form['folder name']
            keys = []
            # List every object stored under the requested folder prefix.
            for obj in bucket.objects.filter(Delimiter='/',
                                             Prefix=str(folder_name) + '/'):
                keys.append(obj.key)

            # Skip the first key (the folder placeholder itself) and join the rest.
            files_all = ', '.join(keys[1:])
        except ValueError:
            return "Please check if the values are entered correctly"
    return render_template('predict.html', contents=files_all)
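A minimal sketch of how the view above might be registered, assuming a Flask app instance named app and a /predict route; neither appears in the original snippet.

from flask import Flask, request, render_template
import boto3

app = Flask(__name__)
# Register predict() for both GET (render the page) and POST (list the folder contents).
app.add_url_rule('/predict', view_func=predict, methods=['GET', 'POST'])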
Example no. 2
def load_pkl_data(file_path, folder=None):
    """
    Reads a pickle file as a Python dictionary (Signal data only).

    Arguments:
      file_path -- {str} -- path to the pickled iq_matrix file (without extension)
      folder -- {str} -- optional local directory, or an "s3://" location

    Returns:
      Python dictionary
    """
    if folder is not None and "s3://" in folder:
        bucket_name = 'sota-mafat'
        s3 = boto3.resource('s3')
        print(f"getting pkl from s3: {file_path}")
        obj = s3.Bucket(bucket_name).Object(f"{file_path}.pkl")
        return pickle.loads(obj.get()['Body'].read())

    if folder is not None:
        path = os.path.join(folder, file_path + '.pkl')
    else:
        path = file_path + '.pkl'
    with open(path, 'rb') as data:
        output = pickle.load(data)
    return output
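A hedged usage sketch for load_pkl_data; the file name and local folder below are made-up examples.

import os
import pickle
import boto3  # imports the function above relies on

local_data = load_pkl_data('train_signals', folder='data')               # reads data/train_signals.pkl from disk
remote_data = load_pkl_data('train_signals', folder='s3://sota-mafat')   # reads train_signals.pkl from the hard-coded sota-mafat bucket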
Example no. 3
    def get_sat_data(self, tile, date):
        s3 = boto3.resource('s3')
        level = self.level
        s2_bucket = 'sentinel-s2-' + level
        utm_code = tile['utm_code']
        lat_band = tile['lat_band']
        square = tile['square']
        year = str(date.year)
        month = str(date.month)
        day = str(date.day)
        prefix = 'tiles/%s/%s/%s/%s/%s/%s/' % (utm_code, lat_band, square,
                                               year, month, day)

        # Collect all object summaries under the tile/date prefix.
        results = list(s3.Bucket(s2_bucket).objects.filter(Prefix=prefix))
        return results
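A hedged usage sketch; the tile values and date are invented, and downloader stands in for an instance of the surrounding class, which is assumed to expose a level attribute such as 'l1c' or 'l2a'.

import datetime

tile = {'utm_code': '33', 'lat_band': 'U', 'square': 'UP'}
objects = downloader.get_sat_data(tile, datetime.date(2020, 6, 1))
# Each entry is an s3.ObjectSummary; collect the keys found under the tile/date prefix.
keys = [obj.key for obj in objects]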
Example no. 4
def get_mint_bucket_name(region: str):
    account_id = get_account_id(region)
    account_alias = get_account_alias(region)
    s3 = boto3.resource('s3')
    parts = account_alias.split('-')
    prefix = parts[0]
    bucket_name = '{}-stups-mint-{}-{}'.format(prefix, account_id, region)
    bucket = s3.Bucket(bucket_name)
    try:
        # Fails if the bucket does not exist or cannot be accessed; fall back below.
        bucket.load()
        return bucket.name
    except Exception:
        pass
    # Fall back to any existing mint bucket for this account, regardless of region.
    for bucket in s3.buckets.all():
        if bucket.name.startswith('{}-stups-mint-{}-'.format(
                prefix, account_id)):
            return bucket.name
    return bucket_name
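A hedged usage sketch; 'eu-west-1' is only an example region, and get_account_id / get_account_alias are helpers assumed to be defined elsewhere in the original project.

# Resolve (or fall back to) the mint bucket name for this account and region.
mint_bucket = get_mint_bucket_name('eu-west-1')
print(mint_bucket)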
Example no. 5
    statusBP = responseBPStatus['Status']
    print(statusBP)
    if statusBP == 'COMPLETED':
        print("Batch Prediction created\n")
        batch_prediction_exist = 1
    if statusBP == 'FAILED':
        print("Error creating the Batch Prediction\n")
        batch_prediction_exist = 2
        sys.exit(40)

# If the prediction has finished, fetch the CSV with the results
if batch_prediction_exist == 1:
    # Download the file into the corresponding predictions folder
    print("Downloading CSV file with predictions")
    s3 = boto3.resource('s3',
                        aws_access_key_id=AWS_ACCESS_KEY_ID,
                        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                        region_name=REGION)
    file_path = 'batch-prediction/result/BPID' + extension + '-' + FILE_NAME_BATCH + '.gz'

    s3.Bucket(BUCKET_NAME_BATCH).download_file(file_path, 'mlOutput.csv.gz')
    inF = gzip.GzipFile('mlOutput.csv.gz', 'rb')
    s = inF.read()
    inF.close()

    with open('mlOutput.csv', 'wb') as csvfile:
        csvfile.write(s)

print("\n----------------END---------------\n")
Example no. 6
APP_ROOT = os.path.dirname(os.path.abspath(__file__))

# Fill in AWS credentials before running this example.
AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''

# bucket_name = AWS_ACCESS_KEY_ID.lower() + '-dump'

bucket_name = 'test-toshal'
conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)

bucket = conn.get_bucket(bucket_name)
session = Session(aws_access_key_id=AWS_ACCESS_KEY_ID,
                  aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
s3 = session.resource('s3')
bucket1 = s3.Bucket(bucket_name)

content = []
for i in bucket1.objects.all():
    content.append(i.key)


def percent_cb(complete, total):
    sys.stdout.write('.')
    sys.stdout.flush()


@app.route('/')
def index():
    return render_template('upload.html')
Example no. 7
def main():
    # Retrieve command-line arguments
    args = parse_arguments()
    main_directory = args.directory
    class1 = args.class1
    class2 = args.class2
    force_by_user = args.force
    if args.verbose:
        lg.basicConfig(level=lg.INFO)

    # Variable declarations
    result = []
    directory_feature = os.path.join(main_directory, "features", "*.json")
    nb_training_data = args.nb_training_data
    iteration_model = args.iteration_model
    min_partition = args.min_partition
    s3 = boto3.resource('s3')
    bucket = s3.Bucket('oc-calculdistribues-sberton')
    result_file = class1 + '_' + class2 + '_' + time.strftime(
        "%Y%m%d%H%M%S") + '.json'
    model_file = 'model_' + class1 + '_' + class2 + '_' + str(
        nb_training_data) + '_' + str(iteration_model)
    model_pathname = os.path.join(main_directory, "models", model_file)

    # Search for an existing model and record whether it exists in the is_model boolean
    key = 'distributed_learning/models/' + model_file
    objs = list(bucket.objects.filter(Prefix=key))
    is_model = len(objs) > 0 and objs[0].key.startswith(key + '/')

    start_time = time.time()
    lg.info(
        '#################### Starting pet-classification ######################'
    )
    lg.info('Class 1 is %s', class1)
    lg.info('Class 2 is %s', class2)
    lg.info('Number of training samples is %s', nb_training_data)
    lg.info('Number of model iterations is %s', iteration_model)

    # Persist a common RDD used by both the training and testing data
    common_rdd = sc.textFile(directory_feature, minPartitions=min_partition)\
                   .filter(lambda line: line.split(', ')[0] in (class1, class2) or class2 == 'All')\
                   .persist()

    # Load the model if one exists
    if is_model and not force_by_user:
        model = SVMModel.load(sc, model_pathname)
        lg.info('Found and loaded recorded model %s', model_file)
    else:
        lg.info('No recorded model found')
        # Create the training RDD and train a model if none was found or retraining was forced
        train_data_rdd = common_rdd.filter(lambda line: int(line.split(', ')[1]) <= nb_training_data)\
                                   .map(lambda line: Row(label=0.0, features=line.split(', ')[2:])
                                        if line.split(', ')[0] == class1
                                        else Row(label=1.0, features=line.split(', ')[2:]))\
                                   .map(lambda line: LabeledPoint(line.label, line.features))

        lg.info('%s features for training data', train_data_rdd.count())
        lg.info('Starting to train model')
        model = SVMWithSGD.train(train_data_rdd, iterations=iteration_model)
        lg.info('Model training finished')

    training_time = time.time()
    training_duration = training_time - start_time
    # Create the testing RDD
    test_data_rdd = common_rdd.filter(lambda line: int(line.split(', ')[1]) > nb_training_data)\
                      .map(lambda line: Row(label=0.0, features=line.split(', ')[2:])
                                           if line.split(', ')[0] == class1
                                           else Row(label=1.0, features=line.split(', ')[2:]))\
                      .map(lambda row: LabeledPoint(row.label, row.features))
    lg.info('%s features for test data', test_data_rdd.count())

    # Evaluate the model on the test data
    predictions = test_data_rdd.map(
        lambda row: (row.label, float(model.predict(row.features))))
    test_error = predictions.filter(lambda lp: lp[0] != lp[1]).count() \
                                     / float(predictions.count())
    lg.info('Test Error : %s', str(test_error))
    end_time = time.time()
    duration = end_time - start_time
    lg.info('Duration %s', str(duration))
    prediction_duration = end_time - training_time
    # Save and dump the result to S3
    result.append({
        "class1": class1,
        "class2": class2,
        "iteration_model": iteration_model,
        "nb_training_data": nb_training_data,
        "total_duration": duration,
        "train_duration": training_duration,
        "predict_duration": prediction_duration,
        "error": test_error
    })

    s3object = s3.Object('oc-calculdistribues-sberton', result_file)
    s3object.put(Body=(bytes(json.dumps(result, indent=2).encode('UTF-8'))))

    # Save the model if it did not exist before
    if not is_model:
        lg.info('Saving model at %s', model_file)
        model.save(sc, model_pathname)

    lg.info(
        '#################### Ending pet-classification ######################'
    )
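A hedged sketch of the feature-line format the lambdas above assume, namely "<class>, <sample index>, <feature 1>, <feature 2>, ...". The helper below mirrors the inline parsing and is an illustration, not part of the original script.

from pyspark.mllib.regression import LabeledPoint

def parse_line(line, class1):
    fields = line.split(', ')
    label = 0.0 if fields[0] == class1 else 1.0
    # Features start at the third field; cast them to float for MLlib.
    return LabeledPoint(label, [float(x) for x in fields[2:]])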
Example no. 8
import configparser
import json

import boto3

config_parser = configparser.ConfigParser()
config_parser.read("credentials.ini")
BUCKET_NAME = config_parser['MAIN']['BUCKET_NAME']

cred = boto3.Session().get_credentials()
ACCESS_KEY = cred.access_key
SECRET_KEY = cred.secret_key

s3 = boto3.resource('s3', region_name='us-east-2')
bucket = s3.Bucket(BUCKET_NAME)

for my_bucket_object in bucket.objects.all():
    print(my_bucket_object)

s3client = boto3.client('s3',
                        aws_access_key_id=ACCESS_KEY,
                        aws_secret_access_key=SECRET_KEY,
                        region_name='us-east-2')

response = s3client.get_object(Bucket=BUCKET_NAME, Key='myfile.json')
body = response['Body'].read()

json_obj = json.loads(body)
print(json_obj)
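A hedged note: the same object can also be read through the resource created earlier, without building a second client; 'myfile.json' is the key already used above.

obj = bucket.Object('myfile.json')
json_obj = json.loads(obj.get()['Body'].read())
print(json_obj)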