Example #1
def main(username):
    print(" In server training ")
    os.makedirs(os.path.join(DATA_PATH, 'checkpoints'))

    print("Created /data/checkpoints folders")

    # Download user file
    userdata_filename = os.path.join(DATA_PATH, f'{username}.json')
    download_file(
        os.path.join(TRAINING_CONFIG, f'{username}.json'),
        userdata_filename,
    )

    (task, username, model_name, ratio, is_reducelrscheduler, patience, factor,
     min_lr, optimizer, batch_size, learning_rate, epochs,
     dataset_filename) = get_config_data(userdata_filename)

    # Download dataset
    download_file(
        os.path.join(TRAINING_CONFIG, dataset_filename),
        os.path.join(DATA_PATH, dataset_filename),
    )

    print(" Completed fetching data from s3 ")
    inference_data = {}
    if task == 'image':
        inference_data = train_image_classification(
            username, model_name, ratio, is_reducelrscheduler, patience,
            factor, min_lr, optimizer, batch_size, learning_rate, epochs,
            dataset_filename)
    elif task == 'text':
        inference_data = train_sentiment_analysis(username, model_name, ratio,
                                                  is_reducelrscheduler,
                                                  patience, factor, min_lr,
                                                  optimizer, batch_size,
                                                  learning_rate, epochs,
                                                  dataset_filename)

    # Upload data to S3
    upload_model_data(task, username)
    print('Uploaded inference data to s3')

    # Update inference json
    inference_config = fetch_json(INFERENCE_CONFIG)
    inference_config[username] = inference_data
    inference_config[username]['created'] = datetime.now().strftime(
        '%d-%m-%y %H:%M')
    put_object(INFERENCE_CONFIG, inference_config)
    print("Added user information to inference.json and uploaded to s3")

    # Delete train data from S3
    delete_object(os.path.join(TRAINING_CONFIG, dataset_filename))
    delete_object(os.path.join(TRAINING_CONFIG, f'{username}.json'))
    print("Deleted user data from training folder in s3")

    # Delete data
    shutil.rmtree(DATA_PATH)
    print("Deleted data folder")
Example #2
    def _load_data(self):
        if not os.path.isfile(self._raw_path):
            download_file(self._raw_path)

        self._df = pd.read_csv(self._raw_path)

        logging.info(f'Types:\n{self._df.dtypes}')
        logging.info(f'Head:\n{self._df.head()}')
        logging.info(f'Tail:\n{self._df.tail(7)}')
        self.row_count = self._df.shape[0]
        self.tip_avg = self._df["tip_amount"].mean()
Example #3
def process_file():
    """Process the file. Download from S3. Parse and upload the newly
       formatted file.
    """
    local_file = s3.download_file(BUCKET_NAME, S3_OBJECT)
    logging.info('File downloaded: %s', local_file)
    try:
        if local_file is not None:
            with open(local_file) as fp:
                with open(NEW_LOG_FILE, "w") as nf:
                    logging.info('Creating new file')
                    for line in fp:
                        if line.startswith('['):
                            # Drop the fixed-width prefix before tokenizing
                            trim_line = line[29:]
                            t = re.findall(r"[\S]+\S+\S", trim_line)
                            res = lc.parse_line(t)
                            new_data = lc.clean_date(res)
                            nf.write(lc.format_new_line(line, new_data) + '\n')
                        else:
                            # `line` already ends with a newline; copy it through unchanged
                            nf.write(line)

    except Exception as e:
        logging.error(e)

    finally:
        # Clean up: upload the reformatted file to S3, then delete the
        # temporary files. Both file objects were closed by the `with` blocks.
        if os.path.exists(NEW_LOG_FILE):
            logging.info('New log file completed')
            s3.upload_file(NEW_LOG_FILE, BUCKET_NAME)
            os.remove(NEW_LOG_FILE)
        if local_file is not None and os.path.exists(local_file):
            os.remove(local_file)
Example #4
    def predict(self, data, features_names):

        result = "PASS"
        params = dict((item.strip()).split("=") for item in data.split(","))
        print(params)
        eparams = ["model", "data"]
        if not all(x in params for x in eparams):
            print("Not all parameters have been defined")
            result = "FAIL"
            return result

        model = params['model']
        data = params['data']
        s3endpointUrl = os.environ['S3_ENDPOINT']
        #s3objectStoreLocation = os.environ['S3OBJECTSTORELOCATION']
        s3objectStoreLocation = 'fruitml'
        s3accessKey = os.environ['AWS_ACCESS_KEY_ID']
        s3secretKey = os.environ['AWS_SECRET_ACCESS_KEY']

        tmpdir = str(tempfile.mkdtemp())
        modelurl = model.split("/")
        MODEL = modelurl[-1]

        # Download the trained model from storage backend in to MODEL_PATH
        session = s3.create_session_and_resource(s3accessKey, s3secretKey,
                                                 s3endpointUrl)
        s3.download_file(session, s3objectStoreLocation, model,
                         tmpdir + "/" + MODEL)

        self.clf = joblib.load(tmpdir + "/" + MODEL)

        # Extract the feature values from the data string
        dataset = data.split(':')
        dataset = filter(None, dataset)
        featurearray = [float(i) for i in dataset]
        columnNames = ['f' + str(i) for i in range(1, len(featurearray) + 1)]

        rowdf = pd.DataFrame([featurearray], columns=columnNames)
        predictions = self.clf.predict(rowdf)
        print(predictions)
        return predictions
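From the parsing in predict(), the incoming `data` string must hold comma-separated key=value pairs containing at least `model` and `data`, with the feature vector colon-separated. The values below are purely illustrative:

# Illustrative payload only; the model path and feature values are made up.
payload = "model=models/fruit/model.pkl, data=1.0:2.5:0.7"
# predict() splits this into {'model': ..., 'data': ...}, downloads the model
# from the S3 backend, and scores the features as columns f1, f2, f3.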
Example #5
def get_df(data_file):

    # This code works to convert json files but we are not currently planning to do that
    # Leaving it here in case we want to expand functionality later, but that would require changes elsewhere
    # if data_file[-4:] == 'json':
    #     data = []
    #     for line in open(data_file, 'r'):
    #         row = json.loads(line)
    #         data.append(row)
    #     df = pd.DataFrame(data)
    #     return df

    data = s3.download_file('yelp-data-shared-labs18', data_file)
    df = pd.read_parquet(data)
    return df
Example #6
def download(filename):
    if request.method == 'GET':
        output = download_file(filename, BUCKET)

        return send_file(output, as_attachment=True)
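The use of request.method and send_file implies this is a Flask view whose route decorator was trimmed from the snippet; a plausible wiring, with the URL rule as an assumption:

@app.route('/download/<filename>', methods=['GET'])
def download(filename):
    # Fetch the object from the bucket and stream it back as an attachment.
    output = download_file(filename, BUCKET)
    return send_file(output, as_attachment=True)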
Example #7
def beds_utilization_rate():
    CSV_PATH = './usa-hospital-beds.csv'
    if not os.path.exists(CSV_PATH):
        download_file()
    get_beds_utilization_rate()
    return render_template('bed_utilization_rate.html')
Example #8
def beds():
    CSV_PATH = './usa-hospital-beds.csv'
    if not os.path.exists(CSV_PATH):
        download_file()
    get_beds()
    return render_template('all_beds.html')
Example #9
def download_data(path, save_name=None):
    s3.download_file('yelp-data-shared-labs18', path, save_name=save_name)
Example #10
    def test_download_file(self, client):
        res = download_file('bucket', 'object_name')
        self.assertEqual(res, 'downloads/object_name')
        client.download_file.assert_called_once_with(
            'bucket', 'object_name', 'downloads/object_name')
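The assertions pin down the helper's contract: it saves the object to downloads/<object_name> through the mocked S3 client and returns that local path. A sketch consistent with the test, assuming a module-level boto3-style `client` and an existing downloads/ directory (both assumptions):

import os

def download_file(bucket, object_name):
    # Sketch only: fetch `object_name` from `bucket` into ./downloads/ using
    # the module-level S3 client, then return the local path.
    destination = os.path.join('downloads', object_name)
    client.download_file(bucket, object_name, destination)
    return destination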