def main(username):
    print(" In server training ")
    os.makedirs(os.path.join(DATA_PATH, 'checkpoints'))
    print("Created /data/checkpoints folders")

    # Download user file
    userdata_filename = os.path.join(DATA_PATH, f'{username}.json')
    download_file(
        os.path.join(TRAINING_CONFIG, f'{username}.json'),
        userdata_filename,
    )
    (task, username, model_name, ratio, is_reducelrscheduler, patience,
     factor, min_lr, optimizer, batch_size, learning_rate, epochs,
     dataset_filename) = get_config_data(userdata_filename)

    # Download dataset
    download_file(
        os.path.join(TRAINING_CONFIG, dataset_filename),
        os.path.join(DATA_PATH, dataset_filename),
    )
    print(" Completed fetching data from s3 ")

    inference_data = {}
    if task == 'image':
        inference_data = train_image_classification(
            username, model_name, ratio, is_reducelrscheduler, patience,
            factor, min_lr, optimizer, batch_size, learning_rate, epochs,
            dataset_filename)
    elif task == 'text':
        inference_data = train_sentiment_analysis(
            username, model_name, ratio, is_reducelrscheduler, patience,
            factor, min_lr, optimizer, batch_size, learning_rate, epochs,
            dataset_filename)

    # Upload data to S3
    upload_model_data(task, username)
    print('Uploaded inference data to s3')

    # Update inference json
    inference_config = fetch_json(INFERENCE_CONFIG)
    inference_config[username] = inference_data
    inference_config[username]['created'] = datetime.now().strftime(
        '%d-%m-%y %H:%M')
    put_object(INFERENCE_CONFIG, inference_config)
    print("Added user information to inference.json and uploaded to s3")

    # Delete train data from S3
    delete_object(os.path.join(TRAINING_CONFIG, dataset_filename))
    delete_object(os.path.join(TRAINING_CONFIG, f'{username}.json'))
    print("Deleted user data from training folder in s3")

    # Delete local data folder
    shutil.rmtree(DATA_PATH)
    print("Deleted data folder")

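# The training entry point above leans on a handful of S3 helpers (download_file,
# fetch_json, put_object, delete_object) whose definitions are not shown. Below is
# a minimal sketch of what they might look like with boto3; the bucket name
# BUCKET_NAME and the exact signatures are assumptions, not the original project's
# implementation.
import json

import boto3

s3_client = boto3.client('s3')
BUCKET_NAME = 'example-training-bucket'  # assumed; not from the original code


def download_file(key, local_path):
    # Fetch an S3 object to a local path.
    s3_client.download_file(BUCKET_NAME, key, local_path)


def fetch_json(key):
    # Read an S3 object and decode it as JSON.
    body = s3_client.get_object(Bucket=BUCKET_NAME, Key=key)['Body'].read()
    return json.loads(body)


def put_object(key, data):
    # Serialize a dict to JSON and upload it.
    s3_client.put_object(Bucket=BUCKET_NAME, Key=key, Body=json.dumps(data))


def delete_object(key):
    # Remove an object from the bucket.
    s3_client.delete_object(Bucket=BUCKET_NAME, Key=key)
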
def _load_data(self):
    if not os.path.isfile(self._raw_path):
        download_file(self._raw_path)
    self._df = pd.read_csv(self._raw_path)
    logging.info(f'Types:\n{self._df.dtypes}')
    logging.info(f'Head:\n{self._df.head()}')
    logging.info(f'Tail:\n{self._df.tail(7)}')
    self.row_count = self._df.shape[0]
    self.tip_avg = self._df["tip_amount"].mean()

def process_file():
    """Process the file.

    Download from S3. Parse and upload newly formatted file.
    """
    local_file = s3.download_file(BUCKET_NAME, S3_OBJECT)
    logging.info('File downloaded: ' + local_file)
    try:
        if local_file is not None:
            with open(local_file) as fp:
                with open(NEW_LOG_FILE, "w") as nf:
                    logging.info('Creating new file')
                    for line in fp:
                        if line.startswith('['):
                            trim_line = line[29:]
                            t = re.findall(r"[\S]+\S+\S", trim_line)
                            res = lc.parse_line(t)
                            new_data = lc.clean_date(res)
                            nf.write(lc.format_new_line(line, new_data) + '\n')
                        else:
                            nf.write(line + '\n')
    except Exception as e:
        logging.error(e)
    finally:
        # Clean up: upload the new log to S3 and delete temporary files.
        # The "with" blocks above already close both file handles.
        logging.info('New log file completed')
        s3.upload_file(NEW_LOG_FILE, BUCKET_NAME)
        os.remove(NEW_LOG_FILE)
        if local_file is not None:
            os.remove(local_file)

def predict(self, data, features_names):
    result = "PASS"
    params = dict((item.strip()).split("=") for item in data.split(","))
    print(params)
    eparams = ["model", "data"]
    if not all(x in params for x in eparams):
        print("Not all parameters have been defined")
        result = "FAIL"
        return result

    model = params['model']
    data = params['data']

    s3endpointUrl = os.environ['S3_ENDPOINT']
    # s3objectStoreLocation = os.environ['S3OBJECTSTORELOCATION']
    s3objectStoreLocation = 'fruitml'
    s3accessKey = os.environ['AWS_ACCESS_KEY_ID']
    s3secretKey = os.environ['AWS_SECRET_ACCESS_KEY']

    tmpdir = str(tempfile.mkdtemp())
    modelurl = model.split("/")
    MODEL = modelurl[-1]

    # Download the trained model from the storage backend into MODEL_PATH
    session = s3.create_session_and_resource(s3accessKey, s3secretKey,
                                             s3endpointUrl)
    s3.download_file(session, s3objectStoreLocation, model,
                     tmpdir + "/" + MODEL)
    self.clf = joblib.load(tmpdir + "/" + MODEL)

    # Extract the feature values of X
    dataset = data.split(':')
    dataset = filter(None, dataset)
    featurearray = [float(i) for i in dataset]

    # Build single-row DataFrame with generated column names f1, f2, ...
    columnNames = ['f' + str(i) for i in range(1, len(featurearray) + 1)]
    rowdf = pd.DataFrame([featurearray], columns=columnNames)
    predictions = self.clf.predict(rowdf)
    print(predictions)
    return predictions

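# For reference, the parsing above implies a payload of comma-separated key=value
# pairs, with the feature vector in "data" joined by ':'. A hypothetical call
# (class name, model path, and feature values invented for illustration only):
#
#     payload = "model=models/fruit/model.joblib, data=5.1:3.5:1.4:0.2"
#     predictions = FruitModel().predict(payload, features_names=None)
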
def get_df(data_file):
    # This code works to convert json files, but we are not currently planning to
    # do that. Leaving it here in case we want to expand functionality later,
    # though that would require changes elsewhere.
    # if data_file[-4:] == 'json':
    #     data = []
    #     for line in open(data_file, 'r'):
    #         row = json.loads(line)
    #         data.append(row)
    #     df = pd.DataFrame(data)
    #     return df
    data = s3.download_file('yelp-data-shared-labs18', data_file)
    df = pd.read_parquet(data)
    return df

def download(filename):
    if request.method == 'GET':
        output = download_file(filename, BUCKET)
        return send_file(output, as_attachment=True)

def beds_utilization_rate():
    CSV_PATH = './usa-hospital-beds.csv'
    if not os.path.exists(CSV_PATH):
        download_file()
    get_beds_utilization_rate()
    return render_template('bed_utilization_rate.html')

def beds():
    CSV_PATH = './usa-hospital-beds.csv'
    if not os.path.exists(CSV_PATH):
        download_file()
    get_beds()
    return render_template('all_beds.html')

def download_data(path, save_name=None):
    s3.download_file('yelp-data-shared-labs18', path, save_name=save_name)

def test_download_file(self, client):
    res = download_file('bucket', 'object_name')
    self.assertEqual(res, 'downloads/object_name')
    fake_client.download_file.assert_called_once_with(
        'bucket', 'object_name', 'downloads/object_name')
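
# The test above pins down the contract of the download_file wrapper it exercises:
# given a bucket and an object name, it downloads into a local "downloads/" folder
# and returns that path. A minimal sketch consistent with those assertions, assuming
# a boto3 S3 client; the real project's implementation may differ.
import os

import boto3


def download_file(bucket, object_name, client=None):
    # Default to a plain boto3 client when none is injected (tests patch in a fake).
    client = client or boto3.client('s3')
    destination = os.path.join('downloads', object_name)
    client.download_file(bucket, object_name, destination)
    return destination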