def train(): try: print("starting training...") hyperparameters = load_json_object(hyperparameters_file_path) print("\nHyperparameters configuration:") print_json_object(hyperparameters) input_data_config = load_json_object(inputdataconfig_file_path) print("\nInput data configuration:") print_json_object(input_data_config) for key in input_data_config: print("\nList of files in {0} channel: ".format(key)) channel_path = data_files_path + key + "/" print_files_in_path(channel_path) if os.path.exists(resource_file_path): resource_config = load_json_object(resource_file_path) print("\nResource configuration:") print_json_object(resource_config) # Take the set of files and read them all into a single pandas dataframe input_files = [ os.path.join(data_files_path + "train/", file) for file in os.listdir(data_files_path + "train/") ] if len(input_files) == 0: raise ValueError(( "There are no files in {}.\n" + "This usually indicates that the channel ({}) was incorrectly specified,\n" + "the data specification in S3 was incorrectly specified or the role specified\n" + "does not have permission to access the data.").format( data_files_path + "train/", "train")) concat_data = load_raw(input_files, [label_column, feature_column]) print(concat_data.info()) preprocessor = CountVectorizer(analyzer=set) print("fitting...") preprocessor.fit(concat_data[feature_column]) print("finished fitting...") feature_column_names = preprocessor.get_feature_names() print(feature_column_names) le = LabelEncoder() le.fit(concat_data[label_column]) print("le classes: ", le.classes_) dump(preprocessor, os.path.join(model_artifacts_path, "model.joblib")) dump(le, os.path.join(model_artifacts_path, "label.joblib")) print("saved model!") except Exception as e: write_failure_file(failure_file_path, str(e)) print(e, file=sys.stderr) sys.exit(1)
def train(): try: print("\nRunning training...") if os.path.exists(hyperparameters_file_path): hyperparameters = load_json_object(hyperparameters_file_path) print('\nHyperparameters configuration:') print_json_object(hyperparameters) if os.path.exists(inputdataconfig_file_path): input_data_config = load_json_object(inputdataconfig_file_path) print('\nInput data configuration:') print_json_object(input_data_config) for key in input_data_config: print('\nList of files in {0} channel: '.format(key)) channel_path = data_files_path + key + '/' print_files_in_path(channel_path) if os.path.exists(resource_file_path): resource_config = load_json_object(resource_file_path) print('\nResource configuration:') print_json_object(resource_config) if (training_job_name_env in os.environ): print("\nTraining job name: ") print(os.environ[training_job_name_env]) if (training_job_arn_env in os.environ): print("\nTraining job ARN: ") print(os.environ[training_job_arn_env]) # This object is used to handle SIGTERM and SIGKILL signals. signal_handler = ExitSignalHandler() # Dummy net. net = None # Run training loop. epochs = 1 for x in range(epochs): print("\nRunning epoch {0}...".format(x)) time.sleep(10) if (signal_handler.exit_now): print( "Received SIGTERM/SIGINT. Saving training state and exiting." ) # Save state here. save_model_artifacts(model_artifacts_path, net) sys.exit(0) print("Completed epoch {0}.".format(x)) # At the end of the training loop, we have to save model artifacts. save_model_artifacts(model_artifacts_path, net) print("\nTraining completed!") except Exception as e: write_failure_file(failure_file_path, str(e)) print(e, file=sys.stderr) sys.exit(1)