def main():
    make_dataset()
    eda_summary = generate_plots()
    model_train_predict(eda_summary)
def main():
    # Parse arguments
    args = parse_args()
    if args is None:
        exit()

    # Make dataset
    if args.make_dataset:
        print('%s - Fetching raw dataset: %s' % (datetime.datetime.now(), args.dataset))
        make_dataset.make_dataset(args.dataset)

    # Process dataset
    if args.process_dataset:
        print('%s - Processing raw dataset: %s' % (datetime.datetime.now(), args.dataset))
        process_dataset.process_dataset(args.dataset)

    # Build and train model
    if args.train_model:
        print('%s - Configuring and training Network: %s' % (datetime.datetime.now(), args.model))
        if args.model == 'BasicModel':
            model = BasicModel()
            model.train(dataset_str=args.dataset, epoch_N=args.epoch_max, batch_N=64)
        elif args.model == 'infoGAN':
            model = infoGAN()
            model.train(dataset_str=args.dataset, epoch_N=args.epoch_max, batch_size=args.batch_size)
        elif args.model == 'infoGAN_rgb':
            model = infoGAN_rgb()
            model.train(dataset_str=args.dataset, epoch_N=args.epoch_max, batch_size=args.batch_size)
        elif args.model == 'infoGAN_32x32':
            model = infoGAN_32x32()
            model.train(dataset_str=args.dataset, epoch_N=args.epoch_max, batch_size=args.batch_size)
        # elif args.model == 'weedGAN':
        #     model = weedGAN()
        #     model.train(dataset_str=args.dataset, epoch_N=25, batch_N=64)

    # Visualize results
    if args.visualize:
        print('Visualizing Results')
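# A minimal sketch of the parse_args() helper that main() above assumes. The
# flag names simply mirror the attributes accessed on args (make_dataset,
# process_dataset, train_model, visualize, dataset, model, epoch_max,
# batch_size); the defaults and help texts are illustrative assumptions, not
# taken from the original project.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Dataset and GAN training pipeline')
    parser.add_argument('--make-dataset', action='store_true',
                        help='fetch the raw dataset')
    parser.add_argument('--process-dataset', action='store_true',
                        help='process the raw dataset')
    parser.add_argument('--train-model', action='store_true',
                        help='configure and train the selected network')
    parser.add_argument('--visualize', action='store_true',
                        help='visualize results')
    parser.add_argument('--dataset', type=str, default='MNIST',
                        help='dataset name (assumed default)')
    parser.add_argument('--model', type=str, default='BasicModel',
                        choices=['BasicModel', 'infoGAN', 'infoGAN_rgb', 'infoGAN_32x32'],
                        help='model architecture to train')
    parser.add_argument('--epoch-max', type=int, default=25,
                        help='number of training epochs (assumed default)')
    parser.add_argument('--batch-size', type=int, default=64,
                        help='mini-batch size (assumed default)')
    args = parser.parse_args()
    # main() treats a None result as a signal to exit; returning None when no
    # action flag was requested is one plausible reading of that contract,
    # since argparse itself never returns None.
    if not (args.make_dataset or args.process_dataset or args.train_model or args.visualize):
        parser.print_help()
        return None
    return args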
def parameterized_test(self, model, mode):
    # given:
    data_dir = "test-data"
    interim_dir = self.test_dir + "/interim"
    processed_dir = self.test_dir + "/processed"
    model_dir = self.test_dir + "/model"
    model_path = model_dir + ("" if mode == "full" else "_" + mode) + "/0001.txt"
    submission_dir = self.test_dir + "/submissions"
    submission_path = submission_dir + "/submission.csv"

    # data preparation
    # when:
    make_dataset(data_dir, interim_dir)
    # then:
    self.assertTrue(os.path.exists(interim_dir + "/test_data.pkl"))

    # feature engineering
    # when:
    build_features(data_dir, processed_dir)
    # then:
    self.assertTrue(os.path.exists(processed_dir + "/test_data.pkl"))

    # model training
    # when:
    train_model(model, mode, processed_dir, model_dir)
    # then:
    self.assertTrue(os.path.exists(model_path))

    # model prediction
    # when:
    predict_model(processed_dir, model, model_path, submission_path)
    # then:
    self.assertTrue(os.path.exists(submission_path))
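# A possible way to drive parameterized_test from concrete unittest cases.
# The TestCase scaffolding, the temporary-directory handling behind
# self.test_dir, and the model/mode values ("lgbm", "full", "local") are
# illustrative assumptions, not taken from the original test suite.
import shutil
import tempfile
import unittest

class PipelineTest(unittest.TestCase):
    def setUp(self):
        # Each test gets a scratch directory standing in for self.test_dir.
        self.test_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.test_dir, ignore_errors=True)

    def test_full_pipeline(self):
        parameterized_test(self, "lgbm", "full")   # hypothetical model name

    def test_local_mode(self):
        parameterized_test(self, "lgbm", "local")  # hypothetical mode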
def builder():
    """
    Creates all the models after processing the dataset and building the
    features; the resulting artifacts are then accessed by api.py.
    """
    print("making dataset...")
    logger.info("making dataset...")
    df = make_dataset()

    print("building features...")
    logger.info("building features...")
    processed_data_with_features = build_features(df, True)

    print("training models...")
    logger.info("training models...")
    execute_models(processed_data_with_features)

    print("done")
    logger.info("done")
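# builder() relies on a module-level logger that the snippet does not define;
# a minimal setup consistent with its logger.info(...) calls could look like
# this (the logging format and level are assumptions):
import logging

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(name)s %(levelname)s: %(message)s')
logger = logging.getLogger(__name__)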
def preprocessed_data():
    df = make_dataset()
    return df
def main(build_historical: bool, use_daily: bool, **kwargs):
    logging.info("Updating data and executing the normalization pipeline")
    make_arpa_dataset(build_historical=build_historical)
    make_weather_dataset()
    make_dataset(use_daily=use_daily)
    predict_normalized_pollutant()
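# A sketch of how this main() might be wired to a command line; the flag
# names simply mirror the two keyword parameters, and the use of argparse
# here is an assumption rather than the project's actual entry point.
import argparse
import logging

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description='Pollutant normalization pipeline')
    parser.add_argument('--build-historical', action='store_true',
                        help='rebuild the historical ARPA dataset')
    parser.add_argument('--use-daily', action='store_true',
                        help='build the dataset at daily resolution')
    cli_args = parser.parse_args()
    main(build_historical=cli_args.build_historical, use_daily=cli_args.use_daily)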
from src.data import make_dataset as mk
from src.features import build_features as ft
from src.models import train_models as train
from src.models import test_models as test

if __name__ == '__main__':
    mk.make_dataset()
    ft.generate_features()
    train.train_models()
    test.test_models()