def main():
    # Stage 1: fetch and assemble the raw dataset
    make_dataset()

    # Stage 2: exploratory analysis; returns a summary used downstream
    eda_summary = generate_plots()

    # Stage 3: train the model and predict, guided by the EDA summary
    model_train_predict(eda_summary)
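The three stage functions live elsewhere in that project. A minimal self-contained sketch of the same shape, with every name and behavior below hypothetical:

import pandas as pd

def make_dataset():
    # hypothetical: write a toy raw dataset to disk
    pd.DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]}).to_csv("raw.csv", index=False)

def generate_plots():
    # hypothetical EDA step: return a summary for the modelling stage
    return pd.read_csv("raw.csv").describe()

def model_train_predict(eda_summary):
    # hypothetical: a real project would fit a model here
    print(eda_summary)

Paired with the main() above, this runs end to end.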
Example #2
import datetime
import sys
# (project-specific imports — parse_args, make_dataset, process_dataset, and the
#  model classes — are omitted in this excerpt)

def main():
    # Parse command-line arguments; abort if parsing failed
    args = parse_args()
    if args is None:
        sys.exit()
    
    # Make dataset
    if args.make_dataset:
        print('%s - Fetching raw dataset: %s' % (datetime.datetime.now(), args.dataset))
        make_dataset.make_dataset(args.dataset)

    # Process dataset
    if args.process_dataset:
        print('%s - Processing raw dataset: %s' % (datetime.datetime.now(), args.dataset))
        process_dataset.process_dataset(args.dataset)
        
    # Build and train model
    if args.train_model:
        print('%s - Configuring and training Network: %s' % (datetime.datetime.now(), args.model))

        if args.model == 'BasicModel':
            model = BasicModel()
            model.train(dataset_str=args.dataset, epoch_N=args.epoch_max, batch_N=64)

        elif args.model == 'infoGAN':
            model = infoGAN()
            model.train(dataset_str=args.dataset,
                        epoch_N=args.epoch_max,
                        batch_size=args.batch_size)

        elif args.model == 'infoGAN_rgb':
            model = infoGAN_rgb()
            model.train(dataset_str=args.dataset,
                        epoch_N=args.epoch_max,
                        batch_size=args.batch_size)

        elif args.model == 'infoGAN_32x32':
            model = infoGAN_32x32()
            model.train(dataset_str=args.dataset,
                        epoch_N=args.epoch_max,
                        batch_size=args.batch_size)
        
        # elif args.model == 'weedGAN':
        #     model = weedGAN()
        #     model.train(dataset_str = args.dataset, epoch_N = 25, batch_N = 64)
    
    # Visualize results
    if args.visualize:
        print('Visualizing Results')
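parse_args() is not shown. A sketch of an argparse version covering every attribute the body reads (flag spellings and defaults are assumptions; note that argparse itself exits on bad input rather than returning None, so the is-None guard above implies a custom wrapper):

import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Dataset and GAN training pipeline')
    parser.add_argument('--make_dataset', action='store_true')
    parser.add_argument('--process_dataset', action='store_true')
    parser.add_argument('--train_model', action='store_true')
    parser.add_argument('--visualize', action='store_true')
    parser.add_argument('--dataset', type=str, default='MNIST')  # default is a guess
    parser.add_argument('--model', type=str, default='BasicModel',
                        choices=['BasicModel', 'infoGAN', 'infoGAN_rgb', 'infoGAN_32x32'])
    parser.add_argument('--epoch_max', type=int, default=25)     # default is a guess
    parser.add_argument('--batch_size', type=int, default=64)    # default is a guess
    return parser.parse_args()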
Example #3
    def parameterized_test(self, model, mode):
        # given:
        data_dir = "test-data"
        interim_dir = self.test_dir + "/interim"
        processed_dir = self.test_dir + "/processed"
        model_dir = self.test_dir + "/model"
        model_path = model_dir + ("" if mode == "full" else "_" + mode) + "/0001.txt"
        submission_dir = self.test_dir + "/submissions"
        submission_path = submission_dir + "/submission.csv"

        # data preparation
        # when:
        make_dataset(data_dir, interim_dir)

        # then:
        self.assertTrue(os.path.exists(interim_dir + "/test_data.pkl"))

        # feature engineering
        # when:
        build_features(data_dir, processed_dir)

        # then:
        self.assertTrue(os.path.exists(processed_dir + "/test_data.pkl"))

        # model training
        # when:
        train_model(model, mode, processed_dir, model_dir)

        # then:
        self.assertTrue(os.path.exists(model_path))

        # model prediction
        # when:
        predict_model(processed_dir, model, model_path, submission_path)

        # then:
        self.assertTrue(os.path.exists(submission_path))
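The harness around this helper is not shown. A minimal sketch of how it is typically driven, assuming a unittest.TestCase with a throwaway workspace (the model name and the second mode string are hypothetical):

import shutil
import tempfile
import unittest

class PipelineTest(unittest.TestCase):
    def setUp(self):
        # throwaway workspace for interim/processed/model/submission artifacts
        self.test_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.test_dir, ignore_errors=True)

    # parameterized_test(...) as defined above goes here

    def test_full_mode(self):
        self.parameterized_test("some_model", "full")   # "some_model" is hypothetical

    def test_other_mode(self):
        self.parameterized_test("some_model", "light")  # "light" is a hypothetical mode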
Example #4
def builder():
    """
    Will be called to create all the models, after processing the dataset and building the features, which will then be accessed during usage of api.py
    """

    print("making dataset...")
    logger.info("making dataset...")
    df = make_dataset()

    print("build features...")
    logger.info("build features...")
    processed_data_with_features = build_features(df, True)

    print("train model...")
    logger.info("train model...")
    execute_models(processed_data_with_features)

    print("done")
    logger.info("done")
Example #5
def preprocessed_data():
    df = make_dataset()
    return df
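As written this is a thin wrapper, but the name and shape suggest a pytest fixture whose decorator was lost in extraction. A sketch of that reading, assuming pytest:

import pytest

@pytest.fixture
def preprocessed_data():
    # build the dataset once per test that requests the fixture
    return make_dataset()

def test_dataset_not_empty(preprocessed_data):
    # tests receive the DataFrame by naming the fixture as a parameter
    assert len(preprocessed_data) > 0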
Example #6
def main(build_historical: bool, use_daily: bool, **kwargs):
    logging.info("Updating data and executing the normalization pipeline")
    make_arpa_dataset(build_historical=build_historical)
    make_weather_dataset()
    make_dataset(use_daily=use_daily)
    predict_normalized_pollutant()
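How this main() is invoked is not shown. A minimal argparse wrapper sketch (flag names are assumptions; only the two keyword parameters come from the source):

import argparse
import logging

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description='Pollutant normalization pipeline')
    parser.add_argument('--build-historical', action='store_true')
    parser.add_argument('--use-daily', action='store_true')
    args = parser.parse_args()
    main(build_historical=args.build_historical, use_daily=args.use_daily)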
Example #7
from src.data import make_dataset as mk
from src.features import build_features as ft
from src.models import train_models as train
from src.models import test_models as test

if __name__ == '__main__':
    # run the full pipeline end to end: data -> features -> training -> evaluation
    mk.make_dataset()
    ft.generate_features()
    train.train_models()
    test.test_models()
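The imports imply a cookiecutter-data-science style layout, roughly (inferred, not stated in the source):

src/
    data/make_dataset.py          # mk.make_dataset()
    features/build_features.py    # ft.generate_features()
    models/train_models.py        # train.train_models()
    models/test_models.py         # test.test_models()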