def main():
    make_dataset()
    eda_summary = generate_plots()
    model_train_predict(eda_summary)
def main():
    # Parse arguments
    args = parse_args()
    if args is None:
        exit()

    # Make dataset
    if args.make_dataset:
        print('%s - Fetching raw dataset: %s' % (datetime.datetime.now(), args.dataset))
        make_dataset.make_dataset(args.dataset)

    # Process dataset
    if args.process_dataset:
        print('%s - Processing raw dataset: %s' % (datetime.datetime.now(), args.dataset))
        process_dataset.process_dataset(args.dataset)

    # Build and train model
    if args.train_model:
        print('%s - Configuring and training Network: %s' % (datetime.datetime.now(), args.model))
        if args.model == 'BasicModel':
            model = BasicModel()
            model.train(dataset_str=args.dataset, epoch_N=args.epoch_max, batch_N=64)
        elif args.model == 'infoGAN':
            model = infoGAN()
            model.train(dataset_str=args.dataset, epoch_N=args.epoch_max, batch_size=args.batch_size)
        elif args.model == 'infoGAN_rgb':
            model = infoGAN_rgb()
            model.train(dataset_str=args.dataset, epoch_N=args.epoch_max, batch_size=args.batch_size)
        elif args.model == 'infoGAN_32x32':
            model = infoGAN_32x32()
            model.train(dataset_str=args.dataset, epoch_N=args.epoch_max, batch_size=args.batch_size)
        # elif args.model == 'weedGAN':
        #     model = weedGAN()
        #     model.train(dataset_str=args.dataset, epoch_N=25, batch_N=64)

    # Visualize results
    if args.visualize:
        print('Visualizing Results')
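# A minimal sketch of the parse_args() helper that main() above assumes. The
# flag names simply mirror the attributes accessed on args (make_dataset,
# process_dataset, train_model, visualize, dataset, model, epoch_max,
# batch_size); the defaults and help texts are illustrative assumptions, not
# taken from the original project.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Dataset and GAN training pipeline')
    parser.add_argument('--make-dataset', action='store_true',
                        help='fetch the raw dataset')
    parser.add_argument('--process-dataset', action='store_true',
                        help='process the raw dataset')
    parser.add_argument('--train-model', action='store_true',
                        help='configure and train the selected network')
    parser.add_argument('--visualize', action='store_true',
                        help='visualize results')
    parser.add_argument('--dataset', type=str, default='MNIST',
                        help='dataset name (assumed default)')
    parser.add_argument('--model', type=str, default='BasicModel',
                        choices=['BasicModel', 'infoGAN', 'infoGAN_rgb', 'infoGAN_32x32'],
                        help='model architecture to train')
    parser.add_argument('--epoch-max', type=int, default=25,
                        help='number of training epochs (assumed default)')
    parser.add_argument('--batch-size', type=int, default=64,
                        help='mini-batch size (assumed default)')
    args = parser.parse_args()
    # main() treats a None result as a signal to exit; returning None when no
    # action flag was requested is one plausible reading of that contract,
    # since argparse itself never returns None.
    if not (args.make_dataset or args.process_dataset or args.train_model or args.visualize):
        parser.print_help()
        return None
    return args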
def parameterized_test(self, model, mode):
    # given:
    data_dir = "test-data"
    interim_dir = self.test_dir + "/interim"
    processed_dir = self.test_dir + "/processed"
    model_dir = self.test_dir + "/model"
    model_path = model_dir + ("" if mode == "full" else "_" + mode) + "/0001.txt"
    submission_dir = self.test_dir + "/submissions"
    submission_path = submission_dir + "/submission.csv"

    # data preparation
    # when:
    make_dataset(data_dir, interim_dir)
    # then:
    self.assertTrue(os.path.exists(interim_dir + "/test_data.pkl"))

    # feature engineering
    # when:
    build_features(data_dir, processed_dir)
    # then:
    self.assertTrue(os.path.exists(processed_dir + "/test_data.pkl"))

    # model training
    # when:
    train_model(model, mode, processed_dir, model_dir)
    # then:
    self.assertTrue(os.path.exists(model_path))

    # model prediction
    # when:
    predict_model(processed_dir, model, model_path, submission_path)
    # then:
    self.assertTrue(os.path.exists(submission_path))
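# A possible way to drive parameterized_test from concrete unittest cases.
# The TestCase scaffolding, the temporary-directory handling behind
# self.test_dir, and the model/mode values ("lgbm", "full", "local") are
# illustrative assumptions, not taken from the original test suite.
import shutil
import tempfile
import unittest

class PipelineTest(unittest.TestCase):
    def setUp(self):
        # Each test gets a scratch directory standing in for self.test_dir.
        self.test_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.test_dir, ignore_errors=True)

    def test_full_pipeline(self):
        parameterized_test(self, "lgbm", "full")   # hypothetical model name

    def test_local_mode(self):
        parameterized_test(self, "lgbm", "local")  # hypothetical mode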
def builder():
    """
    Creates all the models after processing the dataset and building the
    features; the resulting artifacts are then accessed by api.py.
    """
    print("making dataset...")
    logger.info("making dataset...")
    df = make_dataset()

    print("building features...")
    logger.info("building features...")
    processed_data_with_features = build_features(df, True)

    print("training models...")
    logger.info("training models...")
    execute_models(processed_data_with_features)

    print("done")
    logger.info("done")
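# builder() relies on a module-level logger that the snippet does not define;
# a minimal setup consistent with its logger.info(...) calls could look like
# this (the logging format and level are assumptions):
import logging

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(name)s %(levelname)s: %(message)s')
logger = logging.getLogger(__name__)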
def preprocessed_data():
    df = make_dataset()
    return df
def main(build_historical: bool, use_daily: bool, **kwargs):
    logging.info("Updating data and executing the normalization pipeline")
    make_arpa_dataset(build_historical=build_historical)
    make_weather_dataset()
    make_dataset(use_daily=use_daily)
    predict_normalized_pollutant()
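# A sketch of how this main() might be wired to a command line; the flag
# names simply mirror the two keyword parameters, and the use of argparse
# here is an assumption rather than the project's actual entry point.
import argparse
import logging

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description='Pollutant normalization pipeline')
    parser.add_argument('--build-historical', action='store_true',
                        help='rebuild the historical ARPA dataset')
    parser.add_argument('--use-daily', action='store_true',
                        help='build the dataset at daily resolution')
    cli_args = parser.parse_args()
    main(build_historical=cli_args.build_historical, use_daily=cli_args.use_daily)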
from src.data import make_dataset as mk
from src.features import build_features as ft
from src.models import train_models as train
from src.models import test_models as test

if __name__ == '__main__':
    mk.make_dataset()
    ft.generate_features()
    train.train_models()
    test.test_models()