@classmethod
def setUpClass(cls):
    print('Setting up TestMakeDataset class...')
    main()
    cls.data = pd.read_csv(os.path.join(OUTPUT_DIR, "owid-energy-data.csv"))
    cls.codebook = pd.read_csv(
        os.path.join(OUTPUT_DIR, "owid-energy-codebook.csv"))
    cls.index_cols = ['country', 'year', 'iso_code']
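# A minimal sketch of test methods that could sit alongside setUpClass inside
# TestMakeDataset; the test names and the codebook's 'column' field are
# assumptions, not part of the original suite.
def test_index_columns_present(self):
    # every declared index column should exist in the generated dataset
    for col in self.index_cols:
        self.assertIn(col, self.data.columns)

def test_codebook_covers_data_columns(self):
    # each non-index column of the dataset should be documented in the codebook
    documented = set(self.codebook['column'])
    for col in self.data.columns:
        if col not in self.index_cols:
            self.assertIn(col, documented)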
def main():
    # create logger with 'my_application'
    logger = logging.getLogger('my_application')
    logger.setLevel(logging.DEBUG)
    # create file handler which logs even debug messages
    fh = logging.FileHandler('./myapp.log')
    fh.setLevel(logging.DEBUG)
    # create console handler with a higher log level
    ch = logging.StreamHandler()
    ch.setLevel(logging.ERROR)
    # create formatter and add it to the handlers
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    # add the handlers to the logger
    logger.addHandler(fh)
    logger.addHandler(ch)

    logger.info('Execution started')

    # make_dataset
    data_files_list_path = './data/interim/LCA_files_list.txt'
    input_df = make_dataset.main(data_files_list_path)
    # ensure input_df is not empty; log the error before raising so the
    # failure is recorded even when assertions are disabled
    if input_df.shape[0] == 0:
        logger.error('Input DataFrame is empty')
        raise ValueError('Input DataFrame is empty')
    logger.info('Dataset imported')

    # build_features
    X, y = build_features.main(input_df)
    logger.info('Features built, X shape %d %d, y shape %d',
                X.shape[0], X.shape[1], y.shape[0])

    # if mode == 'train':
    #     train_model
    logger.info('Model trained')
    # elif mode == 'predict':
    #     make_prediction
    logger.info('Prediction made')
    #     explain_model_prediction
    logger.info('Model explained')
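# The train / predict branches in main() are still stubs. One possible way to
# wire them up is a small argparse switch; the --mode flag and the
# predict_model module are assumptions, while the train_model.main(...) call
# mirrors the usage shown in the training script further below.
def cli():
    import argparse

    parser = argparse.ArgumentParser(description='Run the LCA pipeline')
    parser.add_argument('--mode', choices=['train', 'predict'], default='train')
    args = parser.parse_args()

    input_df = make_dataset.main('./data/interim/LCA_files_list.txt')
    X, y = build_features.main(input_df)
    if args.mode == 'train':
        train_model.main(model=None, action='train', X=X, y=y)
    else:
        predict_model.main(X=X)  # hypothetical prediction entry point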
    module_logger.info('Building features complete.')

    # save pipeline when method is fit or fit_transform
    if method in ['fit', 'fit_transform']:
        dump(build_feature_pipe, open('./models/build_feature_pipe.pkl', 'wb'))
        dump(all_preprocess, open('./models/preprocess_pipe.pkl', 'wb'))
        module_logger.info('Pipeline saved.')

    return X, y


if __name__ == '__main__':
    import make_dataset
    from pickle import load

    data_files_list_path = './data/interim/LCA_files_list.txt'
    input_df = make_dataset.main(path=data_files_list_path,
                                 file_type='file_list')

    # For training the very first time
    #X, y = main(input_df, build_feature_pipe=None, all_preprocess=None, method='fit')
    #print(X.shape)
    #print(y.shape)

    # For iterative training
    build_feature_pipe = load(open('./models/build_feature_pipe.pkl', 'rb'))
    all_preprocess = load(open('./models/preprocess_pipe.pkl', 'rb'))
    X, y = main(input_df, build_feature_pipe, all_preprocess, method='inverse')
    print(X.shape)
    print(y.shape)
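# A sketch of the prediction-time path: load the pipelines persisted by the
# fit / fit_transform branch and apply them to new data. method='transform'
# is an assumption here; only 'fit', 'fit_transform' and 'inverse' appear in
# the original code.
def predict_time_example():
    from pickle import load
    import make_dataset

    build_feature_pipe = load(open('./models/build_feature_pipe.pkl', 'rb'))
    all_preprocess = load(open('./models/preprocess_pipe.pkl', 'rb'))
    new_df = make_dataset.main(path='./data/interim/LCA_files_list.txt',
                               file_type='file_list')
    # reuse the fitted pipelines without refitting them
    return main(new_df, build_feature_pipe, all_preprocess, method='transform')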
###############################################################################
# need to make a dataset directory where the new datasets will live
check_make('{0}/dataset'.format(job_directory))
# need to make a config directory where the oodles of new configs live
check_make('{0}/configs'.format(job_directory))
# need to make a logs directory to check out how my jobs performed
check_make('{0}/logs'.format(job_directory))

###############################################################################
# make fits dataset
###############################################################################
if args.dataset:
    from make_dataset import main
    main(job_directory + '/dataset/', **make_dataset_kwargs)

###############################################################################
# make wizard dataset (needed to determine number of jobs to submit)
###############################################################################
# edit config to only run make_directories
make_directories_config = config.copy()
make_directories_config['run'] = ['make_directories']

# execute config file with wizard
wizard(make_directories_config)

if args.dataset:
    # edit config to only run dataset
    dataset_config = config.copy()
    dataset_config['run'] = ['make_directories', 'dataset']
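# check_make is defined elsewhere in this repo; assuming it simply creates a
# directory when it is missing, a minimal stand-in (which would have to be
# defined before the calls above) could look like this:
import os

def check_make(path):
    # create the directory (and any parents) only if it does not exist yet
    os.makedirs(path, exist_ok=True)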
    print('f1-score:', f1)
    # f1 holds one score per class; use a float format so values in [0, 1]
    # are not truncated to 0
    module_logger.info('Model evaluation metrics: [%.3f, %.3f]', f1[0], f1[1])


if __name__ == '__main__':
    import make_dataset
    import build_features
    from pickle import load

    #data_files_list_path = './data/interim/LCA_files_list.txt'
    #input_df = make_dataset.main(path=data_files_list_path, file_type='file_list')

    # For training the very first time
    #'''
    data_path = './data/interim/LCA_dataset_sample10000.xlsx'
    input_df = make_dataset.main(path=data_path, file_type='data_file')
    X, y = build_features.main(input_df, build_feature_pipe=None,
                               all_preprocess=None, method='fit_transform')
    main(model=None, action='train', X=X, y=y)
    #'''

    # For incremental training of an existing model
    '''
    data_path = './data/interim/LCA_dataset_sample1000.xlsx'
    input_df = make_dataset.main(path=data_path, file_type='data_file')
    X, y = build_features.main(input_df, build_feature_pipe=None,
                               all_preprocess=None, method='fit_transform')
    model = load(open('./models/adaboost_batch_train.pkl', 'rb'))
    main(model=model, action='train', X=X, y=y)
    '''
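# For context, f1 above holds one score per class, which matches
# sklearn.metrics.f1_score with average=None. A tiny self-contained example on
# synthetic labels (not the project's data):
def f1_per_class_example():
    from sklearn.metrics import f1_score

    y_true = [0, 1, 1, 0, 1, 0]
    y_pred = [0, 1, 0, 0, 1, 1]
    # average=None returns an array with one f1 value per class,
    # hence the f1[0] / f1[1] indexing above
    f1 = f1_score(y_true, y_pred, average=None)
    print('f1-score:', f1)  # -> [0.66666667 0.66666667]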