def main():
    """Run the experiment: load config, guard against duplicate logged
    experiments, build the train/test datasets, and optionally dump them
    to CSV.

    Relies on module-level state: ``args`` (parsed CLI arguments),
    ``cfg_main`` (main config dict), ``logger``, ``path_to_dumps``, and
    the helpers ``configure_model``, ``Logger``, ``make_datasets``.

    Raises:
        ExperimentExists: if logging is enabled and a record for this
            experiment name already exists in the Mongo collection.
    """
    config_file = args.path_to_config_file
    config, config_raw = configure_model(config_file)

    # If logging is enabled, check that there are no records for
    # the selected experiment
    if not args.notlog:
        logger_uri = cfg_main['logger']['uri']
        logger_db = cfg_main['logger']['db']
        logger_collection = cfg_main['logger']['collection']
        mongo_logger = Logger(logger_uri, logger_db, logger_collection)
        if mongo_logger.experiment_exists(config['experiment_name']):
            raise ExperimentExists(config['experiment_name'])

    # datasets
    logger.info('Loading datasets...')
    train, test = make_datasets(config)
    logger.debug('Train x shape: {} Test x shape: {}'.format(train.x.shape,
                                                             test.x.shape))

    # Dump datasets if dump option was selected
    if args.dump:
        logger.info('Dumping train and tests sets')
        datasets = [(train, 'train'), (test, 'test')]
        for data, name in datasets:
            if data is not None:
                filename = '{}_{}.csv'.format(config["experiment_name"], name)
                try:
                    # Try to convert to dataframe, it will fail if data is empty
                    df = data.to_df()
                except Exception as e:
                    # Best-effort dump: log the failure and move on.
                    logger.info('Error saving {} as csv: {}'.format(filename, e))
                else:
                    # BUGFIX: was `finally:`, which ran to_csv even when
                    # to_df() failed, raising NameError because `df` was
                    # never bound. `else:` writes only on success.
                    df.to_csv(os.path.join(path_to_dumps, filename))
def main():
    """Run the experiment: load config, guard against duplicate logged
    experiments, build and NA-validate the train/test datasets, and
    optionally dump them to CSV.

    Relies on module-level state: ``args`` (parsed CLI arguments),
    ``cfg_main`` (main config dict), ``logger``, ``path_to_dumps``,
    ``NAS_PROPORTION_THRESHOLD``, and the helpers ``configure_model``,
    ``Logger``, ``make_datasets``, ``check_nas_threshold``.

    Raises:
        ExperimentExists: if logging is enabled and a record for this
            experiment name already exists in the Mongo collection.
    """
    config_file = args.path_to_config_file
    config, config_raw = configure_model(config_file)

    # If logging is enabled, check that there are no records for
    # the selected experiment
    if not args.notlog:
        logger_uri = cfg_main['logger']['uri']
        logger_db = cfg_main['logger']['db']
        logger_collection = cfg_main['logger']['collection']
        mongo_logger = Logger(logger_uri, logger_db, logger_collection)
        if mongo_logger.experiment_exists(config['experiment_name']):
            raise ExperimentExists(config['experiment_name'])

    # datasets
    logger.info('Loading datasets...')
    train, test = make_datasets(config)
    logger.debug('Train x shape: {} Test x shape: {}'.format(train.x.shape,
                                                             test.x.shape))

    # Check percentage of NAs for every feature,
    # raise an error if at least one feature has more NAs than the
    # acceptable threshold
    logger.info('Checking training set NAs...')
    prop = check_nas_threshold(train.to_df(), NAS_PROPORTION_THRESHOLD)
    logger.debug(prop)
    logger.info('Checking testing set NAs...')
    prop = check_nas_threshold(test.to_df(), NAS_PROPORTION_THRESHOLD)
    logger.debug(prop)

    # Dump datasets if dump option was selected
    if args.dump:
        logger.info('Dumping train and tests sets')
        datasets = [(train, 'train'), (test, 'test')]
        for data, name in datasets:
            if data is not None:
                filename = '{}_{}.csv'.format(config["experiment_name"], name)
                try:
                    # Try to convert to dataframe, it will fail if data is empty
                    df = data.to_df()
                except Exception as e:
                    # Best-effort dump: log the failure and move on.
                    logger.info('Error saving {} as csv: {}'.format(filename, e))
                else:
                    # BUGFIX: was `finally:`, which ran to_csv even when
                    # to_df() failed, raising NameError because `df` was
                    # never bound. `else:` writes only on success.
                    df.to_csv(os.path.join(path_to_dumps, filename))
def main():
    """Run the experiment: attach optional file log handlers, load config,
    guard against duplicate logged experiments (optionally overwriting),
    build and NA-validate the datasets (plus a prediction set when
    ``--predicttop`` is given), and optionally dump everything to CSV.

    Relies on module-level state: ``args`` (parsed CLI arguments),
    ``cfg_main`` (main config dict), ``logger``, ``path_to_dumps``,
    ``NAS_PROPORTION_THRESHOLD``, and the helpers ``configure_model``,
    ``Logger``, ``make_datasets``, ``check_nas_threshold``.

    Raises:
        ValueError: if ``--predicttop`` is combined with ``--notlog``
            (top predictions require logging).
        ExperimentExists: if a record for this experiment name exists
            and ``--overwritelog`` was not given.
    """
    # Optional extra log sinks: a WARNING-level file and/or a DEBUG-level file.
    if args.warninglog:
        myhandler = logging.FileHandler(os.path.abspath(args.warninglog))
        myhandler.setLevel('WARNING')
        logger.addHandler(myhandler)
    if args.debuglog:
        myhandler2 = logging.FileHandler(os.path.abspath(args.debuglog))
        myhandler2.setLevel('DEBUG')
        logger.addHandler(myhandler2)

    config_file = args.path_to_config_file
    config, config_raw = configure_model(config_file)

    if args.predicttop and args.notlog:
        raise ValueError("You cannot save the top X predictions "
                         "on all parcels without also logging.")

    # If logging is enabled, check that there are no records for
    # the selected experiment
    if not args.notlog:
        logger_uri = cfg_main['logger']['uri']
        logger_db = cfg_main['logger']['db']
        logger_collection = cfg_main['logger']['collection']
        mongo_logger = Logger(logger_uri, logger_db, logger_collection)
        if mongo_logger.experiment_exists(config['experiment_name']):
            # if the user hasn't selected to overwrite the record, throw error
            if not args.overwritelog:
                raise ExperimentExists(config['experiment_name'])
            else:
                mongo_logger.delete_experiment(config['experiment_name'])

    # datasets
    logger.info('Loading datasets...')
    if not args.predicttop:
        train, test = make_datasets(config, predictset=False)
        logger.debug('Train x shape: {} Test x shape: {}'.format(train.x.shape,
                                                                 test.x.shape))
    else:
        train, test, preds = make_datasets(config, predictset=True)
        logger.debug('Train x shape: {} Test x shape: {} Prediction x shape {}'\
                     .format(train.x.shape, test.x.shape, preds.x.shape))

    # Check percentage of NAs for every feature,
    # raise an error if at least one feature has more NAs than the
    # acceptable threshold
    logger.info('Checking training set NAs...')
    prop = check_nas_threshold(train.to_df(), NAS_PROPORTION_THRESHOLD)
    logger.debug(prop)
    logger.info('Checking testing set NAs...')
    prop = check_nas_threshold(test.to_df(), NAS_PROPORTION_THRESHOLD)
    logger.debug(prop)

    # Dump datasets if dump option was selected
    if args.dump:
        logger.info('Dumping train and tests sets')
        datasets = [(train, 'train'), (test, 'test')]
        if args.predicttop:
            logger.info('Dumping prediction sets')
            datasets.append((preds, 'prediction'))
        for data, name in datasets:
            if data is not None:
                filename = '{}_{}.csv'.format(config["experiment_name"], name)
                try:
                    # Try to convert to dataframe, it will fail if data is empty
                    df = data.to_df()
                    df.to_csv(os.path.join(path_to_dumps, filename))
                # BUGFIX: `except Exception, e:` is Python-2-only syntax;
                # `as e` is valid on 2.6+ and 3.x alike.
                except Exception as e:
                    # Best-effort dump: log the failure and continue.
                    logger.info('Error saving {} as csv: {}'.format(filename, e))
            else:
                logger.info('{} is None, skipping dump...'.format(name))