def make_control(argv):
    """Assemble the control Bunch for this script from the command line.

    argv holds the script name plus at most one further argument; --test is
    the only flag inspected. The base name is the text of argv[0] before
    its first '.'.

    Returns a Bunch with the parsed arguments, the debug flag, the Path
    object, the output path, the random seed, and the test flag.
    """
    print(argv)
    if not 1 <= len(argv) <= 2:
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    script_stem = argv[0].split('.')[0]
    arg = Bunch(
        base_name=script_stem,
        test=command_line.has_arg('--test'),
    )

    # seed the random module so that runs are repeatable
    seed = 123456
    random.seed(seed)

    path = Path()  # use the default dir_input
    out_dir = path.dir_working()

    return Bunch(
        arg=arg,
        debug=False,
        path=path,
        path_out=out_dir + arg.base_name + '-' + 'derived.csv',
        random_seed=seed,
        test=arg.test,
    )
def make_control(argv):
    """Build the control Bunch from the command line.

    argv must have 3 or 4 entries: the script name, --geo with its value,
    and optionally one more argument (--test is the only flag inspected).
    The --geo value must be 'census_tract' or 'zip5'; a missing or invalid
    value is reported through usage().

    Returns a Bunch holding the parsed arguments, the debug flag, the
    maximum sale price, the Path object, the two output paths, the random
    seed, and the test flag.
    """
    print(argv)
    if len(argv) not in (3, 4):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name=argv[0].split('.')[0],
        geo=pcl.get_arg('--geo'),
        test=pcl.has_arg('--test'),
    )
    if arg.geo is None:
        usage('missing --geo')
    if arg.geo not in ('census_tract', 'zip5'):
        # Concatenate the offending value into the message. The earlier form
        # passed "+ arg.geo" as a second argument, which applies unary plus
        # to a string and raises TypeError before usage() is ever called.
        usage('invalid GEO value: ' + str(arg.geo))

    random_seed = 123456
    random.seed(random_seed)

    path = Path()  # use the default dir_input
    debug = False

    return Bunch(
        arg=arg,
        debug=debug,
        max_sale_price=85e6,  # according to Wall Street Journal
        path=path,
        path_out_csv=path.dir_working() + arg.base_name + '-' + arg.geo + '.csv',
        path_out_occurs=path.dir_working() + arg.base_name + '-' + arg.geo + '-occurs.pickle',
        random_seed=random_seed,
        test=arg.test,
    )
def make_control(argv):
    """Build the control Bunch for the 'valgbr' script.

    argv must have 2 or 3 entries: the script name, YYYYMM (argv[1]), and
    optionally one more argument (--test is the only flag inspected).
    YYYYMM is converted to an int; a value that cannot be converted is
    reported through usage().

    Side effect: creates the directory <dir_working>valgbr/ when it does
    not already exist.

    Returns a Bunch with the parsed arguments, the debug flag, the fixed
    hyperparameters, the input and output paths, the random seed, and the
    test flag.
    """
    print(argv)
    if not (2 <= len(argv) <= 3):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='valgbr',
        yyyymm=argv[1],
        test=pcl.has_arg('--test'),
    )
    try:
        arg.yyyymm = int(arg.yyyymm)
    except (TypeError, ValueError):
        # Only a failed conversion means bad input; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        usage('YYYYMM not an integer')

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    debug = False

    out_file_name = (
        ('test-' if arg.test else '') +
        '%s.pickle' % arg.yyyymm
    )

    # assure output directory exists
    dir_path = dir_working + arg.base_name + '/'
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    fixed_hps = Bunch(
        loss='quantile',
        alpha=0.5,
        n_estimators=1000,
        max_depth=3,
        max_features=None,
    )

    return Bunch(
        arg=arg,
        debug=debug,
        fixed_hps=fixed_hps,
        path_in=dir_working + 'samples-train.csv',
        path_out=dir_path + out_file_name,
        random_seed=random_seed,
        test=arg.test,
    )
def make_control(argv):
    """Build the control Bunch for the 'ege' script.

    argv must have between 4 and 7 entries. Options read from it:
      --folds N             required; converted to an int
      --rfbound HP YYYYMM   optional; HP must be 'max_depth' or 'max_features'
      --test                optional flag
    Problems with the options are reported through usage().

    Returns a Bunch with the parsed arguments, the debug flag, the input
    and output paths, the random seed, and the test flag.
    """
    print(argv)
    if not (4 <= len(argv) <= 7):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='ege',
        folds=pcl.get_arg('--folds'),
        rfbound=pcl.get_arg('--rfbound'),  # arg.rfbound is None or a string or a list of strings
        test=pcl.has_arg('--test'),
    )

    if arg.rfbound is not None:
        if not len(arg.rfbound) == 2:
            usage('correct is --rfbound HP YYYYMM')
        arg.hp = arg.rfbound[0]
        arg.yyyymm = arg.rfbound[1]
        if not (arg.hp in ('max_depth', 'max_features')):
            usage('--rfbound {max_depth|max_features} YYYYMM')

    if arg.folds is None:
        usage('--folds is required')
    else:
        try:
            arg.folds = int(arg.folds)
        except (TypeError, ValueError):
            # report a bad value through usage() rather than a raw traceback
            usage('--folds not an integer; ' + str(arg.folds))

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    debug = False

    # the output name records test mode, the rfbound setting (if any), and
    # the fold count
    out_file_name_base = (
        ('test-' if arg.test else '') +
        arg.base_name +
        ('' if arg.rfbound is None else '-rfbound-%s-%s' % (arg.hp, arg.yyyymm)) +
        ('-folds-%02d' % arg.folds)
    )

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + 'samples-train-validate.csv',
        path_out=dir_working + out_file_name_base + '.pickle',
        random_seed=random_seed,
        test=arg.test,
    )
def make_control(argv):
    """Build the control Bunch for the 'rfbound' script.

    argv must have 4 or 5 entries: the script name, HP (argv[1]), YYYYMM
    (argv[2]), the fold count (argv[3]), and optionally one more argument
    (--test is the only flag inspected). The fold count is converted to an
    int; a value that cannot be converted is reported through usage().

    Side effect: creates the directory <dir_working>rfbound when it does
    not already exist.

    Returns a Bunch with the parsed arguments, the debug flag, the input
    and output paths, the random seed, and the test flag.
    """
    print(argv)
    if not (4 <= len(argv) <= 5):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='rfbound',
        hp=argv[1],
        yyyymm=argv[2],
        folds=argv[3],
        test=pcl.has_arg('--test'),
    )
    try:
        arg.folds = int(arg.folds)
    except (TypeError, ValueError):
        # Only a failed conversion means bad input; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        usage('INT not an integer; ' + str(arg.folds))

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    debug = False

    # the output file lives in a sub-directory named after the script
    out_file_name = (
        '%s/%s%s-%s-folds-%02d.pickle' % (
            arg.base_name,
            ('test-' if arg.test else ''),
            arg.hp,
            arg.yyyymm,
            arg.folds)
    )

    # assure the output directory exists
    dir_path = dir_working + arg.base_name
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + 'samples-train-validate.csv',
        path_out=dir_working + out_file_name,
        random_seed=random_seed,
        test=arg.test,
    )
def make_control(argv):
    """Build the control Bunch for the 'ege' script.

    argv must have between 4 and 7 entries. Options read from it:
      --folds N             required; converted to an int
      --rfbound HP YYYYMM   optional; HP must be 'max_depth' or 'max_features'
      --test                optional flag
    Problems with the options are reported through usage().

    Returns a Bunch with the parsed arguments, the debug flag, the input
    and output paths, the random seed, and the test flag.
    """
    print(argv)
    if not (4 <= len(argv) <= 7):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='ege',
        folds=pcl.get_arg('--folds'),
        rfbound=pcl.get_arg(
            '--rfbound'
        ),  # arg.rfbound is None or a string or a list of strings
        test=pcl.has_arg('--test'),
    )

    if arg.rfbound is not None:
        if not len(arg.rfbound) == 2:
            usage('correct is --rfbound HP YYYYMM')
        arg.hp = arg.rfbound[0]
        arg.yyyymm = arg.rfbound[1]
        if not (arg.hp in ('max_depth', 'max_features')):
            usage('--rfbound {max_depth|max_features} YYYYMM')

    if arg.folds is None:
        usage('--folds is required')
    else:
        try:
            arg.folds = int(arg.folds)
        except (TypeError, ValueError):
            # report a bad value through usage() rather than a raw traceback
            usage('--folds not an integer; ' + str(arg.folds))

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    debug = False

    # the output name records test mode, the rfbound setting (if any), and
    # the fold count
    out_file_name_base = (('test-' if arg.test else '') + arg.base_name +
                          ('' if arg.rfbound is None else '-rfbound-%s-%s' %
                           (arg.hp, arg.yyyymm)) +
                          ('-folds-%02d' % arg.folds))

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + 'samples-train-validate.csv',
        path_out=dir_working + out_file_name_base + '.pickle',
        random_seed=random_seed,
        test=arg.test,
    )
def make_control(argv):
    """Build the control Bunch for the 'linval' script.

    argv must have 2 or 3 entries: the script name, YYYYMM (argv[1]), and
    optionally one more argument (--test is the only flag inspected).
    YYYYMM is converted to an int; a value that cannot be converted is
    reported through usage().

    Side effect: creates the directory <dir_working>linval/ when it does
    not already exist.

    Returns a Bunch with the parsed arguments, the debug flag, the input
    and output paths, the random seed, and the test flag.
    """
    print(argv)
    if not (2 <= len(argv) <= 3):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='linval',
        yyyymm=argv[1],
        test=pcl.has_arg('--test'),
    )
    try:
        arg.yyyymm = int(arg.yyyymm)
    except (TypeError, ValueError):
        # Only a failed conversion means bad input; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        usage('YYYYMM not an integer')

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    debug = False

    out_file_name = (
        ('test-' if arg.test else '') +
        '%s.pickle' % arg.yyyymm
    )

    # assure output directory exists
    dir_path = dir_working + arg.base_name + '/'
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + 'samples-train-validate.csv',
        path_out=dir_path + out_file_name,
        random_seed=random_seed,
        test=arg.test,
    )
def make_control(argv):
    """Assemble the control Bunch for the 'chart-05' script.

    argv holds the script name plus up to two options; --data and --test
    are the flags inspected. When two options are supplied, both flags must
    be present, otherwise the problem is reported through usage().

    Side effect: creates the directory <dir_working>chart-05/ when it does
    not already exist.

    Returns a Bunch with the parsed arguments, the debug flag, the input
    glob, the reduction path, the chart base path, the random seed, and the
    test flag.
    """
    print(argv)
    n_args = len(argv)
    if not 1 <= n_args <= 3:
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name='chart-05',
        data=command_line.has_arg('--data'),
        test=command_line.has_arg('--test'),
    )

    # two options were given, so they have to be --data and --test
    if n_args == 3 and not (arg.data and arg.test):
        usage('there is an extra invocation option')

    seed = 123
    random.seed(seed)

    working = Path().dir_working()

    prefix = 'test-' if arg.test else ''
    reduced_name = prefix + 'data.pickle'

    # make sure the directory that receives the output is present
    chart_dir = working + arg.base_name + '/'
    if not os.path.exists(chart_dir):
        os.makedirs(chart_dir)

    return Bunch(
        arg=arg,
        debug=False,
        path_in_ege=working + 'valgbr/*.pickle',
        path_reduction=chart_dir + reduced_name,
        path_chart_base=chart_dir,
        random_seed=seed,
        test=arg.test,
    )
def make_control(argv):
    """Assemble the control Bunch for the transactions-building script.

    argv holds the script name plus at most one further argument; --help
    triggers usage() and --test switches the output file to its 'testing-'
    variant. The base name is the text of argv[0] before its first '.'.

    Returns a Bunch with the parsed arguments, the debug flag, the maximum
    sale price, the Path object, the three input paths, the output path,
    the random seed, and the test flag.
    """
    print(argv)
    if not 1 <= len(argv) <= 2:
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    if command_line.has_arg('--help'):
        usage()

    arg = Bunch(
        base_name=argv[0].split('.')[0],
        test=command_line.has_arg('--test'),
    )

    seed = 123456
    random.seed(seed)

    path = Path()  # use the default dir_input

    prefix = 'testing-' if arg.test else ''
    transactions_name = prefix + arg.base_name + '-al-g-sfr' + '.csv'

    # resolve each location in turn against the working directory
    census_features = path.dir_working() + 'census-features-derived.csv'
    parcels_census_tract = path.dir_working() + 'parcels-features-census_tract.csv'
    parcels_zip5 = path.dir_working() + 'parcels-features-zip5.csv'
    transactions = path.dir_working() + transactions_name

    return Bunch(
        arg=arg,
        debug=False,
        max_sale_price=85e6,  # according to Wall Street Journal
        path=path,
        path_in_census_features=census_features,
        path_in_parcels_features_census_tract=parcels_census_tract,
        path_in_parcels_features_zip5=parcels_zip5,
        path_out_transactions=transactions,
        random_seed=seed,
        test=arg.test,
    )
def make_control(argv):
    """Create the control Bunch that drives the transactions script.

    Accepts the script name and at most one extra argument. --help routes
    to usage(); --test selects the 'testing-' prefixed output file. The
    base name is whatever precedes the first '.' in argv[0].

    Returns a Bunch with the parsed arguments, the debug flag, the maximum
    sale price, the Path object, the three input paths, the output path,
    the random seed, and the test flag.
    """
    print(argv)
    arg_count = len(argv)
    if arg_count < 1 or arg_count > 2:
        usage('invalid number of arguments')

    parsed = ParseCommandLine(argv)
    if parsed.has_arg('--help'):
        usage()

    arg = Bunch(
        base_name=argv[0].split('.')[0],
        test=parsed.has_arg('--test'),
    )

    rng_seed = 123456
    random.seed(rng_seed)

    path = Path()  # use the default dir_input
    debug = False

    if arg.test:
        name_prefix = 'testing-'
    else:
        name_prefix = ''
    out_name = name_prefix + arg.base_name + '-al-g-sfr' + '.csv'

    control = Bunch(
        arg=arg,
        debug=debug,
        max_sale_price=85e6,  # according to Wall Street Journal
        path=path,
        path_in_census_features=(
            path.dir_working() + 'census-features-derived.csv'),
        path_in_parcels_features_census_tract=(
            path.dir_working() + 'parcels-features-census_tract.csv'),
        path_in_parcels_features_zip5=(
            path.dir_working() + 'parcels-features-zip5.csv'),
        path_out_transactions=path.dir_working() + out_name,
        random_seed=rng_seed,
        test=arg.test,
    )
    return control
def make_control(argv):
    """Assemble the control Bunch for the samples-splitting script.

    argv holds the script name plus at most one further argument; --test is
    the only flag inspected and switches every output file to its
    'testing-' variant. The base name is the text of argv[0] before its
    first '.'.

    Returns a Bunch with the parsed arguments, the debug flag, the test
    fraction, the maximum sale price, the input path, the five output
    paths, the random seed, and the test flag.
    """
    print(argv)
    if not 1 <= len(argv) <= 2:
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name=argv[0].split('.')[0],
        test=command_line.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    working = Path().dir_working()

    prefix = 'testing-' if arg.test else ''
    name_base = prefix + arg.base_name

    def out_path(suffix):
        # every output shares the working directory and the same name base
        return working + name_base + suffix

    return Bunch(
        arg=arg,
        debug=False,
        fraction_test=0.2,
        max_sale_price=85e6,  # according to Wall Street Journal
        path_in=working + 'transactions-al-g-sfr.csv',
        path_out_info_reasonable=out_path('-info-reasonable.pickle'),
        path_out_test=out_path('-test.csv'),
        path_out_train=out_path('-train.csv'),
        path_out_train_validate=out_path('-train-validate.csv'),
        path_out_validate=out_path('-validate.csv'),
        random_seed=seed,
        test=arg.test,
    )
def make_control(argv):
    """Assemble the control Bunch for the 'chart-07' script.

    argv holds the script name plus at most one further argument; --data
    and --test are the flags inspected.

    Side effect: creates the directory <dir_working>chart-07/ when it does
    not already exist.

    Returns a Bunch with the parsed arguments, the debug flag, the two
    input paths, the reduction path, the chart base path, the random seed,
    and the test flag.
    """
    print(argv)
    if not 1 <= len(argv) <= 2:
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name='chart-07',
        data=command_line.has_arg('--data'),
        test=command_line.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    working = Path().dir_working()

    prefix = 'test-' if arg.test else ''
    reduced_name = prefix + 'data.pickle'

    # make sure the directory that receives the output is present
    chart_dir = working + arg.base_name + '/'
    if not os.path.exists(chart_dir):
        os.makedirs(chart_dir)

    return Bunch(
        arg=arg,
        debug=False,
        path_in_best=working + 'chart-06/best.pickle',
        path_in_samples='../data/working/samples-train.csv',
        path_reduction=chart_dir + reduced_name,
        path_chart_base=chart_dir,
        random_seed=seed,
        test=arg.test,
    )
def make_control(argv):
    """Build the control Bunch from the command line.

    argv must have 3 or 4 entries: the script name, --geo with its value,
    and optionally one more argument (--test is the only flag inspected).
    The --geo value must be 'census_tract' or 'zip5'; a missing or invalid
    value is reported through usage().

    Returns a Bunch holding the parsed arguments, the debug flag, the
    maximum sale price, the Path object, the two output paths, the random
    seed, and the test flag.
    """
    print(argv)
    if len(argv) not in (3, 4):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name=argv[0].split('.')[0],
        geo=pcl.get_arg('--geo'),
        test=pcl.has_arg('--test'),
    )
    if arg.geo is None:
        usage('missing --geo')
    if arg.geo not in ('census_tract', 'zip5'):
        # Concatenate the offending value into the message. The earlier form
        # passed "+arg.geo" as a second argument, which applies unary plus
        # to a string and raises TypeError before usage() is ever called.
        usage('invalid GEO value: ' + str(arg.geo))

    random_seed = 123456
    random.seed(random_seed)

    path = Path()  # use the default dir_input
    debug = False

    return Bunch(
        arg=arg,
        debug=debug,
        max_sale_price=85e6,  # according to Wall Street Journal
        path=path,
        path_out_csv=path.dir_working() + arg.base_name + '-' + arg.geo + '.csv',
        path_out_occurs=path.dir_working() + arg.base_name + '-' + arg.geo + '-occurs.pickle',
        random_seed=random_seed,
        test=arg.test,
    )
def make_control(argv):
    """Assemble the control Bunch for the 'chart-02' script.

    argv holds the script name, HP (argv[1]), and at most one further
    argument; --data and --test are the flags inspected. An invocation with
    no arguments after the script name is routed to usage().

    Returns a Bunch with the parsed arguments, the debug flag, the input
    glob, the output base path, the data path, the random seed, and the
    test flag.
    """
    print(argv)
    n_args = len(argv)
    if not 1 <= n_args <= 3:
        usage('invalid number of arguments')
    if n_args == 1:
        usage()

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name='chart-02',
        hp=argv[1],
        data=command_line.has_arg('--data'),
        test=command_line.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    working = Path().dir_working()

    prefix = 'test-' if arg.test else ''
    out_name_base = prefix + arg.base_name + '-' + arg.hp

    # the input is a glob over all files for the chosen hyperparameter
    ege_pattern = 'ege-rfbound-%s-*-folds-10.pickle' % arg.hp

    return Bunch(
        arg=arg,
        debug=False,
        path_in_ege=working + ege_pattern,
        path_out_base=working + out_name_base,
        path_data=working + arg.base_name + '-' + arg.hp + '.data.pickle',
        random_seed=seed,
        test=arg.test,
    )
def make_control(argv):
    """Build the control Bunch for the "valrf" script.

    argv must have 2 or 3 entries: the script name, YYYYMM (argv[1]), and
    optionally one more argument (--test is the only flag inspected).
    YYYYMM is converted to an int; a value that cannot be converted is
    reported through usage().

    Side effect: creates the directory <dir_working>valrf/ when it does not
    already exist.

    Returns a Bunch with the parsed arguments, the debug flag, the input
    and output paths, the random seed, and the test flag.
    """
    print(argv)
    if not (2 <= len(argv) <= 3):
        usage("invalid number of arguments")

    pcl = ParseCommandLine(argv)
    arg = Bunch(base_name="valrf", yyyymm=argv[1], test=pcl.has_arg("--test"))
    try:
        arg.yyyymm = int(arg.yyyymm)
    except (TypeError, ValueError):
        # Only a failed conversion means bad input; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        usage("YYYYMM not an integer")

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    debug = False

    out_file_name = ("test-" if arg.test else "") + "%s.pickle" % arg.yyyymm

    # assure output directory exists
    dir_path = dir_working + arg.base_name + "/"
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + "samples-train.csv",
        path_out=dir_path + out_file_name,
        random_seed=random_seed,
        test=arg.test,
    )
def make_control(argv):
    """Create the control Bunch that drives the 'chart-02' script.

    Accepts the script name, HP as argv[1], and at most one extra argument;
    the flags looked up are --data and --test. A bare invocation (script
    name only) is routed to usage().

    Returns a Bunch with the parsed arguments, the debug flag, the input
    glob, the output base path, the data path, the random seed, and the
    test flag.
    """
    print(argv)
    arg_count = len(argv)
    if arg_count < 1 or arg_count > 3:
        usage('invalid number of arguments')
    if arg_count == 1:
        usage()

    parsed = ParseCommandLine(argv)
    arg = Bunch(
        base_name='chart-02',
        hp=argv[1],
        data=parsed.has_arg('--data'),
        test=parsed.has_arg('--test'),
    )

    rng_seed = 123
    random.seed(rng_seed)

    dir_working = Path().dir_working()
    debug = False

    if arg.test:
        name_prefix = 'test-'
    else:
        name_prefix = ''
    out_base = name_prefix + arg.base_name + '-' + arg.hp

    control = Bunch(
        arg=arg,
        debug=debug,
        path_in_ege=dir_working + 'ege-rfbound-%s-*-folds-10.pickle' % arg.hp,
        path_out_base=dir_working + out_base,
        path_data=dir_working + arg.base_name + '-' + arg.hp + '.data.pickle',
        random_seed=rng_seed,
        test=arg.test,
    )
    return control
def make_control(argv):
    """Create the control Bunch that drives the samples-splitting script.

    Accepts the script name and at most one extra argument; --test is the
    only flag looked up and gives every output file a 'testing-' prefix.
    The base name is whatever precedes the first '.' in argv[0].

    Returns a Bunch with the parsed arguments, the debug flag, the test
    fraction, the maximum sale price, the input path, the five output
    paths, the random seed, and the test flag.
    """
    print(argv)
    arg_count = len(argv)
    if arg_count < 1 or arg_count > 2:
        usage('invalid number of arguments')

    parsed = ParseCommandLine(argv)
    arg = Bunch(
        base_name=argv[0].split('.')[0],
        test=parsed.has_arg('--test'),
    )

    rng_seed = 123
    random.seed(rng_seed)

    dir_working = Path().dir_working()
    debug = False

    if arg.test:
        name_base = 'testing-' + arg.base_name
    else:
        name_base = '' + arg.base_name

    # all outputs sit in the working directory under the same name base
    out_stem = dir_working + name_base

    control = Bunch(
        arg=arg,
        debug=debug,
        fraction_test=0.2,
        max_sale_price=85e6,  # according to Wall Street Journal
        path_in=dir_working + 'transactions-al-g-sfr.csv',
        path_out_info_reasonable=out_stem + '-info-reasonable.pickle',
        path_out_test=out_stem + '-test.csv',
        path_out_train=out_stem + '-train.csv',
        path_out_train_validate=out_stem + '-train-validate.csv',
        path_out_validate=out_stem + '-validate.csv',
        random_seed=rng_seed,
        test=arg.test,
    )
    return control
def make_control(argv):
    """Assemble the control Bunch for the 'testbest' script.

    argv holds the script name plus at most one further argument; --test is
    the only flag inspected and gives the output file a 'test-' prefix.

    Side effect: creates the directory <dir_working>testbest/ when it does
    not already exist.

    Returns a Bunch with the parsed arguments, the debug flag, the two
    input paths, the output path, the random seed, and the test flag.
    """
    print(argv)
    if len(argv) not in (1, 2):
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name='testbest',
        test=command_line.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    working = Path().dir_working()

    prefix = 'test-' if arg.test else ''
    results_name = prefix + '%s.pickle' % 'results'

    # make sure the directory that receives the output is present
    results_dir = working + arg.base_name + '/'
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    return Bunch(
        arg=arg,
        debug=False,
        path_in_data=working + 'samples-train.csv',
        path_in_best=working + 'chart-06/best.pickle',
        path_out=results_dir + results_name,
        random_seed=seed,
        test=arg.test,
    )