def print_model_scores(kfold_results):
    """Print the mean AUC of every model, aggregated across all k-fold runs.

    Args:
        kfold_results: iterable of per-run results where the LAST element of
            each result is a dict mapping model name -> list of fold scores.
    """
    # The score dict is the last item of each k-fold result tuple.
    fold_score_dicts = [result[-1] for result in kfold_results]
    # Union of all model names seen in any fold.
    model_names = set(flatten([list(scores.keys()) for scores in fold_score_dicts]))
    for name in model_names:
        # Pool every fold's scores for this model before averaging.
        mean_auc = np.mean(flatten([scores[name] for scores in fold_score_dicts]))
        print(f"{name} AUC: \t {mean_auc}")
def __init__(self, features: List[Union[Feature, List[Feature]]], cache_table: str = None): """ features to be used in this builder Args: features: a list of features cache_table: Optional, if specified will look/save calculated features in the cache """ self.features = flatten(features) self.cache_table = cache_table
def main():
    """Entry point for the bokeh building-task dashboard.

    Seeds RNGs, forces serial multiprocessing, builds the feature factory for
    the task, and mounts the dashboard tabs on the current bokeh document.

    Raises:
        RuntimeError: always, until the `task` placeholder below is replaced
            with a real task object.
    """
    task = None  # TODO: construct the real building task here
    if task is None:
        # BUG FIX: `task` was a bare None placeholder that was immediately
        # dereferenced (`task.embedder.features`), crashing with an opaque
        # AttributeError. Fail fast with an explicit message instead.
        raise RuntimeError(
            "main(): 'task' is an unimplemented placeholder (None) - "
            "construct a building task before launching the dashboard")
    random.seed(42)
    numpy.random.seed(42)
    # Keep all feature computation in-process so the dashboard stays debuggable.
    multiproc_util.force_serial = True
    # get the feature names
    feature_factory = PostgresFeatureFactory(task.embedder.features, input_gs=None)
    # NOTE(review): currently unused because the FeatureDashboard tab below is
    # commented out; kept for when that tab is restored.
    all_feat_names = flatten([feat.feature_names for feat in feature_factory.features])
    # create bokeh tabs
    tabs = []
    # tabs += [Panel(child=FeatureDashboard(all_feat_names).main_panel, title="Feature Exploration")]
    tabs += [Panel(child=BuildingTaskDashboard(task).main_panel, title="Task")]  # TODO: doesn't work for some reason
    tabs = Tabs(tabs=tabs)
    curdoc().add_root(tabs)
def all_feat_names(self) -> List[str]: return flatten([feat.feature_names for feat in self.features])
def parmap(f: Callable, X: List[object], nprocs=multiprocessing.cpu_count(), force_parallel=False,
           chunk_size=1, use_tqdm=False, keep_child_tqdm=True, **tqdm_kwargs) -> list:
    """ Utility function for doing parallel calculations with multiprocessing.
    Splits the parameters into chunks (if wanted) and calls.
    Equivalent to list(map(func, params_iter))
    Args:
        f: The function we want to calculate for each element
        X: The parameters for the function (each element is a list)
        chunk_size: Optional, the chunk size for the workers to work on
        nprocs: The number of procs to use (defaults for all cores)
        force_parallel: Use the pool even when a single proc would be chosen
        use_tqdm: Whether to use tqdm (default to False)
        keep_child_tqdm: Passed to the spawned workers to control their tqdm bars
        tqdm_kwargs: kwargs passed to tqdm
    Returns:
        The list of results after applying func to each element
    Has problems with using self.___ as variables in f (causes self to be pickled)
    """
    # Empty input: behave exactly like list(map(f, [])).
    if len(X) == 0:
        return []  # like map
    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there is enough procs for a batch size of 1
    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1
    if len(X) < nprocs:
        if nprocs != multiprocessing.cpu_count():
            print("parmap too much procs")
        nprocs = len(X)  # too much procs
    # Each work item carries (element, function, index, keep_child_tqdm) for the spawn helper.
    args = zip(X, [f] * len(X), range(len(X)), [keep_child_tqdm] * len(X))
    if chunk_size > 1:
        args = list(chunk_iterator(args, chunk_size))
        s_fun = _chunk_spawn_fun  # spawn fun
    else:
        s_fun = _spawn_fun  # spawn fun
    # `force_serial` is a module-level flag (see multiproc_util usage elsewhere).
    if (nprocs == 1 and not force_parallel) or force_serial:  # we want it serial (maybe for profiling)
        return list(map(f, tqdm(X, disable=not use_tqdm, **tqdm_kwargs)))
    try:  # try-catch hides bugs
        # Track the currently-used proc count in a module-level global, restored below.
        global proc_count
        old_proc_count = proc_count
        proc_count = nprocs
        p = Pool(nprocs)
        p.restart(force=True)  # can throw if current proc is daemon
        if use_tqdm:
            retval_par = tqdm(p.imap(lambda arg: s_fun(arg), args),
                              total=int(len(X) / chunk_size), **tqdm_kwargs)
        else:
            # import pdb
            # pdb.set_trace()
            retval_par = p.map(lambda arg: s_fun(arg), args)
        retval = list(retval_par)  # make it like the original map
        if chunk_size > 1:
            # Chunked workers return lists of lists; restore a flat result list.
            retval = flatten(retval)
        p.terminate()
        proc_count = old_proc_count
        # NOTE(review): looks like leftover debug code — raises NameError unless a
        # module-level `i` is defined elsewhere in this file; confirm and remove.
        global i
        i += 1
    except AssertionError as e:
        # if e == "daemonic processes are not allowed to have children":
        # NOTE(review): presumably catching pathos/multiprocessing's daemon
        # assertion; falls back to a serial map inside a worker process.
        retval = list(map(f, tqdm(X, disable=not use_tqdm, **tqdm_kwargs)))  # can't have pool inside pool
    return retval
# ROADS (MAJOR_ROAD, OSM_LINE_TABLE, "major_road", all_radii_up_to(100, 1000), relevant_feat_types("line_length"), 1), (MINOR_ROAD, OSM_LINE_TABLE, "minor_road", all_radii_up_to(50, 250), relevant_feat_types("line_length"), 1), # BUILDING (BUILDING, OSM_POLYGON_TABLE, "building", all_radii_up_to(50, 250), relevant_feat_types("polygon_area"), 1), # (BUILDING, OSM_POLYGON_TABLE, "building", all_radii_up_to(0, 250), [Heights], 2), # doesn't work for normal osm (BUILDING, OSM_POLYGON_TABLE, "building", all_radii_up_to(max_radius=0), [AreaOf], 3), # (JUNCTIONS, JUNCTIONS_TABLE, "junction", all_radii_up_to(max_radius=250), [OSMRoute]), ] all_bundle_features = flatten([karka_bundle_features]) # TODO: bad name def create_building_features(elements=None, level_importance=10): if elements is None: elements = all_bundle_features all_features = [] for filt, table_name, obj_name, radii, features_types, importance in elements: if importance <= level_importance: for radius, feat_type in product(radii, features_types): all_features.append( feat_type(filt, table_name, obj_name, radius=radius)) return all_features