def status_printer(_, total=None, desc=None, ncols=None):
    """
    Manage the printing of an IPython/Jupyter Notebook progress bar widget.
    """
    # Fallback to text bar if there's no total
    # DEPRECATED: replaced with an 'info' style bar
    # if not total:
    #     return super(tqdm_notebook, tqdm_notebook).status_printer(file)
    # fp = file

    # Prepare IPython progress bar
    try:
        if total:
            pbar = IProgress(min=0, max=total)
        else:  # No total? Show info style bar with no progress tqdm status
            pbar = IProgress(min=0, max=1)
            pbar.value = 1
            pbar.bar_style = 'info'
    except NameError:
        # #187 #451 #558
        raise ImportError(
            "FloatProgress not found. Please update jupyter and ipywidgets."
            " See https://ipywidgets.readthedocs.io/en/stable"
            "/user_install.html")

    if desc:
        pbar.description = desc
        if IPYW >= 7:
            pbar.style.description_width = 'initial'

    # Prepare status text
    ptext = HTML()
    # Only way to place text to the right of the bar is to use a container
    container = HBox(children=[pbar, ptext])
    # Prepare layout
    if ncols is not None:  # use default style of ipywidgets
        # ncols could be 100, "100px", "100%"
        ncols = str(ncols)  # ipywidgets only accepts string
        try:
            if int(ncols) > 0:  # isnumeric and positive
                ncols += 'px'
        except ValueError:
            pass
        pbar.layout.flex = '2'
        container.layout.width = ncols
        container.layout.display = 'inline-flex'
        container.layout.flex_flow = 'row wrap'
    display(container)

    return container
def _stop_lines(stop_list, lin_chunks):
    progress = FloatProgress(
        min=0, max=len(stop_list), width=975, height=10,
        color=syscolors.rainbow_shades[1], margin=5)
    progress.value = 0
    display(progress)
    stop_lines = {}
    for stop in stop_list:
        stop_lines[stop] = set()
        for lin_chunk in lin_chunks[1:]:
            if 'N=' + str(stop) in lin_chunk:
                stop_lines[stop] = stop_lines[stop].union(
                    [line_name(lin_chunk)])
        progress.value += 1
    return stop_lines
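# A toy illustration (hypothetical chunk text) of the membership test used in
# _stop_lines above. Note that it is plain substring matching, so 'N=10'
# would also match a chunk containing 'N=101'.
chunk_example = "LINE NAME='bus_1', MODE=1, N=101, N=-102, N=103"
print('N=' + str(101) in chunk_example)  # True
print('N=' + str(104) in chunk_example)  # False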
def status_printer(_, total=None, desc=None, ncols=None):
    """
    Manage the printing of an IPython/Jupyter Notebook progress bar widget.
    """
    # Fallback to text bar if there's no total
    # DEPRECATED: replaced with an 'info' style bar
    # if not total:
    #     return super(tqdm_notebook, tqdm_notebook).status_printer(file)
    # fp = file

    # Prepare IPython progress bar
    if IProgress is None:  # #187 #451 #558 #872
        raise ImportError(WARN_NOIPYW)
    if total:
        pbar = IProgress(min=0, max=total)
    else:  # No total? Show info style bar with no progress tqdm status
        pbar = IProgress(min=0, max=1)
        pbar.value = 1
        pbar.bar_style = 'info'
    if ncols is None:
        pbar.layout.width = "20px"

    ltext = HTML()
    rtext = HTML()
    if desc:
        ltext.value = desc
    container = TqdmHBox(children=[ltext, pbar, rtext])
    # Prepare layout
    if ncols is not None:  # use default style of ipywidgets
        # ncols could be 100, "100px", "100%"
        ncols = str(ncols)  # ipywidgets only accepts string
        try:
            if int(ncols) > 0:  # isnumeric and positive
                ncols += 'px'
        except ValueError:
            pass
        pbar.layout.flex = '2'
        container.layout.width = ncols
        container.layout.display = 'inline-flex'
        container.layout.flex_flow = 'row wrap'

    return container
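# A minimal usage sketch of the widget layout that status_printer assembles,
# built with plain ipywidgets (HBox instead of tqdm's TqdmHBox); meant to be
# run in a Jupyter notebook with ipywidgets installed.
from ipywidgets import HBox, HTML, IntProgress
from IPython.display import display

def demo_manual_bar(total=100):
    # left label, bar, right label -- the same three children the
    # tqdm container holds
    left, right = HTML('demo'), HTML()
    bar = IntProgress(min=0, max=total)
    display(HBox(children=[left, bar, right]))
    for i in range(total):
        bar.value = i + 1
        right.value = '%d/%d' % (i + 1, total)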
def _zone_stops(zones, nodes, stop_list, leg_type='contains'):
    if leg_type == 'contains':
        progress = FloatProgress(
            min=0, max=len(list(zones.iterrows())), width=975, height=10,
            color=syscolors.rainbow_shades[1], margin=5)
        progress.value = 0
        display(progress)
        zone_stops = {}
        for zone_id, zone in zones.iterrows():
            zone_stops[zone_id] = []
            for stop_id, stop in nodes.loc[stop_list].iterrows():
                if zone['geometry'].contains(stop['geometry']):
                    zone_stops[zone_id].append(stop_id)
            progress.value += 1
    if leg_type == 'nearest':
        centroids = zones.copy()
        centroids['geometry'] = zones['geometry'].apply(lambda g: g.centroid)
        stops = nodes.loc[stop_list]
        links_a = spatial.nearest(stops, centroids).rename(
            columns={'ix_many': 'zone', 'ix_one': 'stop'})
        links_b = spatial.nearest(centroids, stops).rename(
            columns={'ix_one': 'zone', 'ix_many': 'stop'})
        links = pd.concat([links_a, links_b]).drop_duplicates()
        zone_stops = dict(links.groupby('zone')['stop'].agg(lambda s: list(s)))
    return zone_stops
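# A minimal sketch of the 'contains' test that _zone_stops relies on,
# assuming shapely geometries in the GeoDataFrames' 'geometry' columns:
from shapely.geometry import Point, Polygon

zone_geometry = Polygon([(0, 0), (0, 2), (2, 2), (2, 0)])
stop_geometry = Point(1, 1)
print(zone_geometry.contains(stop_geometry))  # True
print(zone_geometry.contains(Point(3, 3)))    # False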
def dijkstra_powered_single_source_labels(source, graph, start_from=0,
                                          infinity=999999, spread=1,
                                          absolute=0, unique_route_sets=True,
                                          max_transfer=3,
                                          stop_iteration=100000, debug=False,
                                          cutoff=float('inf'),
                                          max_stack=100000):
    """
    From a given source, search a graph for the best paths to all the stops.
    Takes parameters so as to look not only for the best path to every
    destination but for a 'relevant' set of paths.

    :param source: source of the branch and bound search
    :param graph: searched graph (networkx DiGraph)
    :param start_from: first label_id to use
    :param infinity: number to use as infinity when initializing the
        distances of the nodes to the source
    :param spread: if the cost to a node is bigger than spread*best_cost to
        this node, the search stops
    :param absolute: the search actually stops only if cost > spread*best_cost
        AND cost - best_cost > absolute
    :param unique_route_sets: if True, when a path does not beat the
        best_cost to a node, it is only kept if it uses a route set that is
        not used by another path to the node
    :param max_transfer: the search stops when the number of routes
        (footpaths and connections altogether count for a route) is over
        max_transfer + 2
    :return: a list of labels that track the search for the best paths to
        all the stops from the source
    """
    # stops of the actual network (stations)
    stop_set = {node['destination'] for node in graph.nodes.values()}
    root = {
        'stop': graph.node[source]['destination'],
        'node': source,
        'parent': 0,
        'cumulative': 0,
        'visited': [source],
        'route': frozenset([0]),
        'cost': 0
    }
    pile = [root]
    label_id = iter(range(start_from, stop_iteration))
    store = []
    dijkstra = nx.single_source_dijkstra_path_length(graph, source)
    tolerated = {
        key: best * spread + absolute
        for key, best in dijkstra.items()
    }
    node_set = graph.edge.keys()
    # node data: {index: {'destination': ..., 'route_id': ...}}
    data = graph.node
    stack_progress = FloatProgress(
        min=0, max=max_stack, width=975, height=10,
        color=syscolors.rainbow_shades[1], margin=5)
    stack_progress.value = 0
    display(stack_progress)
    iteration_progress = FloatProgress(
        min=0, max=stop_iteration, width=975, height=10,
        color=syscolors.rainbow_shades[0], margin=5)
    iteration_progress.value = 0
    display(iteration_progress)

    def next_labels(label, label_id):
        stop = label['stop']
        node = label['node']
        route = label['route']
        cumulative = label['cumulative']
        cost = label['cost']
        label['label_id'] = iteration_progress.value = label_id
        visited = label['visited']
        store.append(label)
        if len(route) - 2 > max_transfer:
            return []
        # the egress links have the same stop as
        # the transit link that precedes them; they are free
        try:
            neighbors = graph.edge[node]
        except KeyError:
            print_if_debug('not in node_set', debug)
            return []  # the node has no neighbors - no next labels
        if cumulative > cutoff:
            print_if_debug('cutoff', debug)
            return []
        if cumulative > tolerated[node]:
            print_if_debug('dijkstra', debug)
            return []
        proto_labels = [
            {
                'node': key,
                'stop': data[key]['destination'],
                'parent': label_id,
                'cost': value['weight'],
                'cumulative': cumulative + value['weight'],
                'visited': visited + [data[key]['destination']],
                'route': frozenset(route.union({data[key]['route_id']}))
            }
            for key, value in neighbors.items()
            if data[key]['destination'] not in visited[:-1]
        ]
        # an egress has the same destination as the link it follows [:-2]
        print_if_debug(('proto_labels_length', len(proto_labels)), debug)
        return proto_labels

    while len(pile) and len(pile) < max_stack:
        # replace the last element of the stack with all of its children
        pile = next_labels(pile.pop(), next(label_id)) + pile
        stack_progress.value = len(pile)
    return store
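# A self-contained sketch of the Dijkstra bound used for pruning above:
# nx.single_source_dijkstra_path_length gives the best cost to every node,
# and a label is dropped once its cumulative cost exceeds
# best * spread + absolute (toy graph; spread/absolute values illustrative).
import networkx as nx

g = nx.DiGraph()
g.add_weighted_edges_from([('a', 'b', 2), ('b', 'c', 3), ('a', 'c', 10)])
best = nx.single_source_dijkstra_path_length(g, 'a')  # {'a': 0, 'b': 2, 'c': 5}
spread, absolute = 1.5, 1
tolerated_costs = {node: cost * spread + absolute for node, cost in best.items()}
print(tolerated_costs)  # {'a': 1.0, 'b': 4.0, 'c': 8.5}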
def __init__(self, zones=None, nodes=None, text=None, file=None, edges=None,
             build_geometries=False, build_graph=False, sep='line name',
             leg_type='nearest', prj=None):
    progress = FloatProgress(
        min=0, max=5, width=975, height=10,
        color=syscolors.rainbow_shades[1], margin=5)
    progress.value = 1
    display(progress)

    if not text and file:
        with open(file, 'r') as lin_file:
            text = lin_file.read()

    equal = re.compile('[ ]*[=]+[ ]*')
    coma = re.compile('[ ]*[,]+[ ]*')
    # restore canonical upper-case tokens after lower()
    lower_text = text.lower().replace('n=', 'N=').replace(
        'rt=', 'RT=').replace('<<pt>>', '<<PT>>')
    self.text = coma.sub(', ', equal.sub('=', lower_text.replace('"', "'")))
    #: raw text of the .LIN (str)

    stop_list = _stop_list(self.text)
    self.lin_chunks = self.text.split(sep)
    self.sep = sep
    self._to_dict()
    self.line_names = [line_name(c) for c in self.lin_chunks]
    self.line_names = [
        name for name in self.line_names if name != 'not_a_line'
    ]

    if zones is not None:
        zone_stops = _zone_stops(zones, nodes, stop_list, leg_type)
        stop_lines = _stop_lines(stop_list, self.lin_chunks)
        zone_lines = _zone_lines(zone_stops, stop_list, stop_lines)
        hubs = _hubs(zone_stops, stop_lines, zone_lines)
        #: dictionary of the stops of each zone {zone: [stops that are in the zone]}
        self.zone_stops = zone_stops
        #: dictionary of the lines of each stop {stop: [lines that stop]}
        self.stop_lines = stop_lines
        #: dictionary of the lines of each zone {zone: [lines that stop in the zone]}
        self.zone_lines = zone_lines
        #: minimal set of nodes necessary to keep zone_lines stable while pruning zone_stops
        self.hubs = hubs
        self.hubs_and_terminus = self.hubs.union(self.find_endpoints())
        #: list of stop<->line links (based on self.stop_lines)
        self.transitlegs = _transitlegs(self.stop_lines)
        #: list of zone<->stop links (based on self.zone_stops)
        self.nontransitlegs = _nontransitlegs(self.zone_stops)

    self.stop_list = stop_list
    self.zones = zones  #: GeoDataFrame of the zones
    self.prj = prj  #: projection (str)
    self.nodes = nodes  #: GeoDataFrame of the nodes
    self.line_count = _line_count(text)  #: line count by node
    self.data = self.geo_dataframe(geometry=False)  #: data organized as a dataframe
    progress.value += 1

    if build_graph:
        #: nx.Graph built with self.nontransitlegs and self.transitlegs
        self.connection_graph = nx.Graph(
            self.transitlegs + self.nontransitlegs)
        #: OD matrix with the path and the skims of each OD pair in the zoning (built with _path_matrix)
        self.path_matrix = _path_matrix(self.connection_graph, self.zones)
        progress.value += 1

    if build_geometries:
        geometries = pandasshp.od_matrix(zones)
        #: OD matrix with the path and the skims of each OD pair + the geometry
        self.path_matrix_geometries = pd.merge(
            self.path_matrix, geometries, on=['origin', 'destination'])
        progress.value += 1

    if edges is not None:
        self.dijkstra = DijkstraMonkey(edges.values)
        progress.value += 1
import time

from numpy import linspace
from ipywidgets import FloatProgress
from IPython.display import display


def float_progress(min_, max_):
    # standalone demo: fill the bar in 100 steps, one every 0.1 s
    prog = FloatProgress(min=min_, max=max_)
    display(prog)
    for i in linspace(min_, max_, 100):
        time.sleep(0.1)
        prog.value = i
def read(self, dataIds=(), tracts=(), tract=None, patches=(), patch=None,
         filters=None, filter=None, extend=None, copy=True, progress=False):
    dataIDs = list(dataIds)
    if filters is None:
        if filter is None:
            filters = self.filters
        else:
            filters = (filter,)
    if tract is not None:
        tracts = tuple(tracts) + (tract,)
    if patch is not None:
        patches = tuple(patches) + (patch,)
    for t in tracts:
        for p in patches:
            if not isinstance(p, basestring):
                p = "%s,%s" % p
            dataIDs.append(dict(tract=t, patch=p))
    if extend is not None:
        assert extend.schema == self.outSchema
        catalog = lsst.afw.table.SourceCatalog(extend.table)
        catalog.extend(extend)
    else:
        catalog = lsst.afw.table.SourceCatalog(self.outSchema)
    if progress:
        try:
            from IPython.html.widgets import FloatProgress
            from IPython.display import display
            progressBar = FloatProgress(min=0, max=len(dataIDs))
            display(progressBar)
        except (ImportError, RuntimeError):
            progressBar = None
    for n, dataID in enumerate(dataIDs):
        subCat = lsst.afw.table.SourceCatalog(catalog.table)
        refCat = self.butler.get("deepCoadd_ref", immediate=True,
                                 flags=lsst.afw.table.SOURCE_IO_NO_FOOTPRINTS,
                                 **dataID)
        subCat.extend(refCat, mapper=self.refMapper)
        subCat[self.tractKey][:] = dataID["tract"]
        patchX, patchY = (int(p) for p in dataID["patch"].split(","))
        subCat[self.patchXKey][:] = patchX
        subCat[self.patchYKey][:] = patchY
        calibs = dict()
        for b in self.filters:
            coadd = self.butler.get("deepCoadd", immediate=True,
                                    filter="HSC-%s" % b.upper(), **dataID)
            calibs[b] = coadd.getCalib()
        for b, mapper in self.measMappers.iteritems():
            measCat = self.butler.get("deepCoadd_meas", immediate=True,
                                      flags=lsst.afw.table.SOURCE_IO_NO_FOOTPRINTS,
                                      filter="HSC-%s" % b.upper(), **dataID)
            for inRecord, outRecord in zip(measCat, subCat):
                outRecord.assign(inRecord, mapper)
        for b, mapper in self.forcedMappers.iteritems():
            forcedCat = self.butler.get("deepCoadd_forced_src", immediate=True,
                                        flags=lsst.afw.table.SOURCE_IO_NO_FOOTPRINTS,
                                        filter="HSC-%s" % b.upper(), **dataID)
            for inRecord, outRecord in zip(forcedCat, subCat):
                outRecord.assign(inRecord, mapper)
        for b, magc in self.measMags.iteritems():
            for m in magc:
                m(subCat, calibs[b])
        for b, magc in self.forcedMags.iteritems():
            for m in magc:
                m(subCat, calibs[b])
        catalog.extend(subCat, False)
        if progress:
            if progressBar is not None:
                progressBar.value = n + 1
            else:
                print "Loaded %s (%d of %d)" % (dataID, n + 1, len(dataIDs))
    catalog.sort()
    if copy:
        catalog = catalog.copy(deep=True)
    return catalog
def performRandomQuickTrainigAnalysis(X_train, y_train, nvalidate,
                                      windown=60, nanalysis=-1):
    """
    Cross-validate the model: train it on a randomly placed window of size
    `windown`. The training is not progressively cumulative (a new forest is
    fit each time); produces an array of `nanalysis` accuracy measurements,
    one per window placement.
    """
    # nvalidate: number of samples to use for validation
    # possible shifts: N - windown + 1
    pshifts = round(X_train.index.size - windown + 1 - nvalidate)
    print('Size train set: ', X_train.shape)
    print('samples in each window: ', windown)
    if nanalysis < 0:
        # default: number of windows that fit inside the data
        nanalysis = round((X_train.index.size - nvalidate) / windown)
    # elif nanalysis >= pshifts:
    #     print("Error")
    #     return
    print('number of analysis: ', nanalysis)
    clfmodel = ExtraTreesClassifier(n_estimators=700, n_jobs=-1)
    step = int(round(pshifts / nanalysis))  # window shift step in samples
    shifts = range(0, pshifts, step)
    accuracies = np.zeros(len(shifts))  # cross-validation results
    iaccuracies = np.zeros(len(shifts))  # index of the last sample in the window
    f = FloatProgress(min=0, max=nanalysis)
    display(f)
    # sliding window with `step` samples of shift
    for j, i in enumerate(shifts):
        # shift, classify and cross-validate
        f.value = j  # analysis counter for the progress bar
        # pick a random beginning for the window
        i = np.random.randint(0, X_train.index.size - windown - nvalidate)
        # train on the window samples
        X_trainFolds = X_train[i:i + windown]
        y_trainFolds = y_train[i:i + windown]
        # test on the samples just after the window
        X_testFold = X_train[i + windown + 1:i + windown + nvalidate]
        y_testFold = y_train[i + windown + 1:i + windown + nvalidate]
        clfmodel.fit(X_trainFolds, y_trainFolds)
        accuracies[j] = clfmodel.score(X_testFold, y_testFold)
        iaccuracies[j] = i + windown
    return (iaccuracies, accuracies), clfmodel
def performCV_analysis(X_train, y_train, windown, nanalysis=-1, nvalidate=2,
                       algorithm='ET'):
    """
    Cross-validate the model: train it on a sliding window of size `windown`,
    moving sequentially through the data; produces an array of `nanalysis`
    accuracy measurements, one per window placement.
    """
    # nvalidate: number of samples to use for validation
    # possible shifts: N - windown + 1
    pshifts = round(X_train.index.size - windown + 1 - nvalidate)
    print('Size train set: ', X_train.shape)
    print('samples in each window: ', windown)
    if nanalysis < 0:
        # default: number of windows that fit inside the data
        nanalysis = round((X_train.index.size - nvalidate) / windown)
        print('number of analysis: ', nanalysis)
    elif nanalysis >= pshifts:
        print("Error")
        return
    if algorithm == 'RF':
        # random forest binary classifier
        clf = RandomForestClassifier(n_estimators=700, n_jobs=-1)
    else:
        # extra trees binary classifier
        clf = ExtraTreesClassifier(n_estimators=700, n_jobs=-1)
    step = int(round(pshifts / nanalysis))  # window shift step in samples
    shifts = range(0, pshifts, step)
    accuracies = np.zeros(len(shifts))  # cross-validation results
    iaccuracies = np.zeros(len(shifts))  # index of the last sample in the window
    f = FloatProgress(min=0, max=nanalysis)
    display(f)
    # sliding window with `step` samples of shift
    for j, i in enumerate(shifts):
        # shift, classify and cross-validate
        f.value = j  # analysis counter for the progress bar
        # train on the window samples
        X_trainFolds = X_train[i:i + windown]
        y_trainFolds = y_train[i:i + windown]
        # test on the samples just after the window
        X_testFold = X_train[i + windown + 1:i + windown + nvalidate]
        y_testFold = y_train[i + windown + 1:i + windown + nvalidate]
        clf.fit(X_trainFolds, y_trainFolds)
        accuracies[j] = clf.score(X_testFold, y_testFold)
        iaccuracies[j] = i + windown
    return (iaccuracies, accuracies), clf
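# A self-contained sketch of the sliding-window validation scheme used by the
# two functions above, on synthetic data (smaller forest so it runs quickly;
# the window, validation and step sizes are illustrative):
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier

rng = np.random.default_rng(0)
X_demo = pd.DataFrame(rng.normal(size=(200, 4)))
y_demo = pd.Series(rng.integers(0, 2, size=200))

window, validate, step = 60, 10, 20
scores = []
for i in range(0, len(X_demo) - window - validate, step):
    clf_demo = ExtraTreesClassifier(n_estimators=50, n_jobs=-1)
    clf_demo.fit(X_demo.iloc[i:i + window], y_demo.iloc[i:i + window])
    # score on the samples immediately after the training window
    scores.append(clf_demo.score(X_demo.iloc[i + window:i + window + validate],
                                 y_demo.iloc[i + window:i + window + validate]))
print(scores)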
def csa(_links, start, infinity=999999, connection_time=False, origins=False):
    links = _links.copy()
    origin_set = set(links['origin']).intersection(set(origins)) \
        if origins else set(links['origin'])
    stop_set = set(links['origin']).union(set(links['destination']))
    progress = FloatProgress(
        min=0, max=len(origin_set), width=975, height=10,
        color=syscolors.rainbow_shades[1], margin=5)
    progress.value = 1
    display(progress)
    links['reachable'] = False
    # CSA scans the connections in order; `_links` is assumed to be sorted
    # by departure time
    csa_connections = links.to_dict(orient='records')
    earliest_arrival_time_dict = {}
    earliest_arrival_link_dict = {}
    reachable_connections_dict = {}
    reachable_trips_dict = {}
    connection_time = connection_time if connection_time else {
        s: 0 for s in stop_set
    }
    print(len(origin_set))
    for origin in list(origin_set):
        progress.value += 1
        reachable_connections = {l: 0 for l in list(links['index'])}
        reachable_trips = {t: 0 for t in list(links['trip_id'])}
        earliest_arrival_time = {s: infinity for s in stop_set}
        earliest_arrival_time[origin] = start
        earliest_arrival_link = {}

        def is_reachable(label):
            r = reachable_trips[label['trip_id']] or \
                earliest_arrival_time[label['origin']] \
                + connection_time[label['destination']] \
                <= label['departure_time']
            return r

        def scan(label):
            reachable = is_reachable(label)
            reachable_trips[label['trip_id']] = reachable
            reachable_connections[label['index']] = reachable
            if reachable:
                if earliest_arrival_time[label['destination']] \
                        > label['arrival_time']:
                    earliest_arrival_time[label['destination']] = \
                        label['arrival_time']
                    earliest_arrival_link[label['destination']] = \
                        label['index']

        for connection in csa_connections:
            scan(connection)

        earliest_arrival_time_dict[origin] = earliest_arrival_time
        earliest_arrival_link_dict[origin] = earliest_arrival_link
        reachable_connections_dict[origin] = reachable_connections
        reachable_trips_dict[origin] = reachable_trips

    return {
        'earliest_arrival_time_dict': earliest_arrival_time_dict,
        'earliest_arrival_link_dict': earliest_arrival_link_dict,
        'reachable_connections_dict': reachable_connections_dict,
        'reachable_trips_dict': reachable_trips_dict
    }
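# A hedged sketch of the `_links` timetable that csa expects; the column
# names are inferred from the function body ('index', 'trip_id', 'origin',
# 'destination', 'departure_time', 'arrival_time'), and the connections are
# listed in departure-time order as CSA requires. The call itself is
# commented out because csa also needs the notebook-side FloatProgress /
# display / syscolors imports.
import pandas as pd

links_demo = pd.DataFrame([
    {'index': 0, 'trip_id': 't1', 'origin': 'A', 'destination': 'B',
     'departure_time': 100, 'arrival_time': 200},
    {'index': 1, 'trip_id': 't1', 'origin': 'B', 'destination': 'C',
     'departure_time': 210, 'arrival_time': 300},
])
# result = csa(links_demo, start=0, origins=['A'])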
alphabeta_use = t[1]
alphabeta_sd_use = t[2]
d = t[3]
d_shift = t[4]
d_sd = t[5]
n0 = t[6]
TCP_pop = t[-2]
TCPs = t[-3]
nom_doses = t[-1]
d_interest = t[8]
frac_interest = t[9]
TCP_cure_at_d_interest = t[10]
max_d = 100

results_array = np.append(results_array, TCP_cure_at_d_interest)
f.value = i  # update the progress bar

## include model parameters into an array and insert before results
param_array = np.array([])
param_array = np.append(param_array, n)
param_array = np.append(param_array, k)
param_array = np.append(param_array, alphabeta_use)
param_array = np.append(param_array, alphabeta_sd_use)
param_array = np.append(param_array, d)
param_array = np.append(param_array, d_shift)
param_array = np.append(param_array, d_sd)
param_array = np.append(param_array, n0)
param_array = np.append(param_array, max_d)
param_array = np.append(param_array, d_interest)
# param_array = np.append(param_array, frac_interest)
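# Note (an aside, not from the original): growing arrays with repeated
# np.append copies the array each time; collecting the values in a list and
# converting once is the usual numpy idiom. A minimal sketch:
import numpy as np

param_values = [1.0, 2.0, 3.0]  # illustrative values
param_array_alt = np.asarray(param_values, dtype=float)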