Ejemplo n.º 1
0
def load_data(directory, do_fit2D=False, do_filtering=False):
    """Load every usable image below ``directory`` into one flat, unsorted list.

    Sub-directories of ``directory`` whose name starts with a number followed
    by ``ms`` (for example ``10ms`` or ``-0.5ms``) are scanned.  In each of
    them every file named ``<anything>_<digits>.png`` is passed to
    ``Image_Load`` and the resulting object is kept only when its ``isgood``
    attribute is true.  A progress bar widget is displayed and advanced once
    per scanned directory.

    The list is returned as is; this function does not group the images per
    folder or per shot type.

    :param directory: root folder that contains the ``<number>ms`` folders
    :param do_fit2D: forwarded to ``Image_Load`` (off by default)
    :param do_filtering: forwarded to ``Image_Load`` (off by default)
    :return: list of the ``Image_Load`` instances flagged as good
    """
    import os
    import re

    folder_pattern = re.compile(r'[-+]?[0-9.]+ms')
    # The dot before the extension is escaped and the pattern is anchored at
    # the end, so names such as 'a_1Xpng' or 'a_1.png.bak' are not loaded.
    image_pattern = re.compile(r'.*_\d+\.png$')

    dirs = []
    for dr in os.listdir(directory):
        path = os.path.join(directory, dr)
        # A plain file with a matching name would make os.listdir() fail below.
        if folder_pattern.match(dr) and os.path.isdir(path):
            dirs.append(path)

    all_data = []
    w = FloatProgress(min=0, max=len(dirs), value=0)
    w.description = 'Loading in progress...'
    display(w)
    for dr in dirs:
        w.value += 1
        files = [
            os.path.join(dr, fl) for fl in os.listdir(dr)
            if image_pattern.match(fl)
        ]
        for url in files:
            new_im = Image_Load(url, do_fit2D, do_filtering)
            if new_im.isgood:
                all_data.append(new_im)
    w.bar_style = 'success'
    w.description = 'Loading Done'
    print('Total number of images: ', len(all_data))
    return all_data
Ejemplo n.º 2
0
def normalise_individual_image(dictionary,
                               signal_shot,
                               calibration_shot,
                               attribute,
                               index=None,
                               do_fit2D=False):
    """Normalise every signal image by a value taken from its calibration image.

    For each folder of ``dictionary`` every element of ``f_dict[signal_shot]``
    is paired with the first element of ``f_dict[calibration_shot]`` that has
    the same ``shotN``.  The signal image is divided by
    ``get_value(calibration_element, attribute, index)`` and wrapped in
    ``Image_Fitted``.  Signal images without a calibration partner are
    reported and skipped.

    :param dictionary: ``{folderN: {shot_typeN: [image objects]}}``
    :param signal_shot: key of the shots to normalise
    :param calibration_shot: key of the shots used as reference
    :param attribute: forwarded to ``get_value`` (usually 'total' or
        'x_data_fit' according to the original documentation)
    :param index: forwarded to ``get_value``
    :param do_fit2D: forwarded to ``Image_Fitted``
    :return: ``{folderN: {signal_shot: [Image_Fitted, ...]}}``; folders in
        which no image could be normalised are left out
    """
    norm_data = dict()
    w = FloatProgress(min=0, max=len(dictionary), value=0)
    w.description = 'Normalizing in progress...'
    display(w)
    for folderN, f_dict in dictionary.items():
        w.value += 1
        # A plain list (not the result of a free ``append`` function) keeps
        # the emptiness test below reliable and matches the list-of-images
        # structure used by the other helpers.
        calibrated_images = []
        for s_elem in f_dict[signal_shot]:
            c_elem = next((candidate
                           for candidate in f_dict[calibration_shot]
                           if candidate.shotN == s_elem.shotN), None)
            if c_elem is None:
                # Report the actual image instead of a literal placeholder.
                print(s_elem.image_url, 'has no calibration image')
                continue
            calibrated_images.append(
                Image_Fitted(
                    s_elem.image / get_value(c_elem, attribute, index),
                    do_fit2D))
        if calibrated_images:
            norm_data[folderN] = {signal_shot: calibrated_images}
    w.bar_style = 'success'
    w.description = 'Normalizing Done'
    print('Normalization is complited')
    return norm_data
Ejemplo n.º 3
0
    def print_status(self, n, total, elapsed):
        """Show ``n`` on the progress widget, creating the widget on first use.

        The widget is cached in ``self._F``; its description is the text
        returned by ``self.build_str_meter(n, total, elapsed)``.
        """
        from IPython.html.widgets import FloatProgress
        meter_text = self.build_str_meter(n, total, elapsed)
        if self._F is None:
            # First call: build the bar, remember it, then put it on screen.
            created = FloatProgress(min=0, max=total, description=meter_text)
            self._F = created
            display(created)

        bar = self._F
        bar.value = n
        bar.description = meter_text
Ejemplo n.º 4
0
def rearrange_data(all_data):
    """Group a flat list of images into ``{folderN: {shot_typeN: [images]}}``.

    Elements keep their input order inside each list.  A progress bar widget
    is displayed and advanced once per element.

    :param all_data: iterable of objects exposing ``folderN`` and ``shot_typeN``
    :return: the nested dictionary described above
    """
    grouped = dict()
    bar = FloatProgress(min=0, max=len(all_data), value=0)
    bar.description = 'Rearranging in progress...'
    display(bar)
    for image in all_data:
        bar.value += 1
        per_folder = grouped.setdefault(image.folderN, dict())
        per_folder.setdefault(image.shot_typeN, []).append(image)
    bar.bar_style = 'success'
    bar.description = 'Rearranging Done'
    print('Rearranging to dictionary is complited')
    return grouped
Ejemplo n.º 5
0
def _stop_lines(stop_list, lin_chunks):
    """Map every stop to the set of line names whose chunk mentions it.

    A chunk mentions a stop when it contains the text ``'N=' + str(stop)``.
    The first element of ``lin_chunks`` is never inspected.  A progress bar
    widget is displayed and advanced once per stop.

    :param stop_list: iterable of stop identifiers
    :param lin_chunks: sequence of text chunks, one per line definition
    :return: ``{stop: set of line_name(chunk)}``
    """
    bar = FloatProgress(min=0,
                        max=len(stop_list),
                        width=975,
                        height=10,
                        color=syscolors.rainbow_shades[1],
                        margin=5)
    bar.value = 0
    display(bar)
    lines_by_stop = {}
    for stop in stop_list:
        marker = 'N=' + str(stop)
        lines_by_stop[stop] = {
            line_name(chunk) for chunk in lin_chunks[1:] if marker in chunk
        }
        bar.value += 1
    return lines_by_stop
Ejemplo n.º 6
0
def normalise_avr_image(dictionary,
                        signal_shot,
                        calibration_shot,
                        attribute,
                        index=None,
                        do_fit2D=True):
    """Normalise the averaged signal image of every folder.

    For each folder the image of ``f_dict[signal_shot]`` is divided by
    ``get_value(f_dict[calibration_shot], attribute, index)`` and wrapped in
    ``Image_Fitted``.

    :param dictionary: ``{folderN: {shot_typeN: averaged object}}``
    :param signal_shot: key of the shot to normalise
    :param calibration_shot: key of the shot used as reference
    :param attribute: forwarded to ``get_value``
    :param index: forwarded to ``get_value``
    :param do_fit2D: forwarded to ``Image_Fitted``
    :return: ``{folderN: {signal_shot: Image_Fitted}}``
    """
    result = dict()
    bar = FloatProgress(min=0, max=len(dictionary), value=0)
    bar.description = 'Normalizing in progress...'
    display(bar)
    for folderN, shots in dictionary.items():
        bar.value += 1
        signal_image = shots[signal_shot].image
        reference = get_value(shots[calibration_shot], attribute, index)
        result[folderN] = {
            signal_shot: Image_Fitted(signal_image / reference, do_fit2D)
        }
    bar.bar_style = 'success'
    bar.description = 'Normalizing Done'
    print('Normalization is complited')
    return result
Ejemplo n.º 7
0
def sift(dataD, confidence_interval=0.1):
    """Remove, in place, the images whose fitted centre is far from the average.

    For every shot list an ``Avr_inf`` is built (without 2D fit).  An element
    is removed from the list when the relative difference between its
    ``x_data_fit[1]`` (or ``y_data_fit[1]``) and the same value of the average
    exceeds ``confidence_interval``.  According to the original documentation
    these values are the centres of the 1D Gaussian fits.

    Empty shot lists are skipped, as ``average_data`` does, instead of being
    handed to ``Avr_inf``.

    :param dataD: ``{folderN: {shot_typeN: [images]}}``; the lists are mutated
    :param confidence_interval: largest accepted relative deviation
    :return: None
    """
    w = FloatProgress(min=0, max=len(dataD), value=0)
    w.description = 'Sifting in progress...'
    display(w)
    for folder_dict in dataD.values():
        w.value += 1
        for shot_list in folder_dict.values():
            if len(shot_list) == 0:
                # Nothing to average and nothing to remove.
                continue
            avr_inf = Avr_inf(shot_list, do_fit2D=False)
            # Read the reference centres once instead of once per element.
            x_center = avr_inf.x_data_fit[1]
            y_center = avr_inf.y_data_fit[1]
            to_remove = [
                elem for elem in shot_list
                if abs(elem.x_data_fit[1] - x_center) / x_center >
                confidence_interval
                or abs(elem.y_data_fit[1] - y_center) / y_center >
                confidence_interval
            ]
            # Removal happens after the scan so the list is not modified
            # while it is being iterated.
            for elem in to_remove:
                print('remove element', shot_list.index(elem), elem.image_url)
                shot_list.remove(elem)
    w.bar_style = 'success'
    w.description = 'Sifting Done'
Ejemplo n.º 8
0
def average_data(dataD, do_fit2D=True):
    """Average every non-empty shot list of ``dataD``.

    The result mirrors the layout of the input, with each list replaced by
    one ``Avr_inf`` instance::

        folderN1 -> shot_typeN1 -> Avr_inf
                    shot_typeN2 -> Avr_inf
        folderN2 -> ...

    Shot types whose list equals ``[]`` are left out; the folder key is kept
    even when all of its lists are empty.

    :param dataD: ``{folderN: {shot_typeN: [images]}}``
    :param do_fit2D: forwarded to ``Avr_inf`` (on by default)
    :return: ``{folderN: {shot_typeN: Avr_inf}}``
    """
    averaged = dict()
    bar = FloatProgress(min=0, max=len(dataD), value=0)
    bar.description = 'Averaging in progress...'
    display(bar)
    for folderN, folder_dict in dataD.items():
        bar.value += 1
        averaged[folderN] = {
            shot_typeN: Avr_inf(shot_list, do_fit2D)
            for shot_typeN, shot_list in folder_dict.items()
            if shot_list != []
        }
    bar.bar_style = 'success'
    bar.description = 'Averaging Done'
    print('Averaging is complited')
    return averaged
Ejemplo n.º 9
0
def _zone_stops(zones, nodes, stop_list, leg_type='contains'):
    """Return the stops attached to every zone as ``{zone: [stops]}``.

    Two attachment rules are available:

    * ``'contains'``: a stop belongs to a zone when the zone geometry
      contains the stop geometry.  Every zone gets an entry, possibly an
      empty list.  A progress bar widget is advanced once per zone.
    * ``'nearest'``: links are built with ``spatial.nearest`` in both
      directions (stops to zone centroids and zone centroids to stops),
      merged, de-duplicated and grouped per zone.

    :param zones: table of zones with a 'geometry' column
    :param nodes: table of nodes with a 'geometry' column, indexed by stop id
    :param stop_list: labels of ``nodes`` to consider
    :param leg_type: ``'contains'`` or ``'nearest'``
    :return: dictionary ``{zone: [stops]}``
    :raises ValueError: if ``leg_type`` is not one of the two supported values
    """
    # Fail early with a clear message; an unknown value used to fall through
    # both branches and end in an UnboundLocalError on the return statement.
    if leg_type not in ('contains', 'nearest'):
        raise ValueError(
            "leg_type must be 'contains' or 'nearest', got %r" % (leg_type, ))

    # The candidate stops do not depend on the zone: select them once.
    stops = nodes.loc[stop_list]

    if leg_type == 'contains':
        progress = FloatProgress(min=0,
                                 max=len(zones),
                                 width=975,
                                 height=10,
                                 color=syscolors.rainbow_shades[1],
                                 margin=5)
        progress.value = 0
        display(progress)
        zone_stops = {}
        for zone_id, zone in zones.iterrows():
            zone_geometry = zone['geometry']
            zone_stops[zone_id] = [
                stop_id for stop_id, stop in stops.iterrows()
                if zone_geometry.contains(stop['geometry'])
            ]
            progress.value += 1
        return zone_stops

    # leg_type == 'nearest'
    centroids = zones.copy()
    centroids['geometry'] = zones['geometry'].apply(lambda g: g.centroid)

    links_a = spatial.nearest(stops, centroids).rename(columns={
        'ix_many': 'zone',
        'ix_one': 'stop'
    })
    links_b = spatial.nearest(centroids, stops).rename(columns={
        'ix_one': 'zone',
        'ix_many': 'stop'
    })
    links = pd.concat([links_a, links_b]).drop_duplicates()
    return dict(links.groupby('zone')['stop'].agg(lambda s: list(s)))
Ejemplo n.º 10
0
def dijkstra_powered_single_source_labels(source,
                                          graph,
                                          start_from=0,
                                          infinity=999999,
                                          spread=1,
                                          absolute=0,
                                          unique_route_sets=True,
                                          max_transfer=3,
                                          stop_iteration=100000,
                                          debug=False,
                                          cutoff=float('inf'),
                                          max_stack=100000):
    """
    From a given source, enumerate labels (partial paths) through ``graph``.

    A label is a dictionary with the keys 'stop', 'node', 'parent',
    'cumulative', 'visited', 'route' and 'cost'; a 'label_id' key is added
    when the label is processed.  The search keeps a list of pending labels:
    at each step the last one is removed, stored, and its children are put
    at the front of the list.  A label is stored but gets no children when

    * its route set holds more than ``max_transfer + 2`` entries,
    * its node has no entry in ``graph.edge``,
    * its cumulative weight exceeds ``cutoff``, or
    * its cumulative weight exceeds ``best * spread + absolute`` where
      ``best`` is the Dijkstra shortest-path length from ``source`` to its
      node.

    Two progress bar widgets are displayed: one follows the number of pending
    labels, the other the current label id.

    :param source: node of ``graph`` the search starts from
    :param graph: searched graph (networkx DiGraph according to the original
        documentation); node data must provide 'destination' and 'route_id',
        edge data must provide 'weight'
    :param start_from: first label_id to use
    :param infinity: not used in the body of this function
    :param spread: multiplicative tolerance on the best cost to a node
    :param absolute: additive tolerance on the best cost to a node
    :param unique_route_sets: not used in the body of this function
    :param max_transfer: a label whose route set has more than
        ``max_transfer + 2`` entries is not expanded
    :param stop_iteration: exclusive upper bound of the label ids
    :param debug: forwarded to ``print_if_debug``
    :param cutoff: a label whose cumulative weight exceeds it is not expanded
    :param max_stack: the search ends as soon as the number of pending labels
        reaches this value
    :return: list of the processed labels, in processing order
    """

    # NOTE(review): stop_set is not read anywhere below.
    stop_set = {node['destination'] for node in graph.nodes.values()}

    # NOTE(review): ``graph.node`` / ``graph.edge`` are used next to
    # ``graph.nodes.values()``; these attributes come from different networkx
    # API generations - confirm which version this is meant to run on.
    root = {
        'stop': graph.node[source]['destination'],
        'node': source,
        'parent': 0,
        'cumulative': 0,
        'visited': [source],
        'route': frozenset([0]),
        'cost': 0
    }

    pile = [root]
    # NOTE(review): once this range is exhausted, next(label_id) in the main
    # loop raises StopIteration, which is not caught in this function.
    label_id = iter(range(start_from, stop_iteration))
    store = []

    dijkstra = nx.single_source_dijkstra_path_length(graph, source)

    # largest cumulative weight accepted at each node reachable from source
    tolerated = {
        key: best * spread + absolute
        for key, best in dijkstra.items()
    }

    # NOTE(review): node_set is not read anywhere below.
    node_set = graph.edge.keys()
    data = graph.node

    stack_progress = FloatProgress(min=0,
                                   max=max_stack,
                                   width=975,
                                   height=10,
                                   color=syscolors.rainbow_shades[1],
                                   margin=5)

    stack_progress.value = 0
    display(stack_progress)

    iteration_progress = FloatProgress(min=0,
                                       max=stop_iteration,
                                       width=975,
                                       height=10,
                                       color=syscolors.rainbow_shades[0],
                                       margin=5)

    iteration_progress.value = 0
    display(iteration_progress)

    # Stores ``label`` under ``label_id`` and returns its child labels
    # (an empty list when the label must not be expanded).
    def next_labels(label, label_id):

        stop = label['stop']
        node = label['node']
        route = label['route']
        cumulative = label['cumulative']
        cost = label['cost']
        # the iteration progress bar shows the id of the current label
        label['label_id'] = iteration_progress.value = label_id
        visited = label['visited']

        store.append(label)

        if len(route) - 2 > max_transfer:
            return []

        # the egress links have the same stop as
        # the transit link that precedes them, they are free

        try:
            neighbors = graph.edge[node]
        except KeyError:
            print_if_debug('not in node_set', debug)
            return []  # the node has no neighbors - no next labels

        if cumulative > cutoff:
            print_if_debug('cutoff', debug)
            return []

        if cumulative > tolerated[node]:
            print_if_debug('dijkstra', debug)
            return []

        # one child per neighbor whose destination was not visited before
        # the last entry of ``visited``
        proto_labels = [{
            'node':
            key,
            'stop':
            data[key]['destination'],
            'parent':
            label_id,
            'cost':
            value['weight'],
            'cumulative':
            cumulative + value['weight'],
            'visited':
            visited + [data[key]['destination']],
            'route':
            frozenset(route.union({data[key]['route_id']}))
        } for key, value in neighbors.items()
                        if data[key]['destination'] not in visited[:-1]]
        #  an egress has the same destination as the link it follows [:-2]

        print_if_debug(('proto_labels_length', len(proto_labels)), debug)
        return proto_labels

    while len(pile) and len(pile) < max_stack:
        # the last element of the pile is removed and all its children are
        # added at the front of the pile
        pile = next_labels(pile.pop(), next(label_id)) + pile
        stack_progress.value = len(pile)

    return store
Ejemplo n.º 11
0
    def __init__(self,
                 zones=None,
                 nodes=None,
                 text=None,
                 file=None,
                 edges=None,
                 build_geometries=False,
                 build_graph=False,
                 sep='line name',
                 leg_type='nearest',
                 prj=None):
        """Parse the text of a .LIN file and build the derived structures.

        The text is lower-cased (``N=`` and ``RT=`` are put back in upper
        case), double quotes become single quotes and the spacing around
        ``=`` and ``,`` is normalised.  The result is stored in ``self.text``
        and split on ``sep`` into ``self.lin_chunks``.  A progress bar widget
        is displayed and advanced after each main stage.

        :param zones: zones table; when given, the zone/stop/line
            dictionaries, the hubs and the leg lists are computed
        :param nodes: nodes table, forwarded to ``_zone_stops``
        :param text: content of the .LIN file
        :param file: path read (text mode) when ``text`` is empty
        :param edges: when given, ``DijkstraMonkey(edges.values)`` is stored
            in ``self.dijkstra``
        :param build_geometries: merge ``self.path_matrix`` with
            ``pandasshp.od_matrix(zones)``
        :param build_graph: build ``self.connection_graph`` and
            ``self.path_matrix``
        :param sep: separator used to split the text into line chunks
        :param leg_type: forwarded to ``_zone_stops``
        :param prj: stored as ``self.prj``
        """

        # NOTE(review): the bar starts at 1 and is advanced four times, so it
        # ends exactly at its maximum of 5.
        progress = FloatProgress(min=0,
                                 max=5,
                                 width=975,
                                 height=10,
                                 color=syscolors.rainbow_shades[1],
                                 margin=5)
        progress.value = 1
        display(progress)

        # NOTE(review): when neither ``text`` nor ``file`` is given, ``text``
        # is still None here and ``text.lower()`` below fails.
        if not text and file:
            with open(file, 'r') as lin_file:
                text = lin_file.read()

        equal = re.compile('[ ]*[=]+[ ]*')
        coma = re.compile('[ ]*[,]+[ ]*')
        # NOTE(review): the last replace() maps '<<pt>>' to itself and has no
        # effect - presumably an upper-case form was intended; confirm.
        lower_text = text.lower().replace('n=',
                                          'N=').replace('rt=', 'RT=').replace(
                                              '<<pt>>', '<<pt>>')
        self.text = coma.sub(', ', equal.sub('=', lower_text.replace(
            '"', "'")))  #: normalised text of the .LIN (str)

        stop_list = _stop_list(self.text)
        self.lin_chunks = self.text.split(sep)
        self.sep = sep

        self._to_dict()
        self.line_names = [line_name(c) for c in self.lin_chunks]
        # chunks that line_name() reports as 'not_a_line' are dropped
        self.line_names = [
            name for name in self.line_names if name != 'not_a_line'
        ]

        if zones is not None:

            zone_stops = _zone_stops(zones, nodes, stop_list, leg_type)
            stop_lines = _stop_lines(stop_list, self.lin_chunks)
            zone_lines = _zone_lines(zone_stops, stop_list, stop_lines)
            hubs = _hubs(zone_stops, stop_lines, zone_lines)

            self.zone_stops = zone_stops  #: dictionary of the stops of each zone {zone: [stops that are in the zone]}
            self.stop_lines = stop_lines  #: dictionary of the lines of each stop {stop: [lines that stop]}
            self.zone_lines = zone_lines  #: dictionary of the lines of each zone {zone: [lines that stop in the zone]}
            self.hubs = hubs  #: minimal set of nodes that are necessary to keep zone_lines stable while pruning zone_stops
            self.hubs_and_terminus = self.hubs.union(self.find_endpoints())
            self.transitlegs = _transitlegs(
                self.stop_lines
            )  #: list of stop<->line links (based on self.stop_lines)
            self.nontransitlegs = _nontransitlegs(
                self.zone_stops
            )  #: list of zone<->stop links (based on self.zone_stops)

        self.stop_list = stop_list
        self.zones = zones  #: GeoDataFrame of the zones
        self.prj = prj
        self.nodes = nodes  #: GeoDataFrame of the nodes
        # computed from the ``text`` argument, not from the normalised self.text
        self.line_count = _line_count(text)  #: line count by node
        self.data = self.geo_dataframe(
            geometry=False)  #: data organized as a dataframe

        progress.value += 1

        # NOTE(review): in this method self.transitlegs / self.nontransitlegs
        # are only assigned when ``zones`` is given - confirm build_graph is
        # never requested without zones.
        if build_graph:
            self.connection_graph = nx.Graph(
                self.transitlegs + self.nontransitlegs
            )  #: nx.Graph built with self.nontransitlegs and self.transitlegs
            self.path_matrix = _path_matrix(
                self.connection_graph, self.zones
            )  #: OD matrix that contains the path and the skims of each OD pair in the zoning

        progress.value += 1

        # NOTE(review): in this method self.path_matrix is only assigned when
        # ``build_graph`` is true - confirm both flags are always set together.
        if build_geometries:
            geometries = pandasshp.od_matrix(zones)
            self.path_matrix_geometries = pd.merge(
                self.path_matrix, geometries, on=['origin', 'destination']
            )  #: OD matrix that contains the path and the skims of each OD pair in the zoning + the geometry

        progress.value += 1

        if edges is not None:
            self.dijkstra = DijkstraMonkey(edges.values)

        progress.value += 1
Ejemplo n.º 12
0
def performRandomQuickTrainigAnalysis(X_train,
                                      y_train,
                                      nvalidate,
                                      windown=60,
                                      nanalysis=-1):
    """
    Score the model on randomly placed training windows.

    For each analysis a window of ``windown`` consecutive samples is drawn at
    a random position, the classifier is fitted on it and scored on the
    ``nvalidate`` samples that immediately follow the window.  Every fit
    starts from scratch: nothing is accumulated between windows.

    :param X_train: feature table (must expose ``.index.size`` and ``.shape``
        and support row slicing)
    :param y_train: targets, sliced with the same positions as ``X_train``
    :param nvalidate: number of samples used for validation after each window
    :param windown: number of samples in each training window
    :param nanalysis: requested number of analyses; a negative value selects
        the number of whole windows that fit in the data
    :return: ``((iaccuracies, accuracies), clfmodel)`` where ``accuracies[j]``
        is the score of analysis ``j``, ``iaccuracies[j]`` the position just
        after its training window and ``clfmodel`` the classifier as left by
        the last fit
    :raises ValueError: if the data cannot hold one window plus its
        validation samples, or if no analysis is requested
    """
    n_samples = X_train.index.size
    # number of start positions of a block made of one training window
    # followed by its nvalidate validation samples
    pshifts = round(n_samples - windown + 1 - nvalidate)

    print('Size train set: ', X_train.shape)
    print('samples in each window: ', windown)

    if nanalysis < 0:
        # default: as many analyses as whole windows inside the data
        nanalysis = round((n_samples - nvalidate) / windown)

    print('number of analysis: ', nanalysis)

    if pshifts < 1 or nanalysis < 1:
        raise ValueError(
            'not enough samples (%d) for a window of %d plus %d validation '
            'samples, or no analysis requested (%d)' %
            (n_samples, windown, nvalidate, nanalysis))

    clfmodel = ExtraTreesClassifier(n_estimators=700, n_jobs=-1)

    # The step only fixes how many analyses are run; it is kept at 1 or more
    # so that range() never receives a zero step.
    step = max(1, int(round(pshifts / nanalysis)))
    n_runs = len(range(0, pshifts, step))
    accuracies = np.zeros(n_runs)  # store the result of cross-validation
    iaccuracies = np.zeros(n_runs)  # position just after each window

    f = FloatProgress(min=0, max=n_runs)
    display(f)

    for j in range(n_runs):
        # random start of the window among all valid positions
        i = np.random.randint(0, pshifts)

        # train using the window samples
        X_trainFolds = X_train[i:i + windown]
        y_trainFolds = y_train[i:i + windown]
        # test using the nvalidate samples just after the window (the slice
        # starts at i + windown, not one sample later)
        X_testFold = X_train[i + windown:i + windown + nvalidate]
        y_testFold = y_train[i + windown:i + windown + nvalidate]

        clfmodel.fit(X_trainFolds, y_trainFolds)
        accuracies[j] = clfmodel.score(X_testFold, y_testFold)
        iaccuracies[j] = i + windown
        f.value = j + 1  # number of analyses done

    return (iaccuracies, accuracies), clfmodel
Ejemplo n.º 13
0
def performCV_analysis(X_train,
                       y_train,
                       windown,
                       nanalysis=-1,
                       nvalidate=2,
                       algorithm='ET'):
    """
    Score the model on a training window that slides over the data.

    The window of ``windown`` consecutive samples is moved by a constant
    step; at each position the classifier is fitted on the window and scored
    on the ``nvalidate`` samples that immediately follow it.

    NOTE: the classifier is created without ``warm_start``, so with
    scikit-learn each ``fit`` call rebuilds the forest from the current
    window only; the training is not cumulative.

    :param X_train: feature table (must expose ``.index.size`` and ``.shape``
        and support row slicing)
    :param y_train: targets, sliced with the same positions as ``X_train``
    :param windown: number of samples in each training window
    :param nanalysis: requested number of analyses; a negative value selects
        the number of whole windows that fit in the data
    :param nvalidate: number of samples used for validation after each window
    :param algorithm: ``'RF'`` for a random forest, anything else for extra
        trees
    :return: ``((iaccuracies, accuracies), clf)`` where ``accuracies[j]`` is
        the score at position ``j``, ``iaccuracies[j]`` the position just
        after the training window and ``clf`` the classifier as left by the
        last fit; ``None`` (after printing "Error") when an explicit
        ``nanalysis`` is not smaller than the number of possible positions
    :raises ValueError: if the data cannot hold one window plus its
        validation samples, or if no analysis is requested
    """
    n_samples = X_train.index.size
    # number of start positions of a block made of one training window
    # followed by its nvalidate validation samples
    pshifts = round(n_samples - windown + 1 - nvalidate)

    print('Size train set: ', X_train.shape)
    print('samples in each window: ', windown)

    if nanalysis < 0:
        # default: as many analyses as whole windows inside the data
        nanalysis = round((n_samples - nvalidate) / windown)
        print('number of analysis: ', nanalysis)
    elif nanalysis >= pshifts:
        print("Error")
        return

    if pshifts < 1 or nanalysis < 1:
        raise ValueError(
            'not enough samples (%d) for a window of %d plus %d validation '
            'samples, or no analysis requested (%d)' %
            (n_samples, windown, nvalidate, nanalysis))

    if algorithm == 'RF':  # classification model
        # random forest binary classifier
        clf = RandomForestClassifier(n_estimators=700, n_jobs=-1)
    else:
        # extra tree binary classifier
        clf = ExtraTreesClassifier(n_estimators=700, n_jobs=-1)

    # window shift step in samples, kept at 1 or more so that range() never
    # receives a zero step
    step = max(1, int(round(pshifts / nanalysis)))
    shifts = range(0, pshifts, step)
    accuracies = np.zeros(len(shifts))  # store the result of cross-validation
    iaccuracies = np.zeros(len(shifts))  # position just after each window

    f = FloatProgress(min=0, max=len(shifts))
    display(f)

    # sliding window with step samples of shift
    for j, i in enumerate(shifts):  # shift, classify and cross-validate
        # train using the window samples
        X_trainFolds = X_train[i:i + windown]
        y_trainFolds = y_train[i:i + windown]
        # test using the nvalidate samples just after the window (the slice
        # starts at i + windown, not one sample later)
        X_testFold = X_train[i + windown:i + windown + nvalidate]
        y_testFold = y_train[i + windown:i + windown + nvalidate]

        clf.fit(X_trainFolds, y_trainFolds)
        accuracies[j] = clf.score(X_testFold, y_testFold)
        iaccuracies[j] = i + windown
        f.value = j + 1  # number of analyses done

    return (iaccuracies, accuracies), clf
Ejemplo n.º 14
0
def csa(_links, start, infinity=999999, connection_time=False, origins=False):
    """Scan all connections once per origin and collect earliest arrivals.

    For every origin the connections are scanned in the row order of
    ``_links``.  A connection is reachable when its trip was already reached
    or when the earliest arrival at its origin stop, plus the connection
    time, is not later than its departure.  A reachable connection improves
    the earliest arrival at its destination when it arrives sooner.

    NOTE(review): the scan relies on the order of ``_links``; presumably the
    caller provides the connections sorted by departure time - confirm.

    :param _links: table with the columns 'index', 'origin', 'destination',
        'trip_id', 'departure_time' and 'arrival_time'; it is not modified
    :param start: departure time at each origin
    :param infinity: initial arrival time of every stop
    :param connection_time: ``{stop: time}``; when falsy, 0 for every stop
    :param origins: when truthy, only the origins that also appear in it are
        processed
    :return: dictionary with the keys 'earliest_arrival_time_dict',
        'earliest_arrival_link_dict', 'reachable_connections_dict' and
        'reachable_trips_dict', each mapping an origin to its result
    """
    links = _links.copy()
    link_origins = set(links['origin'])
    origin_set = link_origins.intersection(
        set(origins)) if origins else link_origins
    stop_set = link_origins.union(set(links['destination']))

    progress = FloatProgress(min=0,
                             max=len(origin_set),
                             width=975,
                             height=10,
                             color=syscolors.rainbow_shades[1],
                             margin=5)
    # start at 0 so the bar ends exactly at its maximum
    progress.value = 0
    display(progress)

    links['reachable'] = False
    csa_connections = links.to_dict(orient='records')
    # the identifiers are the same for every origin: extract them once
    link_indices = list(links['index'])
    trip_ids = list(links['trip_id'])

    earliest_arrival_time_dict = {}
    earliest_arrival_link_dict = {}
    reachable_connections_dict = {}
    reachable_trips_dict = {}

    if not connection_time:
        connection_time = {s: 0 for s in stop_set}

    print(len(origin_set))
    for origin in origin_set:

        progress.value += 1

        reachable_connections = {l: 0 for l in link_indices}
        reachable_trips = {t: 0 for t in trip_ids}

        earliest_arrival_time = {s: infinity for s in stop_set}
        earliest_arrival_time[origin] = start
        earliest_arrival_link = {}

        for label in csa_connections:
            # NOTE(review): the connection time is looked up at the
            # destination of the connection - confirm this is intended.
            reachable = reachable_trips[label['trip_id']] or \
                earliest_arrival_time[label['origin']] \
                + connection_time[label['destination']] \
                <= label['departure_time']
            reachable_trips[label['trip_id']] = reachable
            reachable_connections[label['index']] = reachable

            if reachable and earliest_arrival_time[
                    label['destination']] > label['arrival_time']:
                earliest_arrival_time[
                    label['destination']] = label['arrival_time']
                earliest_arrival_link[label['destination']] = label['index']

        earliest_arrival_time_dict[origin] = earliest_arrival_time
        earliest_arrival_link_dict[origin] = earliest_arrival_link
        reachable_connections_dict[origin] = reachable_connections
        reachable_trips_dict[origin] = reachable_trips

    return {
        'earliest_arrival_time_dict': earliest_arrival_time_dict,
        'earliest_arrival_link_dict': earliest_arrival_link_dict,
        'reachable_connections_dict': reachable_connections_dict,
        # the per-origin dictionary, not the trips of the last origin only
        'reachable_trips_dict': reachable_trips_dict
    }