Example no. 1
def naive(adjuster, segments):
    """
    Implementation of the naive algorithm to find crossings
    between segments.
    """

    results = {}
    graph = [[0], [0]]  # This will be useful to observe the time complexity
    start = time()
    finished = special.binom(
        len(segments),
        2)  # This will be useful to print a progress bar in the console
    segments_processed = 0

    for segment_1, segment_2 in combinations(segments, 2):
        if time() - start >= 1200:
            return results, graph
        new_intersection = segment_1.intersection_with(segment_2)
        if new_intersection is not None:
            new_intersection = adjuster.hash_point(new_intersection)
            if new_intersection not in segment_1.endpoints + segment_2.endpoints:
                for segment in [segment_1, segment_2]:
                    if segment in results:
                        results[segment] += [new_intersection]
                    else:
                        results[segment] = [new_intersection]
        segments_processed += 1
        graph[0] += [time() - start]
        graph[1] += [len(list(set().union(*results.values())))]
        progress_bar(finished - segments_processed, finished)

    return results, graph
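
The quadratic structure above is easier to see in isolation. Below is a minimal, self-contained sketch of the same pairwise check over plain coordinate tuples; the Segment and adjuster objects used by naive() belong to the surrounding project and are not reproduced here.

from itertools import combinations

def segments_cross(s1, s2):
    """Strict crossing test for two segments given as ((x1, y1), (x2, y2)).

    Uses the standard orientation test; collinear overlaps and shared
    endpoints are ignored to keep the sketch short.
    """
    def orient(p, q, r):
        return (q[0] - p[0]) * (r[1] - p[1]) - (q[1] - p[1]) * (r[0] - p[0])

    a, b = s1
    c, d = s2
    return (orient(a, b, c) * orient(a, b, d) < 0
            and orient(c, d, a) * orient(c, d, b) < 0)

def naive_crossing_count(segments):
    # same O(n^2) structure as naive() above: every unordered pair is tested
    return sum(1 for s1, s2 in combinations(segments, 2) if segments_cross(s1, s2))

print(naive_crossing_count([((0, 0), (2, 2)), ((0, 2), (2, 0)), ((3, 0), (4, 0))]))  # 1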
Example no. 2
def train_epoch(model,
                opt,
                lr_scheduler,
                epoch,
                dataloader,
                gpu_id=0,
                verbose=True):
    _ = model.train()
    batches_per_epoch = len(dataloader)
    train_loss, correct, total = 0, 0, 0
    for batch_idx, (data, targets) in enumerate(dataloader):
        data, targets = Variable(data.cuda(gpu_id)), Variable(
            targets.cuda(gpu_id))

        # Set LR
        LRSchedule.set_lr(opt,
                          lr_scheduler(epoch + batch_idx / batches_per_epoch))

        opt.zero_grad()
        outputs = model(data)
        loss = F.cross_entropy(outputs, targets)
        loss.backward()
        opt.step()

        train_loss += loss.data[0]
        predicted = torch.max(outputs.data, 1)[1]
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        if verbose:
            progress_bar(
                batch_idx, batches_per_epoch, 'Loss: %.3f | Acc: %.3f%%' %
                (train_loss / (batch_idx + 1), 100. * correct / total))

    return float(correct) / total
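
This snippet (like eval_epoch in Example no. 11) targets the pre-0.4 PyTorch API: Variable, volatile=True and loss.data[0] have since been removed. A rough sketch of the same inner step on a current PyTorch release, assuming the same model, optimizer and batch objects:

import torch
import torch.nn.functional as F

def train_step(model, opt, data, targets, device="cuda:0"):
    # Variable() is no longer needed; tensors carry autograd state directly
    data, targets = data.to(device), targets.to(device)

    opt.zero_grad()
    outputs = model(data)
    loss = F.cross_entropy(outputs, targets)
    loss.backward()
    opt.step()

    # loss.data[0] -> loss.item(); the accuracy bookkeeping is unchanged
    predicted = outputs.argmax(dim=1)
    correct = (predicted == targets).sum().item()
    return loss.item(), correct, targets.size(0)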
Example no. 3
def _learn_low_rank_metric_gradient_descent(x, relations, rank, S, cost, tol, step, max_iter, verbose):
    """
    This method is used by learn_low_rank_metric as an optimization subprocedure. See
        learn_low_rank_metric for more information.
    """
    dim_count = x.shape[1]  # number of input features; G maps them to `rank` dimensions
    G = np.random.randn(rank, dim_count) * 0.01
    converged = False
    for e in range(max_iter):
        if verbose:
            helpers.progress_bar(current=e+1, max=max_iter, update_freq=int(max_iter/100))

        Gold = G
        # form the matrix \sum_{violated ijk} [(x[i]-x[j])(x[i]-x[j])^T - (x[i]-x[k])(x[i] - x[k])^T]
        GS = np.zeros((rank, dim_count))
        for (i, j, k) in relations:
            dij, dist_ij = _mahalonobis_distance(x[i, :], x[j, :], G, type='low_rank')
            dik, dist_ik = _mahalonobis_distance(x[i, :], x[k, :], G, type='low_rank')
            if dist_ij - dist_ik + 1.0 >= 0.0:
                GS += np.dot(G, S[(i, j, k)])

        # gradient descent step
        grad_G = G + (2 * cost * GS)
        G = G - (step * grad_G)

        # check convergence
        if np.sum(np.square(G - Gold)) < tol:
            converged = True
            if verbose:
                print('\nConverged at {0:d}'.format(e))
            break

    return G, converged
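
The helper _mahalonobis_distance is not shown in this snippet; for a low-rank factor G the squared distance it computes is presumably ||G(x_i - x_j)||^2. A hedged numpy sketch of that quantity (the helper's exact return values are an assumption):

import numpy as np

def low_rank_mahalanobis(xi, xj, G):
    """Squared Mahalanobis distance under M = G^T G, i.e. ||G (xi - xj)||^2."""
    diff = G @ (xi - xj)
    return diff, float(diff @ diff)

G = np.random.randn(2, 5) * 0.01
projected, dist = low_rank_mahalanobis(np.ones(5), np.zeros(5), G)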
Example no. 4
 def augment(x, channel_shift):
     print("Performing channel shifts...")
     helpers.progress_bar(0, x.shape[0], prefix="Progress", suffix="Complete", length=30, fill="=")
     for row in range(x.shape[0]):
         for ch in range(x.shape[3]):
             # shift each channel independently; without the channel index the
             # same offset would be added to the whole image once per channel
             x[row, :, :, ch] += random.randrange(channel_shift[0], channel_shift[1])
         helpers.progress_bar(row + 1, x.shape[0], prefix="Progress", suffix="Complete", length=30, fill="=")
     x[x > 255] = 255
     x[x < 0] = 0
     return x
Example no. 5
def bentley_ottmann(adjuster, segments):
    """
    Implementation of the Bentley-Ottmann algorithm.
    """

    events = Events(segments)
    living = []
    results = {}

    graph = [[0], [0]] # This will be useful to observe the time complexity
    start = time()
    finished = len(events.heap) # This will be useful to print a progress bar in the console

    while events.heap and time() - start < 1200:

        current_point, intersection, lower, upper, horizontal = events.pop_event()
        """
        - lower = [segments that have current_point as their lower endpoint]
        - upper = [segments that have current_point as their upper endpoint]
        - intersection = [segments that pass through current_point but do not
          have it as an endpoint]
        - horizontal = [horizontal segments that have current_point as an endpoint]
        """

        for segment in horizontal:
            for other_segment in living:
                find_new_event(segment, other_segment, current_point, events, results, adjuster)

        for segment in upper:
            left_segment, right_segment = nearest_living(segment, living)
            find_new_event(left_segment, right_segment, current_point, events, results, adjuster)
            living.remove(segment)


        for segment in lower:
            living.append(segment)
            living.sort(key=lambda segment: living_key(segment, current_point, adjuster))
            left_segment, right_segment = nearest_living(segment, living)
            find_new_event(segment, right_segment, current_point, events, results, adjuster)
            find_new_event(left_segment, segment, current_point, events, results, adjuster)

        for segment in intersection:
            living.sort(key=lambda segment: living_key(segment, current_point, adjuster))
            left_segment, right_segment = nearest_living(segment, living)
            find_new_event(segment, right_segment, current_point, events, results, adjuster)
            find_new_event(left_segment, segment, current_point, events, results, adjuster)

        graph[0] += [time() - start]
        graph[1] += [len(list(set().union(*results.values())))]
        progress_bar(len(events.heap), finished)

    return results, graph
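
The Events class used above is part of the surrounding project and is not reproduced here. As a rough illustration only, a sweep-line event queue that hands out points from the highest y downwards can be built on heapq (the (y, x) ordering is an assumption about how the sweep proceeds):

import heapq

def make_event_queue(points):
    # heapq is a min-heap, so the y coordinate is negated to pop the
    # topmost event first; ties are broken on x
    heap = [(-y, x, (x, y)) for (x, y) in points]
    heapq.heapify(heap)
    return heap

def pop_event(heap):
    _, _, point = heapq.heappop(heap)
    return point

queue = make_event_queue([(0.0, 1.0), (2.0, 3.0), (1.0, 2.0)])
while queue:
    print(pop_event(queue))  # (2.0, 3.0), then (1.0, 2.0), then (0.0, 1.0)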
Example no. 6
 def evaluate(self):
     self.update_config()
     if self._model is not None:
         total = {"tp": 0, "tn": 0, "fp": 0, "fn": 0, "n": 0, "p": 0, "t": 0, "f": 0}
         for directory in self._evaluate_data:
             summary = self.set_summary()
             image_paths = self.get_image_paths2("../" + directory)
             X, Y = self.get_data(image_paths)
             image_paths = [path for paths in image_paths for path in paths]
             print("Evaluating dataset")
             with open("../results/cell_data/" + directory.split("/")[-1] + ".csv", "w") as file:
                 file.write("cell path,status,confidence\n")
             count = 0
             helpers.progress_bar(0, self._evaluate_batches,
                                  prefix="Progress", suffix="Complete", length=30, fill="=")
             for batch in range(self._evaluate_batches):
                 x, y = self.get_batch(X, Y, batch, self._evaluate_batches)
                 # x = self.normalise(x)
                 x /= 255
                 predictions = self._model.predict(x,
                                                   batch_size=self._batch_size,
                                                   verbose=0)
                 for label, prediction in zip(y, predictions):
                     status = self.get_status(label, self.get_prediction(prediction, self._threshold))
                     summary[status] += 1
                     with open("../results/cell_data/" + directory.split("/")[-1] + ".csv", "a") as file:
                         file.write(",".join([image_paths[count], status, str(prediction[1])]) + "\n")
                     count += 1
                 helpers.progress_bar(batch + 1, self._evaluate_batches,
                                      prefix="Progress", suffix="Complete", length=30, fill="=")
             summary["sample"] = directory.split("/")[-1]
             try:
                 summary["sensitivity"] = summary["tp"] / (summary["tp"] + summary["fn"])
             except ZeroDivisionError:
                 print("Warning: no positive cases.")
             try:
                 summary["specificity"] = summary["tn"] / (summary["tn"] + summary["fp"])
             except ZeroDivisionError:
                 print("Warning: no negative cases.")
             print(summary)
             total["tp"] += summary["tp"]
             total["fp"] += summary["fp"]
             total["tn"] += summary["tn"]
             total["fn"] += summary["fn"]
         total["n"] = total["tn"] + total["fp"]
         total["p"] = total["tp"] + total["fn"]
         total["t"] = total["tp"] + total["tn"]
         total["f"] = total["fp"] + total["fn"]
         print(total)
     else:
         print("Warning: no model configured.")
Example no. 7
def convert_dwt_images(lead):
    data_x, data_y, fnames = dgen.get_data(
        # n_files=1,
        targets=cfg.targets,
        return_fnames=True,
        channels=[lead],
        norm=True)

    for i, ecg in enumerate(data_x):
        title = fnames[i].split('.')[0]
        save_wavelet_img([i for i in range(data_x.shape[1])],
                         ecg[:, 0],
                         np.arange(1, 128, 2),
                         title=title)
        # used_fnames[fnames[i]] += 1
        progress_bar("Converting to DWT image", i, data_x.shape[0])
Example no. 8
    def __init__(self, alp, size=100):
        self._alp = alp  # instance of the aircraft landing problem

        self._members = list()
        for i in range(size):
            if i < size - 3:  # random individuals
                self._members.append(
                    Individual(alp, mode=Individual.Mode.random))
            elif i == size - 3:  # heuristic individuals
                self._members.append(
                    Individual(alp, mode=Individual.Mode.earliest_h))
            elif i == size - 2:
                self._members.append(
                    Individual(alp, mode=Individual.Mode.target_h))
            elif i == size - 1:
                self._members.append(
                    Individual(alp, mode=Individual.Mode.latest_h))
            hlp.progress_bar(current=i + 1,
                             end=size,
                             title=format('[ INIT POP   ]'))

        # Initial sorting according to fitness
        self._members = sorted(self._members)

        print('\r[ INIT POP   ] Size: %d / Best fitness: %d' %
              (len(self._members), max(self._members).fitness),
              flush=True)

        # Setup graph structure
        # - stores distances below the threshold
        # - makes it easy to derive maximum independent sets (parent selection)
        self._graph = nx.Graph()
        self._threshold = self._alp.nr_planes / 10

        # Add each individual as node to the graph
        for individual in self._members:
            self._graph.add_node(individual)

        # For each pair of individuals that are too close, add an edge to the graph
        relations = [(ind_a, ind_b) for ind_a in self._members
                     for ind_b in self._members if ind_b != ind_a]
        for ind_a, ind_b in relations:
            if not self._graph.has_edge(ind_a, ind_b):
                distance = ind_a.distance(ind_b)
                if distance < self._threshold:
                    self._graph.add_edge(ind_a, ind_b, weight=distance)
Example no. 9
def _learn_diagonal_metric_gradient_descent(cost, dist_mat, dist_squared, max_iter, tol, step, verbose):
    """
    This method is used by learn_diagonal_metric as an optimization procedure. See learn_diagonal_metric
        for details.
    """
    dim_count = dist_mat.shape[0]
    relation_count = dist_mat.shape[1]
    alpha = np.abs(np.random.randn(relation_count)) * 0.01
    beta = np.abs(np.random.randn(dim_count)) * 0.01

    converged = False
    for e in range(max_iter):
        if verbose:
            helpers.progress_bar(current=e, max=max_iter-1, update_freq=int(max_iter/100.0))

        # calculate the gradients
        grad_alpha = 1 + np.dot(dist_mat.T, beta) - np.dot(dist_squared, alpha)
        grad_beta = np.dot(dist_mat, alpha) - beta

        # check stationarity conditions
        #   if \beta_d >= 0 then grad_beta_d = 0.0
        #   if \beta_d = 0 then grad_beta_d < 0.0
        #   if cost > \alpha_r > 0 then grad_alpha_r = 0.0
        #   if \alpha_r = 0 then grad_alpha_r < 0.0
        #   if \alpha_r = cost then grad_alpha_r > 0.0
        if np.allclose(a=grad_beta[beta > 0.0], b=0.0, atol=tol) and \
                np.all(grad_beta[np.isclose(a=beta, b=0.0, atol=tol)] < 0.0) and \
                np.allclose(a=grad_alpha[np.logical_and(alpha > 0.0, alpha < cost)], b=0.0, atol=tol) and \
                np.all(grad_alpha[np.isclose(a=alpha, b=0.0, atol=tol)] < 0.0) and \
                np.all(grad_alpha[np.isclose(a=alpha, b=cost, atol=tol)] > 0.0):
            converged = True
            if verbose:
                print("\nConverged at {0:d}".format(e))
            break

        # gradient ascent update
        alpha = alpha + step * grad_alpha
        beta = beta + step * grad_beta

        # projection step
        alpha[alpha < 0.0] = 0.0
        alpha[alpha > cost] = cost
        beta[beta < 0.0] = 0.0

    return alpha, beta, converged
Example no. 10
 def get_data(self, image_paths):
     n = sum([len(paths) for paths in image_paths])
     x = np.empty((n,
                   self._image_shape[0],
                   self._image_shape[1],
                   self._image_shape[2]), dtype=np.uint8)
     y = np.empty((n, 1), dtype=np.uint8)
     row = 0
     for label, paths in enumerate(image_paths):
         print("Loading images from class " + str(label))
         l = len(paths)
         helpers.progress_bar(0, l, prefix="Progress", suffix="Complete", length=30, fill="=")
         for i, path in enumerate(paths):
             y[row] = label
             x[row] = cv2.imread(path)
             row += 1
             helpers.progress_bar(i + 1, l, prefix="Progress", suffix="Complete", length=30, fill="=")
     return x, y
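
One caveat with this loop: cv2.imread returns None instead of raising when a file cannot be decoded, so the assignment into the preallocated uint8 array would fail with an error that never names the file. A hedged helper (load_image is hypothetical, not part of the class) that makes the failure explicit:

import cv2
import numpy as np

def load_image(path, shape):
    """Read an image, failing loudly if OpenCV cannot decode it and
    resizing to the expected (height, width, channels) shape."""
    img = cv2.imread(path)
    if img is None:
        raise IOError("Could not read image: " + path)
    if img.shape != tuple(shape):
        img = cv2.resize(img, (shape[1], shape[0]))
    return img.astype(np.uint8)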
Example no. 11
def eval_epoch(model, dataloader, gpu_id=0, verbose=True):
    _ = model.eval()
    batches_per_epoch = len(dataloader)
    eval_loss, correct, total = 0, 0, 0
    for batch_idx, (data, targets) in enumerate(dataloader):
        data, targets = Variable(data.cuda(gpu_id),
                                 volatile=True), Variable(targets.cuda(gpu_id))

        outputs = model(data)
        loss = F.cross_entropy(outputs, targets)

        eval_loss += loss.data[0]
        predicted = torch.max(outputs.data, 1)[1]
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        if verbose:
            progress_bar(
                batch_idx, batches_per_epoch, 'Loss: %.3f | Acc: %.3f%%' %
                (eval_loss / (batch_idx + 1), 100. * correct / total))

    return float(correct) / total
Example no. 12
def preprocess_data(data_x,
                    smooth_window_size=51,
                    smooth_order=4,
                    fourier_baseline_resolution=20,
                    verbosity=False):
    """ function: preprocess_data

    preprocess the data by smoothing and straightening.

    Args:
        data_x : np.ndarray
            the data to preprocess.
    Returns:
        p_data_x : np.ndarray
            preprocessed data
    """
    assert data_x.ndim == 3

    if verbosity:
        print("Preprocessing data...")
        start = time.time()

    p_data_x = np.empty(shape=data_x.shape)

    for i, ecg in enumerate(data_x):
        for channel in range(ecg.shape[1]):
            prepped_channel = savitzky_golay(ecg[:, channel],
                                             window_size=smooth_window_size,
                                             order=smooth_order)
            prepped_channel = fourier_straighten(
                prepped_channel, resolution=fourier_baseline_resolution)
            p_data_x[i, :, channel] = prepped_channel
        if verbosity:
            progress_bar("Processed", i, data_x.shape[0])
    if verbosity:
        print('\nDone, took ' + str(round(time.time() - start, 1)) +
              ' seconds')
    return p_data_x
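
The savitzky_golay helper used above is project code and not shown here; on current SciPy versions the same smoothing step could also be expressed with scipy.signal.savgol_filter (a sketch with this function's default window and order, not the project's implementation):

import numpy as np
from scipy.signal import savgol_filter

# sketch: Savitzky-Golay smoothing of one ECG channel with the same
# window/order values as preprocess_data's defaults
channel = np.sin(np.linspace(0, 10, 500)) + 0.1 * np.random.randn(500)
smoothed = savgol_filter(channel, window_length=51, polyorder=4)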
Example no. 13
 def evaluate_for_all_thresholds(self, steps=21):
     self.update_config()
     if self._model is not None:
         data = {"Threshold": [],
                 "TN": [], "TP": [], "FN": [], "FP": [],
                 "N": [], "P": [], "T": [], "F": [],
                 "TNR": [], "FNR": [], "TPR": [], "FPR": [],
                 "PPV": [], "NPV": [],
                 "LRp": [], "LRn": [],
                 "ACC": [], "F1": [],
                 "MCC": [], "Informedness": [], "Markedness": []}
         for threshold in np.linspace(0, 1, steps):
             print("Threshold: " + str(threshold))
             summary = {"tn": 0, "tp": 0, "fn": 0, "fp": 0}
             for directory in self._evaluate_data:
                 image_paths = self.get_image_paths2("../" + directory)
                 X, Y = self.get_data(image_paths)
                 print("Evaluating dataset")
                 helpers.progress_bar(0, self._evaluate_batches,
                                      prefix="Progress", suffix="Complete", length=30, fill="=")
                 for batch in range(self._evaluate_batches):
                     x, y = self.get_batch(X, Y, batch, self._evaluate_batches)
                     # x = self.normalise(x)
                     x /= 255
                     predictions = self._model.predict(x,
                                                       batch_size=self._batch_size,
                                                       verbose=0)
                     for label, prediction in zip(y, predictions):
                         status = self.get_status(label, self.get_prediction(prediction, threshold))
                         summary[status] += 1
                     helpers.progress_bar(batch + 1, self._evaluate_batches,
                                          prefix="Progress", suffix="Complete", length=30, fill="=")
             data["Threshold"].append(threshold)
             data = self.fill_data(data, summary)
         data = pd.DataFrame.from_dict(data)
         data.to_csv("../data.csv", sep=",")
     else:
         print("Warning: model not configured.")
Example no. 14
def extract_windows(data_x, data_y, pulse_size, fnames=[], verbosity=False):
    """ function : extract_windows

    extract all pulses from an ecg and scale them to a given size

    Args:
        data_x : np.ndarray
            an array of ECGs
        data_y : np.ndarray
            an array of targets for the ECGs
        pulse_size : int
            the size to scale the pulses to
        fnames : list [optional, default: []]
            filenames of the ECGs; if given, per-pulse filenames are returned
        verbosity : bool [optional, default: False]
            whether to print progress information
    Returns:
        pulse_data_x : np.ndarray
            an array of pulses
        pulse_data_y : np.ndarray
            an array of targets of the corresponding pulses

    """
    if verbosity:
        start = time.time()
        print("Extracting and scaling pulses from ECG's...")
    n_samples, n_points, n_channels = data_x.shape

    # if exclude_first_channel:
    #     n_channels = max(n_channels - 1, 1)
    pulses = np.empty(shape=(n_samples * 25, pulse_size, n_channels))
    pulse_targets = np.empty(shape=(n_samples * 25))
    pulse_n = 0

    new_fnames = []

    for i, ecg in enumerate(data_x):
        # We assume lead 0 is a lead where we can extract rpeaks
        rpeaks = get_rpeaks(ecg.T[0])

        ecg_start = 0
        for rpeak_n in range(1, len(rpeaks) - 1):
            pulse = ecg[rpeaks[rpeak_n]:rpeaks[rpeak_n + 1], :]

            try:
                pulses[pulse_n, :, :] = pulse_scale(pulse, pulse_size)
                # store the target before advancing the index so that targets
                # stay aligned with their pulses
                pulse_targets[pulse_n] = data_y[i]
                pulse_n += 1
                if fnames:
                    new_fnames.append(fnames[i].split('.')[0] + "_" +
                                      str(ecg_start) + ".csv")
                ecg_start += 1
            except Exception:
                pass

        ecg_start = 0
        if verbosity:
            progress_bar("Extracted pulses from ECG", i, n_samples)
    if verbosity:
        print('Done, took ' + str(round(time.time() - start, 1)) + ' seconds')

    if len(fnames) > 0:
        return pulses[:pulse_n], pulse_targets[:pulse_n], new_fnames
    # make sure the data is of the correct length
    return pulses[:pulse_n], pulse_targets[:pulse_n]
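
pulse_scale is defined elsewhere in the project; resampling each variable-length pulse to a fixed number of samples can be done per channel with np.interp, assuming linear interpolation is acceptable (a sketch, not necessarily the project's method):

import numpy as np

def scale_pulse(pulse, pulse_size):
    """Linearly resample a (length, n_channels) pulse to (pulse_size, n_channels)."""
    length, n_channels = pulse.shape
    old_x = np.linspace(0.0, 1.0, length)
    new_x = np.linspace(0.0, 1.0, pulse_size)
    scaled = np.empty((pulse_size, n_channels))
    for ch in range(n_channels):
        scaled[:, ch] = np.interp(new_x, old_x, pulse[:, ch])
    return scaled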
Example no. 15
    def generate_parent_sets(self):
        """ Generates a list of parent sets for child generation

        Generates a set of parents according to section 5.5 in Pinol & Beasley (2006).

        In the parent selection process a distance measure is introduced to keep diversity
        in the parent set high. Nodes whose distance is less than a specified threshold value
        have an edge. When a node is selected for a parent set, none of its neighbours can
        be added to the same set. Furthermore, better individuals have a higher probability
        of being selected for a parent set, because the inclusion frequency corresponds to
        their rank.

        Returns:
            parent_sets (list of sets): list of parent sets, where each parent
                set can have different sizes
        """
        # update sorting to obtain valid ranks
        self._members = sorted(self._members)

        # start with a graph that contains all possible
        # nodes and edges and assign ranks according to
        # each individual's fitness
        main_graph = self._graph.copy()
        for rank, individual in enumerate(self._members):
            # asc sorting --> worst individual is assigned
            # lowest rank
            main_graph.node[individual]['rank'] = rank + 1

        # individuals with distance below threshold
        # must have an edge --> others are removed
        # iteratively to reach reasonable number of edges
        max_nr_edges = len(self._members) * (len(self._members) - 1) / 2
        theta = self._threshold

        while main_graph.number_of_edges() > max_nr_edges / 2:
            # get edges to be removed because of too large distance
            edges = [(f, t) for (f, t, w) in main_graph.edges(data='weight')
                     if w >= theta]
            main_graph.remove_edges_from(edges)
            # Further reduce number of edges in next iteration
            theta = theta / 2.0

        total_nr_parents = sum(
            [rank for (parent, rank) in main_graph.nodes(data='rank')])

        # obtain sets of parent individuals while rank
        # (= inclusion frequency) greater than zero
        parent_sets = []
        while len(main_graph) > 0:

            parent_set = set()
            set_graph = main_graph.copy()

            while len(set_graph) > 0:
                # pick random node
                individual = rd.choice(list(set_graph))
                parent_set.add(individual)
                new_rank = main_graph.node[individual]['rank'] - 1
                if new_rank <= 0:
                    # remove node from initial graph
                    main_graph.remove_node(individual)
                else:
                    main_graph.node[individual]['rank'] = new_rank
                neighbors = list(set_graph.neighbors(individual))
                set_graph.remove_node(individual)
                set_graph.remove_nodes_from(neighbors)

            parent_sets.append(parent_set)

            # Print progress
            nr_parents = total_nr_parents - sum(
                [r for (n, r) in main_graph.nodes(data='rank')])
            hlp.progress_bar(nr_parents, total_nr_parents, '[ SELECTION  ]')

        parent_sets = [p_set for p_set in parent_sets if len(p_set) > 1]

        print('\r[ SELECTION  ] Sets generated: %d' % (len(parent_sets)),
              flush=True)

        return parent_sets, theta
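
Note that main_graph.node[...] is the NetworkX 1.x/2.0-2.3 spelling; Graph.node was removed in NetworkX 2.4, where per-node attributes are accessed through Graph.nodes instead. A minimal sketch of the rank bookkeeping on a current NetworkX version:

import networkx as nx

g = nx.Graph()
g.add_node("individual_a")
# Graph.node was removed in NetworkX 2.4; use Graph.nodes for attributes
g.nodes["individual_a"]["rank"] = 3
g.nodes["individual_a"]["rank"] -= 1
total_nr_parents = sum(rank for _, rank in g.nodes(data="rank"))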
Example no. 16
    def generate_children(self, parent_sets):
        """Generates a set of children from a given set of parents

        Generates a set of children from the given parent set according to section 5.6 in Pinol & Beasley (2006).
        Then checks whether an individual with the same sequence already exists in the population and, if so,
        removes that child according to section 5.7.
        Locally improves every child from the children set according to section 5.8, depending on whether the
        non-linear objective or linear objective is chosen.

        Args:
            parent_sets (list of sets): sets of individuals

        Returns:
            children (list of individuals): list of generated children
        """

        children = []
        for set_nr, parent_set in enumerate(parent_sets):
            if parent_set is None:
                return
            # generate random weights for each parent
            abs_weights = [rd.random() for _ in range(len(parent_set))]
            # normalize weights
            sum_of_weights = sum(abs_weights)
            rel_weights = [w / sum_of_weights for w in abs_weights]

            chromosome = []
            for i in range(self._alp.nr_planes):
                # determine proportion value
                parent_props = [
                    parent.chromosome[i][1] for parent in parent_set
                ]
                child_prop = round(
                    sum([w * p for w, p in zip(rel_weights, parent_props)]), 6)

                # determine runway
                parent_runways = [
                    parent.chromosome[i][2] for parent in parent_set
                ]
                child_rw = rd.choice(parent_runways)

                # add to chromosome
                chromosome.append((i, child_prop, child_rw))

            child = Individual(alp=self._alp,
                               mode=Individual.Mode.child,
                               chromosome=chromosome,
                               parents=parent_set)

            # exclude duplicates with respect to the current population
            if not self._duplicate(child):
                child.improve()
                children.append(child)

            # Print progress
            hlp.progress_bar(current=set_nr + 1,
                             end=len(parent_sets),
                             title=format('[ CROSSOVER  ]'))

        # Print information
        if len(children) > 0:
            print('\r[ CROSSOVER  ] Children generated: %d' % len(children),
                  flush=True)
        else:
            print('\r[ CROSSOVER  ] No children generated', flush=True)

        return children
Example no. 17
def evaluate_model(data_x=[], targets=[], fnames=[], model=None):
    """ function : evaluate_model

    create an evaluation (accuracy and related metrics) for classification based
    on a ratio threshold: at least a cfg.min_af_ratio_for_positive_prediction
    fraction of the pulses in an ECG must be classified as unhealthy for the
    whole ECG to be labeled unhealthy

    Args:
        data_x : np.ndarray [optional]
            the ECGs to evaluate on; loaded via dgen.get_data when empty
        targets : np.ndarray [optional]
            the targets of the ECGs
        fnames : list [optional]
            the filenames of the ECGs
        model : keras model or Nonetype [optional, default: None]
            the model to evaluate; loaded from cfg.model_save_name when None
    Returns:
        metrics : list
            [mse, accuracy, precision, recall, fpr_tpr_auc, tpr_ppv_auc, f1]
            for the model tested on the ECGs
    """

    if len(data_x) == 0:
        data_x, targets, fnames = dgen.get_data(return_fnames=True,
                                                channels=np.array([0]),
                                                norm=True,
                                                exclude_targets=[2, 3, 4])

    if model is None:
        model = load_model(cfg.model_save_name,
                           custom_objects={
                               'precision': precision,
                               'recall': recall
                           })

    # n_correct = 0
    tp = 0
    tn = 0
    fp = 0
    fn = 0

    predictions = []

    mse = 0

    if cfg.verbosity:
        print("Evaluating model with ECG's")
        start = time.time()

    for i, ecg in enumerate(data_x):
        # print(ecg.shape)
        pulse_data_x, pulse_data_y = dprep.extract_windows(
            np.expand_dims(ecg, axis=0),
            np.array([targets[i]]),
            cfg.nn_input_size,
            exclude_first_channel=True)

        nn_pulse_data_x = {"ecg_inp": np.squeeze(pulse_data_x)}

        # preds = model.predict(nn_pulse_data_x)
        preds = [
            int(round(pred[0])) for pred in model.predict(nn_pulse_data_x)
        ]
        pred = 1 if sum(preds) >= len(
            preds) * cfg.min_af_ratio_for_positive_prediction else 0

        mse += (targets[i] - pred)**2

        predictions.append(pred)

        if pred == 1 and targets[i] == 1:
            tp += 1
        elif pred == 0 and targets[i] == 0:
            tn += 1
        elif pred == 1 and targets[i] == 0:
            fp += 1
        elif pred == 0 and targets[i] == 1:
            fn += 1

        progress_bar("Evaluating ECG", i, data_x.shape[0])
    if cfg.verbosity:
        print('Done, took ' + str(round(time.time() - start, 1)) + ' seconds')

    mse /= len(targets)

    # keep the two curves in separate variables so the ROC tpr does not
    # overwrite the precision-recall one before both AUCs are computed
    ppv, tpr_pr, thresholds_pr = precision_recall_curve(targets, predictions)
    fpr, tpr_roc, thresholds_roc = roc_curve(targets, predictions)

    fpr_tpr_auc = sklearn_auc(fpr, tpr_roc)
    tpr_ppv_auc = sklearn_auc(tpr_pr, ppv)

    accuracy = (tp + tn) / len(targets)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    # specificity = tn/(fp + tn)
    f1 = (2 * tp) / (2 * tp + fp + fn)

    metrics = [mse, accuracy, precision, recall, fpr_tpr_auc, tpr_ppv_auc, f1]
    print(metrics)
    return metrics
Example no. 18
def get_data(cfg=None,
             n_files=None,
             split=False,
             channels=[],
             targets=[],
             return_fnames=False,
             randomize_order=False,
             extension='.csv',
             n_points=None,
             include_first_channel=False,
             location=None,
             filename_fmt=None,
             filename_sep="_",
             verbosity=None,
             open_files=[]):
    """ function: get_data

    returns data in the directory specified in the helpers.py file

    Args:
        n_files : (Nonetype or int) [optional, default: None]
            the number of samples to return, return all available data if set to
            None
        extension : str [optional, default: '.csv']
            the extension (file type) of the data. can be anything, as long as
            it's readable by np.loadtxt
        split : (bool or str) [optional, default: False]
            to split data 50/50 into healthy/non-healthy or not (only works if
            target is set to None)
            if set to 'max', the function will determine what the max amount of
            files is while keeping the ratio 50/50 (will override n_files)
        channels : (Nonetype or np.array) [optional, default: None]
            indices of channels to return or None for all channels
        targets : (list) [optional, default: []]
            a list of conditions to return
        return_fnames : bool [optional, default: False]
            whether to return the filenames of the data
        randomize_order : bool [optional, default: False]
            whether to randomize the order of the data
        n_points : int [optional, default: None]
            the number of data points to extract
        include_first_channel : bool [optional, default: False]
            whether to return an extra copy of the first channels
            (for determining rpeaks in data from other channels)
        unique_patients : bool [optional, default: False]
            whether to only use one ecg per patient to reduce bias
        location : str or Nonetype [optional, default: None]
            the location to load the data from. if None loads the processed 
            data location specified in the config

    Returns:
        data_x : np.ndarray
            the ecg data itself as a 3D array with shape
            (n_ecgs, ecg_len, n_channels)
        data_y : np.ndarray
            an array of target variables
        files : list [optional]
            a list of all files
    """
    if cfg is None:
        cfg = global_params.cfg

    # if delimiter == None:
    #     delimiter = cfg.delimiter

    if verbosity is None:
        verbosity = cfg.verbosity

    if verbosity:
        print("Assembling data from files...")
        start = time.time()

    if channels == []:
        channels = [x for x in range(cfg.n_channels)]

    n_channels = len(channels)

    if include_first_channel and 0 not in channels:
        channels = [0] + channels
        n_channels += 1

    if location is None:
        location = cfg.processed_data_location

    if targets == []:
        targets = cfg.targets

    # get a list of all filenames
    used_patients = []

    if len(open_files) == 0:
        filters = {}
        if targets:
            filters["TARGET"] = targets
        all_files = get_filenames(location, extension, filters)
    else:
        all_files = open_files

    # use all available files if no valid amount is requested
    # or if more files are requested than exist
    if not isinstance(n_files, int) or n_files > len(all_files):
        n_files = len(all_files)

    # handle the case where the data has to be split with specified amount
    if split != "max" and split:
        # all healthy files
        sr_files = [f for f in all_files if filename_info(f, "TARGET") == "SR"]
        # all non-healthy files
        asr_files = [
            f for f in all_files if filename_info(f, "TARGET") != "SR"
        ]

        try:
            # try to get a random sample of these files of the amount specified
            files = random.sample(sr_files, int(n_files / 2))
            files += random.sample(asr_files, int(n_files / 2))
        except ValueError:
            # if that's not possible, the max amount that can still be loaded
            # will be used.
            warnings.warn("Not enough files with given target for requested \
                    amount, continuing with lower amount to maintain split.")
            split = "max"
    # handle the case where as many files as possible have to be gotten but the
    # split must be maintained
    if split == "max":
        sr_files = []
        asr_files = []
        for f in all_files:
            # create lists of healthy and non-healthy files
            if filename_info(f, "TARGET") == "SR":
                # target is sinus rhythm
                sr_files.append(f)
            else:
                asr_files.append(f)

        # check which of the two lists is smaller, and set this to the size of
        # the sample that has to be taken from both
        m_files = min([len(sr_files), len(asr_files)])
        # concatenate these samples
        files = random.sample(sr_files, m_files)
        files += random.sample(asr_files, m_files)
        # reset number of files (since the number was found by checking what
        # the max amount is without losing the 50/50 ratio)
        n_files = len(files)
    if not split:
        # if no split is required, just take a random subset of the data
        files = random.sample(all_files, n_files)

    if randomize_order:
        # specified by args
        np.random.shuffle(files)

    if len(files) != n_files:
        warnings.warn(
            "The amount of files loaded is not the same as the amount requested"
        )

    if n_points is None:
        n_points = cfg.n_points

    data_x = np.empty(shape=(n_files, n_points, n_channels))
    data_y = np.zeros(shape=(n_files, ))

    for i, fname in enumerate(files):

        ecg = np.loadtxt(location + fname,
                         delimiter=cfg.delimiter,
                         dtype=np.float32,
                         usecols=channels,
                         ndmin=2)

        if cfg.normalize_data:
            # specified by args
            # divide each value in the ecg by the max of its column
            ecg = ecg / np.amax(np.abs(ecg), axis=0)[None, :]

        data_x[i, :, :] = ecg
        # data_y[i] = 0 if filename_info(fname, "SEX") == "M" else 1
        data_y[i] = getattr(cfg, filename_info(fname, "TARGET")[:2])

        if verbosity:
            progress_bar("Load ECG", i, n_files)
    if verbosity:
        print('Done, took ' + str(round(time.time() - start, 1)) + ' seconds')
    if return_fnames:
        # specified by args
        return data_x, data_y, files

    return data_x, data_y
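
A small style note on the signature of get_data: mutable defaults such as channels=[], targets=[] and open_files=[] behave correctly here only because the lists are never mutated in place; the usual idiom is a None default, roughly as sketched below (a trimmed, hypothetical signature):

def get_data_sketch(channels=None, targets=None, open_files=None):
    # None defaults avoid sharing one list object across calls; the checks
    # mirror the `if channels == []` / `if targets == []` branches above
    channels = list(channels) if channels else []
    targets = list(targets) if targets else []
    open_files = list(open_files) if open_files else []
    return channels, targets, open_files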