Exemple #1
0
 def __init__(self):
     """create the helper objects and remember the directory this module lives in

     the trainer (self.tr) and detector (self.de) slots are left as None here
     """
     self.fm = FileManager()
     self.dp = DataPrepper()
     self.tr = self.de = None
     # directory containing this source file, joined onto the current working directory and canonicalized
     module_dir = os.path.dirname(__file__)
     self.__location__ = os.path.realpath(os.path.join(os.getcwd(), module_dir))
Exemple #2
0
    def __init__(self, transforms, video_file, *args):
        """load every readable frame of a video file into memory

        Args:
            transforms: transformations to associate with the frames; stored unchanged as self.transforms
            video_file (str): path to the video. The part of the file name before the first '.' must end in
                '_<integer>'; that integer is the number given to the first frame when frame names are generated
            *args: optional extra positional arguments. The last one is stored as self.pfm, and its pid attribute
                is copied to self.pid

        Raises:
            ValueError: if the last '_'-separated piece of the video name is not an integer
            AttributeError: if no extra positional argument is supplied (self.pfm is then never set)
        """
        self.video_name = video_file.split('/')[-1].split('.')[0]
        start = int(self.video_name.split('_')[-1])
        self.fm = FileManager()
        # if several extra arguments are passed, the last one wins
        if args:
            self.pfm = args[-1]
        self.pid = self.pfm.pid
        self.transforms = transforms
        self.img_files = []
        self.frames = []

        cap = cv2.VideoCapture(video_file)
        # release the capture handle even if reading a frame raises
        try:
            self.height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            count = start
            for i in range(self.len):
                ret, frame = cap.read()
                if not ret:
                    # NOTE(review): reading stops here, so self.frames may hold fewer than self.len frames --
                    # confirm that the code consuming self.len tolerates this
                    print("Couldn't read frame " + str(i) + " in " + video_file + ". Using last good frame",
                          file=sys.stderr)
                    break
                self.img_files.append('Frame_{}.jpg'.format(count))
                self.frames.append(frame)
                count += 1
                print('video: ', self.video_name, '\tFrame: ', count)
        finally:
            cap.release()
Exemple #3
0
class Runner:
    """convenience front-end that wires together the file manager, data prepper, trainer and detector."""

    def __init__(self):
        """create the helper objects and remember the directory this module lives in"""
        self.fm = FileManager()
        self.dp = DataPrepper()
        # the trainer and detector are only built when train() / detect() are called
        self.tr = self.de = None
        module_dir = os.path.dirname(__file__)
        self.__location__ = os.path.realpath(os.path.join(os.getcwd(), module_dir))

    def download(self):
        """ask the DataPrepper to download everything (delegates to DataPrepper.download_all)."""
        self.dp.download_all()

    def prep(self):
        """ask the DataPrepper to prepare the data (delegates to DataPrepper.prep)."""
        self.dp.prep()

    def train(self, num_epochs, upload_results=True):
        """build a Trainer, keep it on self.tr, and run its train() method.

        Args:
            num_epochs (int): number of epochs to train
            upload_results (bool): passed positionally as the second argument of Trainer
        """
        trainer = Trainer(num_epochs, upload_results)
        self.tr = trainer
        trainer.train()

    def sync(self):
        """sync the training directory (delegates to FileManager.sync_training_dir)."""
        self.fm.sync_training_dir()

    def detect(self, img_dir):
        """build a Detector, keep it on self.de, and run the detection mode selected by img_dir.

        Args:
            img_dir (str): 'test' calls Detector.test(5); 'fullvideo' calls Detector.frame_detect on a
                hard-coded video path; any other value is handed to Detector.detect unchanged
        """
        # self.down = DetectDownload()
        # master, i_dir, files = self.down._locate_cloud_files()
        # self.down.download(i_dir, files)
        detector = Detector()
        self.de = detector
        if img_dir == 'test':
            detector.test(5)
            return
        if img_dir == 'fullvideo':
            path = '/Users/rhiyasharma/Documents/_McGrathLab/CD_work/videos/short_ten.mp4'
            detector.frame_detect(path)
            return
        detector.detect(img_dir)
Exemple #4
0
    def __init__(self, pid, video_path, video, csv_file, *args):
        """store the paths and names needed to annotate one video

        Args:
            pid (str): project id; becomes part of the annotated video's file name
            video_path: stored unchanged as self.video
            video (str): video file name; the text before the first '.' is kept as self.video_name
            csv_file (str): csv file name, joined onto the detection directory
            *args: optional extra positional arguments; the last one is stored as self.pfm
        """
        self.fm = FileManager()
        self.track = Tracking()
        # when several extra arguments are passed, the last one wins
        if args:
            self.pfm = args[-1]
        self.detection_dir = self.fm.local_files['detection_dir']
        self.video = video_path
        stem = video.split('.')[0]
        self.video_name = stem
        self.ann_video_name = 'annotated_' + pid + '_' + stem + '_p2.mp4'
        self.csv_file_path = join(self.detection_dir, csv_file)
Exemple #5
0
    def __init__(self, num_epochs, compare_annotations=True):
        """store the training settings, then build the loaders, the model and the loggers (in that order)

        Args:
            num_epochs (int): number of epochs to train
            compare_annotations: if True, the model is evaluated on the test set after every epoch. The trained
                model is the same either way; enabling it yields per-epoch performance data at the cost of a
                significantly longer total runtime
        """
        self.compare_annotations = compare_annotations
        self.fm = FileManager()
        self.num_epochs = num_epochs
        # the three set-up steps run strictly in this order
        for setup_step in (self._initiate_loaders, self._initiate_model, self._initiate_loggers):
            setup_step()
Exemple #6
0
    def __init__(self, transforms, subset):
        """collect the image paths and the matching label paths for one data subset

        Args:
            transforms: composition of Pytorch transformations to apply to the data when loading
            subset (str): which subset to use; selects the '<subset>_list' and '<subset>_image_dir' entries of
                FileManager.local_files (the options are 'train' and 'test')
        """
        self.fm = FileManager()
        self.files_list = self.fm.local_files['{}_list'.format(subset)]
        self.img_dir = self.fm.local_files['{}_image_dir'.format(subset)]

        self.transforms = transforms

        # the list file (train_list.txt or test_list.txt) holds one image file name per line
        with open(self.files_list, 'r') as list_file:
            names = list_file.read().splitlines()
        self.img_files = sorted(os.path.join(self.img_dir, name) for name in names)

        # each label file shares its image's base name, with a .txt extension, and lives in label_dir
        label_dir = self.fm.local_files['label_dir']
        self.label_files = [
            join(label_dir, basename(img_path.replace('.jpg', '.txt'))) for img_path in self.img_files
        ]
Exemple #7
0
 def __init__(self):
     """look up the figure directories from the FileManager, then load the data to plot"""
     self.fm = FileManager()
     figure_root = self.fm.local_files['figure_dir']
     self.fig_dir = figure_root
     # the figure data goes into a 'figure_data' sub-directory of the figure directory
     self.fig_data_dir = join(figure_root, 'figure_data')
     self._load_data()
Exemple #8
0
class Plotter:
    """produce figures (and the csv data behind them) from the training log and per-epoch prediction files.

    Notes:
        the plotting methods are wrapped by plotter_decorator and are called without a fig argument in plot_all,
        so the decorator presumably creates the Figure and (unless save=False) saves it -- confirm against the
        decorator's definition
    """

    def __init__(self):
        """look up the figure directories from the FileManager, then load the data to plot"""
        self.fm = FileManager()
        self.fig_dir = self.fm.local_files['figure_dir']
        self.fig_data_dir = join(self.fig_dir, 'figure_data')
        self._load_data()

    def save_fig(self, fig: Figure, file_stub: str):
        """save the figure as a pdf and close it

        Notes:
            saves the figure to the figure_dir specified in the FileManager object

        Args:
            fig (Figure): figure to save
            file_stub (str): name to use for the file. Don't include '.pdf'
        """
        fig.savefig(join(self.fig_dir, '{}.pdf'.format(file_stub)))
        plt.close('all')

    def plot_all(self):
        """create every plot this class can produce"""
        self.total_loss_vs_epoch()
        self.n_boxes_vs_epoch()
        self.animated_learning()
        self.iou_vs_epoch()
        self.final_epoch_eval()

    @plotter_decorator
    def total_loss_vs_epoch(self, fig: Figure):
        """plot the total training loss vs epoch, and write the plotted data to total_loss_vs_epoch.csv

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """
        ax = fig.add_subplot(111)
        ax.set(xlabel='epoch',
               ylabel='total loss',
               title='Training Loss vs. Epoch')
        sns.lineplot(data=self.train_log.loss_total, ax=ax)
        self.train_log.loc[:, ['loss_total']].to_csv(
            join(self.fig_data_dir, 'total_loss_vs_epoch.csv'))

    @plotter_decorator
    def n_boxes_vs_epoch(self, fig: Figure):
        """plot the average number of boxes predicted per frame vs the epoch, alongside the ground-truth average

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """
        predicted = pd.Series(
            [df.boxes.apply(len).agg('mean') for df in self.epoch_predictions])
        # the ground-truth average is constant, so repeat it once per epoch to draw a flat reference line
        actual = pd.Series([self.ground_truth.boxes.apply(len).agg('mean')] *
                           len(predicted))
        ax = fig.add_subplot(111)
        ax.set(xlabel='epoch',
               ylabel='avg # detections',
               title='Average Number of Detections vs. Epoch')
        sns.lineplot(data=predicted, ax=ax, label='predicted')
        sns.lineplot(data=actual, ax=ax, label='actual')
        df = pd.DataFrame({'predicted': predicted, 'actual': actual})
        df.to_csv(join(self.fig_data_dir, 'n_boxes_vs_epoch.csv'))

    @plotter_decorator(save=False)
    def animated_learning(self, fig: Figure):
        """for a single frame, successively plot the predicted boxes and labels at each epoch to create an animation

        the animation is written to animated_learning.gif in the figure directory

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """

        # find a frame with a good balance of number of fish and final-epoch score for each box, and load that image
        final_epoch = self.epoch_predictions[-1].copy()
        final_epoch['n_detections'] = final_epoch['labels'].apply(len)
        # frames without detections get a min score of 0, so the threshold below filters them out
        final_epoch['min_score'] = final_epoch['scores'].apply(
            lambda x: 0 if len(x) == 0 else min(x))
        final_epoch = final_epoch[final_epoch.min_score > 0.95]
        frame = final_epoch.sort_values(by=['n_detections', 'min_score'],
                                        ascending=False).iloc[0].name
        im = np.array(Image.open(
            join(self.fm.local_files['test_image_dir'], frame)),
                      dtype=np.uint8)

        # build up the animation
        max_detections = 5
        ax = fig.add_subplot(111)
        plt.xlim(0, im.shape[1])
        plt.ylim(im.shape[0], 0)
        # one reusable, initially empty rectangle per possible detection
        boxes = [
            Rectangle((0, 0), 0, 0, fill=False) for _ in range(max_detections)
        ]

        def init():
            """attach the placeholder rectangles to the axes"""
            for box in boxes:
                ax.add_patch(box)
            return boxes

        def animate(i):
            """move, resize and recolor the rectangles to show the predictions made at epoch i"""
            # pad with label 0 / zero-size boxes, then truncate, so there are exactly max_detections of each
            label_preds = self.epoch_predictions[i].loc[frame, 'labels']
            label_preds = (label_preds + ([0] * max_detections))[:max_detections]
            box_preds = self.epoch_predictions[i].loc[frame, 'boxes']
            box_preds = [xyminmax_to_xywh(*p) for p in box_preds]
            box_preds = (box_preds + ([[0, 0, 0, 0]] * max_detections))[:max_detections]
            # label 0 is the padding label and is drawn without an edge color
            color_lookup = {0: 'None', 1: '#FF1493', 2: '#00BFFF'}
            for j in range(max_detections):
                boxes[j].set_xy([box_preds[j][0], box_preds[j][1]])
                boxes[j].set_width(box_preds[j][2])
                boxes[j].set_height(box_preds[j][3])
                boxes[j].set_edgecolor(color_lookup[label_preds[j]])
            return boxes

        anim = FuncAnimation(fig,
                             animate,
                             init_func=init,
                             frames=len(self.epoch_predictions),
                             blit=True,
                             interval=200,
                             repeat=False)
        ax.imshow(im, zorder=0)
        anim.save(join(self.fig_dir, 'animated_learning.gif'),
                  writer='imagemagick')
        plt.close('all')

    @plotter_decorator
    def iou_vs_epoch(self, fig: Figure):
        """plot the average iou score vs epoch, and write the plotted data to iou_vs_epoch.csv

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """
        ious = []
        for ep in range(len(self.epoch_predictions)):
            ious.append(self._calc_epoch_iou(ep))
        ax = fig.add_subplot(111)
        ax.set(xlabel='epoch',
               ylabel='average iou',
               title='IOU score vs. Epoch')
        sns.lineplot(data=pd.Series(ious), ax=ax)
        pd.DataFrame({
            'iou': ious
        }).to_csv(join(self.fig_data_dir, 'iou_vs_epoch.csv'))

    @plotter_decorator
    def final_epoch_eval(self, fig: Figure):
        """draw a 2x2 summary of the final epoch: box-count error distribution, frames with vs without error,
        average-iou distribution, and over- vs under-estimation counts

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """
        fig.set_size_inches(11, 8.5)
        epoch_index = len(self.epoch_predictions) - 1
        df, _ = self._full_epoch_eval(epoch_index)

        df = df.reset_index()
        # count frames by the sign of (number of predicted boxes - number of actual boxes)
        no_err_val = df[df.n_boxes_predicted_error == 0].count()['Framefile']
        err_val = df[df.n_boxes_predicted_error != 0].count()['Framefile']
        pos = df[df.n_boxes_predicted_error > 0].count()['Framefile']
        neg = df[df.n_boxes_predicted_error < 0].count()['Framefile']

        ax1 = fig.add_subplot(221)  # top left
        ax2 = fig.add_subplot(222)  # top right
        ax3 = fig.add_subplot(223)  # bottom left
        ax4 = fig.add_subplot(224)  # bottom right

        sns.distplot(df.n_boxes_predicted_error,
                     hist_kws=dict(edgecolor="k", linewidth=0.5),
                     norm_hist=False,
                     kde=False,
                     ax=ax1)
        ax1.set(xlabel='Error Score', ylabel='No. of Frames')
        ax1.set_title("Distribution of Frames Over Error Values", fontsize=10)

        ax2.bar(x=['No Error', 'Error'],
                height=[no_err_val, err_val],
                color=['green', 'red'],
                width=0.4)
        ax2.set_ylabel('No. of Framefiles', fontsize=10)
        ax2.set_title('No. of Frames With Error vs Without Error', fontsize=10)

        sns.distplot(df.average_iou,
                     hist_kws=dict(edgecolor="k", linewidth=0.1),
                     norm_hist=False,
                     kde=False,
                     ax=ax3)
        ax3.set(xlabel='Average IOU', ylabel='No. of Frames')
        ax3.set_title("Distribution of Frames Over Average IOU Scores",
                      fontsize=10)

        ax4.bar(x=['Overestimation', 'Underestimation'],
                height=[pos, neg],
                color='red',
                width=0.4)
        ax4.set_title('Analysis of Errors', fontsize=10)
        ax4.set_ylabel('No. of Frames', fontsize=10)

    def _load_data(self):
        """load and parse all relevant data. Automatically syncs training dir with cloud if any files are missing"""
        required_files = [
            self.fm.local_files[x] for x in ['boxed_fish_csv', 'train_log']
        ]
        required_files.append(
            join(self.fm.local_files['predictions_dir'], '0.csv'))
        # a single sync is requested as soon as the first missing file is found
        for f in required_files:
            if not exists(f):
                self.fm.sync_training_dir(
                    exclude=['labels/**', 'train_images/**'])
                break
        self.train_log = self._parse_train_log()
        self.num_epochs = len(self.train_log)
        self.ground_truth = self._parse_epoch_csv()
        self.epoch_predictions = []
        for epoch in range(self.num_epochs):
            self.epoch_predictions.append(self._parse_epoch_csv(epoch))

    def _parse_train_log(self):
        """parse the logfile that tracked overall loss and learning rate at each epoch

        Returns:
            Pandas Dataframe of losses and learning rate, indexed by epoch number
        """
        return pd.read_csv(self.fm.local_files['train_log'],
                           sep='\t',
                           index_col='epoch')

    def _parse_epoch_csv(self, epoch=-1):
        """parse the csv file of predictions produced when Trainer.train() is run with compare_annotations=True

        Notes:
            if the epoch arg is left at the default value of -1, this function will instead parse 'ground_truth.csv'

        Args:
            epoch(int): epoch number, where 0 refers to the first epoch. Defaults to -1, which parses the
                ground truth csv

        Returns:
            Pandas DataFrame of epoch data
        """
        if epoch == -1:
            path = self.fm.local_files['ground_truth_csv']
            usecols = ['Framefile', 'boxes', 'labels']
        else:
            path = join(self.fm.local_files['predictions_dir'],
                        '{}.csv'.format(epoch))
            usecols = ['Framefile', 'boxes', 'labels', 'scores']
        # SECURITY NOTE(review): every cell is passed to eval(), which executes arbitrary Python. Only use this
        # on csv files produced by this project; consider ast.literal_eval if the cells are plain literals
        return pd.read_csv(
            path,
            usecols=usecols).set_index('Framefile').applymap(lambda x: eval(x))

    def _full_epoch_eval(self, epoch):
        """compare one epoch's predictions against the ground truth, frame by frame

        writes the per-frame table to epoch_<epoch>_eval.csv and the summary to epoch_<epoch>_eval_summary.csv

        Args:
            epoch (int): index into self.epoch_predictions

        Returns:
            tuple: (per-frame DataFrame, summary Series)
        """
        ep = self.epoch_predictions[epoch]
        gt = self.ground_truth
        df = gt.join(ep, lsuffix='_actual', rsuffix='_predicted')
        df['n_boxes_actual'] = df.boxes_actual.apply(len)
        df['n_boxes_predicted'] = df.boxes_predicted.apply(len)
        df['n_boxes_predicted_error'] = df.n_boxes_predicted - df.n_boxes_actual
        # _calc_frame_iou returns an (iou, mapping) pair per row; zip(*...) splits the pairs into two columns
        df['average_iou'], df['act_to_pred_map'] = zip(
            *df.apply(lambda x: self._calc_frame_iou(
                x.boxes_actual, x.boxes_predicted, map_boxes=True),
                      axis=1))
        df['pred_to_act_map'] = df.apply(lambda x: self._flip_mapping(
            x.act_to_pred_map, x.n_boxes_predicted),
                                         axis=1)
        df['pred_accuracy'] = df.apply(lambda x: self._compare_labels(
            x.labels_actual, x.labels_predicted, x.pred_to_act_map),
                                       axis=1)
        # a frame without predictions has nothing to misclassify, so it scores 1.0
        df['avg_accuracy'] = df.pred_accuracy.apply(lambda x: sum(x) / len(x)
                                                    if len(x) > 0 else 1.0)
        df.to_csv(join(self.fig_data_dir, 'epoch_{}_eval.csv'.format(epoch)))

        summary = pd.Series()
        # NOTE(review): np.average raises ZeroDivisionError when the weights sum to zero, i.e. when the epoch
        # predicted no boxes in any frame
        summary['classification_accuracy'] = np.average(
            df.avg_accuracy, weights=df.n_boxes_predicted)
        summary['average_iou'] = np.average(df.average_iou,
                                            weights=df.n_boxes_predicted)
        summary['n_predictions'] = df.n_boxes_predicted.sum()
        summary['n_annotations'] = df.n_boxes_actual.sum()
        summary['n_frames'] = len(df)
        summary.to_csv(
            join(self.fig_data_dir, 'epoch_{}_eval_summary.csv'.format(epoch)))

        return df, summary

    def _compare_labels(self, labels_actual, labels_predicted,
                        pred_to_act_map):
        """determine whether the label for each predicted box matches the label of the corresponding ground-truth box

        Args:
            labels_actual (list of ints): ground-truth label for each ground-truth box
            labels_predicted (list of ints): predicted label for each predicted box
            pred_to_act_map (list of ints): list mapping each predicted box to the ground truth box with the max iou

        Returns:
            list: outcomes, where outcomes[i] = 1 if labels_predicted[i] is correct, and 0 if it's incorrect
        """
        outcomes = []
        for i, pred in enumerate(labels_predicted):
            # append 0 if the predicted box does not overlap a ground truth box
            if pred_to_act_map[i] is None:
                outcomes.append(0)
            # else, append 1 if the predicted label was correct, or 0 if it was incorrect
            else:
                outcomes.append(1 if pred ==
                                labels_actual[pred_to_act_map[i]] else 0)
        return outcomes

    def _calc_precision(self):
        """placeholder; not implemented"""
        pass

    def _calc_recall(self):
        """placeholder; not implemented"""
        pass

    def _flip_mapping(self, a_to_b, len_b):
        """invert a mapping list, so that it maps from b to a instead of from a to b

        Args:
            a_to_b (list): a_to_b[i] is the index in b that element i of a maps to (or None)
            len_b (int): number of elements in b

        Returns:
            list: list of length len_b, where element j is the first index i with a_to_b[i] == j, or None if no
                element of a maps to j
        """
        if len_b == 0:
            return []
        else:
            mapping = []
            for i in range(len_b):
                try:
                    mapping.append(a_to_b.index(i))
                except ValueError:
                    mapping.append(None)
            return mapping

    def _calc_epoch_iou(self, epoch):
        """calculate the average iou across all test frames for a given epoch

        Args:
            epoch (int): epoch number

        Returns:
            float: average iou value per predicted box for the epoch
        """
        gt = self.ground_truth
        ep = self.epoch_predictions[epoch]
        combo = gt.join(ep, lsuffix='_gt', rsuffix='_ep')
        combo['frame_iou'] = combo.apply(
            lambda x: self._calc_frame_iou(x.boxes_gt, x.boxes_ep), axis=1)
        combo['n_boxes_ep'] = combo.boxes_ep.apply(len)
        # NOTE(review): np.average raises ZeroDivisionError when the weights sum to zero, i.e. when the epoch
        # predicted no boxes in any frame
        return np.average(combo.frame_iou, weights=combo.n_boxes_ep)

    def _calc_frame_iou(self, actual_boxes, predicted_boxes, map_boxes=False):
        """calculate the average iou for a frame

        Args:
            actual_boxes (list of lists of 4 ints): list of ground truth bounding boxes
            predicted_boxes (list of lists of 4 ints): list of predicted bounding boxes
            map_boxes: if True, also return a list of ints mapping the ground truth boxes to the
                predicted box with the highest iou value

        Returns:
            if map_boxes is False, returns:
                float: mean iou value for the given frame
            if map_boxes is True, returns:
                float: mean iou value for the given frame
                list of ints: if map_boxes is True, also returns a list mapping the actual boxes to the
                    the predicted boxes, where actual_box[i] maps to predicted_boxes[mapping_list[i]]
                    (and predicted_box[j] maps to actual_boxes[mapping_list.index(j)]
                    if the actual_boxes list is empty, returns an empty list. for ground truth boxes that do not
                    intersect a predicted box, the list will contain a None element
        """
        a_bs = actual_boxes
        p_bs = predicted_boxes
        # if the model predicts no boxes for an empty frame, return a perfect score of 1.0 (and an empty mapping list)
        if (len(a_bs) == 0) and (len(p_bs) == 0):
            return (1.0, []) if map_boxes else 1.0

        # elif the model predicts no boxes for a frame with one or more fish, return a score of 0.0 (and mapping list of
        # null values with the same length as the number of actual boxes)
        elif (len(a_bs) > 0) and (len(p_bs) == 0):
            return (0.0, [None] * len(a_bs)) if map_boxes else 0.0

        # elif the model predicts 1 or more boxes for a frame with no fish, return a score of 0 (and an empty mapping
        # list)
        elif (len(a_bs) == 0) and (len(p_bs) > 0):
            return (0.0, []) if map_boxes else 0.0

        # elif the model predicted 1 or more boxes for a frame with 1 or more fish, calculate the iou for each
        # ground truth box with its best match, and average those values for the frame
        elif (len(a_bs) > 0) and (len(p_bs) > 0):
            ious = []
            mapping = []
            # for each actual box, find the largest iou score of that box with one of the predicted boxes
            for a_b in a_bs:
                max_iou = 0.0
                max_iou_mapping = None
                for i, p_b in enumerate(p_bs):
                    iou = self._calc_iou(a_b, p_b)
                    if iou > max_iou:
                        max_iou = iou
                        max_iou_mapping = i
                ious.append(max_iou)
                mapping.append(max_iou_mapping)
            # if the model predicted more boxes than there are objects, penalize by scoring the remaining boxes 0.0
            if len(a_bs) < len(p_bs):
                ious.extend([0.0] * (len(p_bs) - len(a_bs)))
            # return the mean
            return (np.mean(ious), mapping) if map_boxes else np.mean(ious)

    def _calc_iou(self, box_a, box_b):
        """calculate the iou between box_a and box_b

        Args:
            box_a: box as a sequence indexed [0..3]; read here as (xmin, ymin, xmax, ymax)
            box_b: second box, same layout

        Returns:
            float: intersection area divided by union area. 1.0 if both boxes are empty sequences, 0.0 if the
                boxes do not intersect
        """
        a = box_a
        b = box_b
        if (len(box_a) == 0) and (len(box_b) == 0):
            return 1.0
        # find area of the box formed by the intersection of box_a and box_b
        # (the +1 treats the coordinates as inclusive pixel indices)
        xa, ya, xb, yb = (max(a[0], b[0]), max(a[1], b[1]), min(a[2], b[2]),
                          min(a[3], b[3]))
        intersection = max(0, xb - xa + 1) * max(0, yb - ya + 1)
        # if the boxes do not intersect, short-circuit and return 0.0
        if intersection == 0:
            return 0.0
        # else, calculate the area of the union of box_a and box_b, and return intersection/union
        else:
            a_area = (a[2] - a[0] + 1) * (a[3] - a[1] + 1)
            b_area = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
            union = float(a_area + b_area - intersection)
            return intersection / union
Exemple #9
0
        [
            com.extend(
                list(
                    chain.from_iterable(
                        zip(['--exclude'] * len(exclude), exclude))))
            for com in [down, up]
        ]
    [run(com) for com in [down, up]]


# Measure duration of program: keep the start time in `s` and print the current time.
# Note that `s` and the printed value come from two separate time.time() calls.
s = ctime(time.time())
print("Start Time (Full): ", ctime(time.time()))

# Create the general FileManager and the ProjectFileManager for the requested project.
# NOTE(review): the message printed below implies that constructing ProjectFileManager creates the project
# directories and downloads the requested files -- confirm against ProjectFileManager.
fm = FileManager()
pfm = ProjectFileManager(args.pid, fm, args.download_images,
                         args.download_video, args.video)
print('downloaded video, created directories!')

# Build the local path of the video inside the project's '<pid>_dir' directory, and derive the video name
# (text before the first '.') and the name of the final detections csv file.
video_path = os.path.join(pfm.local_files['{}_dir'.format(args.pid)],
                          args.video)
video_name = args.video.split('.')[0]
csv_name = '{}_{}_detections.csv'.format(args.pid, video_name)

# Full-pipeline mode. The string below is a bare expression statement describing the intended steps.
if args.full:
    """
        1. Run all the processes - video trimming, detections, video annotation
        2. Create intervals list and iterate through them to crop video and feed it into the model
    """
 def __init__(self):
     """create a FileManager object, and an empty dictionary to hold one ProjectFileManager object per project"""
     self.file_manager = FileManager()
     self.proj_file_managers = dict()
 def __init__(self):
     """look up the figure-data and test-image directories, and build the path to the evaluation csv"""
     self.fm = FileManager()
     csv_root = self.fm.local_files['figure_data_dir']
     csv_name = 'epoch_99_eval.csv'
     self.csv_dir = csv_root
     self.file = csv_name
     # full path of the evaluation csv inside the figure-data directory
     self.data = os.path.join(csv_root, csv_name)
     self.img_dir = self.fm.local_files['test_image_dir']
 def __init__(self):
     """create a FileManager and set the (hard-coded) project id"""
     manager = FileManager()
     self.fm = manager
     self.pid = 'MC6_5'
Exemple #13
0
# name of the machine this script is running on; used below to detect a PACE login node
host = socket.gethostname()

# if running from a PACE login node (host name contains both 'login' and 'pace'), assert that
# args.command == full_auto, then submit the train.pbs script with qsub from the PBS directory,
# passing the requested number of epochs as the EPOCHS variable
if ('login' in host) and ('pace' in host):
    assert (args.command == 'full_auto'
            ), 'full_auto is the only mode currently on a PACE login node'
    pbs_dir = os.path.join(package_root, 'CichlidDetection/PBS')
    subprocess.run(
        ['qsub', 'train.pbs', '-v', 'EPOCHS={}'.format(args.Epochs)],
        cwd=pbs_dir)

# if not on a PACE login node, begin the analysis specified by args.command
else:
    # 'sync' only needs the FileManager, so the Runner is not imported for it
    if args.command == 'sync':
        from CichlidDetection.Classes.FileManager import FileManager
        FileManager().sync_training_dir()

    # every other command goes through a Runner instance
    else:
        from CichlidDetection.Classes.Runner import Runner
        runner = Runner()

        # full_auto: download, prep, then train for args.Epochs epochs
        if args.command == 'full_auto':
            runner.download()
            runner.prep()
            runner.train(num_epochs=args.Epochs)

        elif args.command == 'download':
            runner.download()

        elif args.command == 'train':
            runner.prep()
Exemple #14
0
 def __init__(self, *args):
     """initialize the detector

     Args:
         *args: optional positional arguments; when any are given, the last one is stored as self.pfm
     """
     if args:
         self.pfm = args[-1]
     self.fm = FileManager()
     self._initiate_model()
Exemple #15
0
 def __init__(self, *args):
     self.fm = FileManager()